diff options
author | lexborisov <lex.borisov@gmail.com> | 2016-07-19 20:53:21 +0300 |
---|---|---|
committer | lexborisov <lex.borisov@gmail.com> | 2016-07-19 20:53:21 +0300 |
commit | f9f19e19a58dc24409e09ccfc97bf3830c237ac9 (patch) | |
tree | 31e6f02a7cba04235cb578ff52e8cb732ddd3544 | |
parent | 1b5ac2c1d90ba926b8cc87a3588f8f245379dd94 (diff) |
Synchronization with the latest versions of MyHTML
-rw-r--r-- | Changes | 6 | ||||
-rw-r--r-- | MANIFEST | 2 | ||||
-rwxr-xr-x | MyHTML.pm | 126 | ||||
-rwxr-xr-x | MyHTML.xs | 267 | ||||
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | README.md | 138 | ||||
-rw-r--r-- | examples/callback.pl | 45 | ||||
-rwxr-xr-x | typemap | 2 | ||||
-rwxr-xr-x | xs/token_node.xs | 118 | ||||
-rwxr-xr-x | xs/tree_node.xs | 14 |
10 files changed, 713 insertions, 7 deletions
@@ -1,8 +1,10 @@ -1.02 Mon Apr 04 2016 22:21:03 GMT+0300 +1.02 Update MyHTML source. up to 1.0.2 - Release version Added method for set tree parse flags myhtml_tree_parse_flags_set Added methods for Node get_nodes_by_attribute_key, get_nodes_by_attribute_value_whitespace_separated, get_nodes_by_attribute_value_begin, get_nodes_by_attribute_value_end, get_nodes_by_attribute_value_contain, get_nodes_by_attribute_value_hyphen_separated, get_nodes_by_tag_id - + Added callback methods for Tree: callback_before_token_done_set, callback_after_token_done_set, callback_node_insert_set, callback_node_remove_set + Added example for callback: see examples/callback.pl + Added Token Node methods 0.35 Mon Apr 04 2016 22:21:03 GMT+0300 Fixes for build in NetBSD @@ -10,10 +10,12 @@ typemap examples/detect_encoding.pl examples/clear_text.pl +examples/callback.pl xs/tree.xs xs/tree_attr.xs xs/tree_node.xs +xs/token_node.xs source/myhtml/api.h source/myhtml/api_encoding.h @@ -466,6 +466,42 @@ Return: array list of elements HTML::MyHTML::Tree::Node Return: array list of elements HTML::MyHTML::Tree::Node +=head3 callback_before_token_done_set + +Set callback for tokens before processing. + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + + $tree->callback_before_token_done_set($sub_callback [, $ctx]); + + +=head3 callback_after_token_done_set + +Set callback for tokens after processing + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + + $tree->callback_after_token_done_set($sub_callback [, $ctx]); + + +=head3 callback_node_insert_set + +Set callback for tree node after inserted + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + + $tree->callback_node_insert_set($sub_callback [, $ctx]); + + +=head3 callback_node_remove_set + +Set callback for tree node after removed + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + + $tree->callback_node_remove_set($sub_callback [, $ctx]); + + =head2 Attributes =head3 info @@ -599,6 +635,15 @@ Get last child of node Return: HTML::MyHTML::Tree::Node if exists, otherwise an UNDEF value +=head3 token + +Get token node + + my $token_node = $node->token(); + +Return: HTML::MyHTML::Token::Node if exists, otherwise an UNDEF value + + =head3 get_nodes_by_attribute_key Get nodes by attribute key of current node @@ -828,6 +873,87 @@ Print tree of a node. Print including current node $node->print_all($tree, $fh); +=head2 Token Node + +=head3 info + +Get information of token node: tag name, tag id, attr + + my $res = $token_node->info($tree); + +Return: hash ref + + +=head3 tag_id + +Get token node tag id + + my $tag_id = $token_node->tag_id(); + +Return: tag_id + + +=head3 tag_name + +Get tag name of a token node + + my $res = $token_node->tag_name($tree); + +Return: tag name + + +=head3 is_close_self + +Node has self-closing flag? + + my $bool = $token_node->is_close_self(); + +Return: 1 (true) or 0 (false) + + +=head3 attr_first + +Get first attribute of a token node + + my $attr = $token_node->attr_first(); + +Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value + + +=head3 attr_last + +Get last attribute of a token node + + my $attr = $token_node->attr_last(); + +Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value + + +=head3 text + +Get text of a token node. Only for a MyHTML_TAG__TEXT or MyHTML_TAG__COMMENT tags + + my $res = $token_node->text(); + +Return: text if exists, otherwise an UNDEF value + + +=head3 string + +Get myhtml_string_t object by token node + + my $string = $token_node->string(); + +Return: HTML::MyHTML::String if exists, otherwise an NULL value + + +=head3 wait_for_done + +Wait for process token all parsing stage. Need if you use thread mode + + $token_node->wait_for_done(); + + =head2 Detect encoding =head3 encoding_detect @@ -61,6 +61,13 @@ typedef myhtml_tag_index_t * HTML__MyHTML__Tag__Index; typedef myhtml_tag_index_node_t * HTML__MyHTML__Tag__Index__Node; typedef myhtml_collection_t * HTML__MyHTML__Collection; typedef myhtml_string_t * HTML__MyHTML__String; +typedef myhtml_token_node_t * HTML__MyHTML__Token__Node; + +struct myhtml_perl_callback_ctx { + SV* callback; + SV* ctx; +} +typedef myhtml_perl_callback_ctx_t; typedef myhtml_collection_t* (*myhtml_perl_get_attr_by_val_f)(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, myhtml_status_t* status); @@ -106,12 +113,10 @@ HV * sm_get_attr_info(myhtml_tree_attr_t* attr) return hash; } -HV * sm_get_node_attr_info(myhtml_tree_node_t* node) +HV * sm_get_node_attr_info(myhtml_tree_attr_t* attr) { HV* hash = newHV(); - myhtml_tree_attr_t* attr = myhtml_node_attribute_first(node); - while(attr) { size_t name_len, value_len; @@ -161,9 +166,18 @@ HV * sm_get_node_info(myhtml_tree_t *tree, myhtml_tree_node_t *node) size_t length; const char* tag_name = myhtml_tag_name_by_id(tree, myhtml_node_tag_id(node), &length); + myhtml_position_t element_pos = myhtml_node_element_pasition(node); + myhtml_position_t raw_pos = myhtml_node_raw_pasition(node); + ha = hv_store(hash, "tag", 3, newSVpv(tag_name, length), 0); ha = hv_store(hash, "tag_id", 6, newSViv(myhtml_node_tag_id(node)), 0); + ha = hv_store(hash, "element_begin", 13, newSViv(element_pos.begin), 0); + ha = hv_store(hash, "element_length", 14, newSViv(element_pos.length), 0); + + ha = hv_store(hash, "raw_begin", 9, newSViv(raw_pos.begin), 0); + ha = hv_store(hash, "raw_length", 10, newSViv(raw_pos.length), 0); + switch (myhtml_node_namespace(node)) { case MyHTML_NAMESPACE_SVG: @@ -179,7 +193,32 @@ HV * sm_get_node_info(myhtml_tree_t *tree, myhtml_tree_node_t *node) hv_store(hash, "namespace_id", 12, newSViv(myhtml_node_namespace(node)), 0); - hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info(node)), 0); + hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info( myhtml_node_attribute_first(node) )), 0); + + return hash; +} + +HV * sm_get_token_node_info(myhtml_tree_t *tree, myhtml_token_node_t *token_node) +{ + HV* hash = newHV(); + SV **ha; + + size_t length; + const char* tag_name = myhtml_tag_name_by_id(tree, myhtml_token_node_tag_id(token_node), &length); + + myhtml_position_t element_pos = myhtml_token_node_element_pasition(token_node); + myhtml_position_t raw_pos = myhtml_token_node_raw_pasition(token_node); + + ha = hv_store(hash, "tag", 3, newSVpv(tag_name, length), 0); + ha = hv_store(hash, "tag_id", 6, newSViv(myhtml_token_node_tag_id(token_node)), 0); + + ha = hv_store(hash, "element_begin", 13, newSViv(element_pos.begin), 0); + ha = hv_store(hash, "element_length", 14, newSViv(element_pos.length), 0); + + ha = hv_store(hash, "raw_begin", 9, newSViv(raw_pos.begin), 0); + ha = hv_store(hash, "raw_length", 10, newSViv(raw_pos.length), 0); + + hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info( myhtml_token_node_attribute_first(token_node) )), 0); return hash; } @@ -253,6 +292,71 @@ SV* sm_get_nodes_by_attribute_value(myhtml_tree_node_t* node, myhtml_tree_t* tre return &PL_sv_undef; } +void * myhtml_perl_callback_token_done(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx) +{ + myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t *)ctx; + + { + dSP; + + ENTER; + SAVETMPS; + + SV *perl_tree = sv_newmortal(); + sv_setref_pv(perl_tree, "HTML::MyHTML::Tree", (void*)tree); + + SV *perl_token = sv_newmortal(); + sv_setref_pv(perl_token, "HTML::MyHTML::Token::Node", (void*)token); + + PUSHMARK(sp); + XPUSHs(perl_tree); + XPUSHs(perl_token); + + if(perl_ctx->ctx) { + XPUSHs(perl_ctx->ctx); + } + PUTBACK; + + call_sv((SV *)perl_ctx->callback, G_SCALAR); + + FREETMPS; + LEAVE; + } + + return ctx; +} + +void myhtml_perl_callback_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, void* ctx) +{ + myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t *)ctx; + + { + dSP; + + ENTER; + SAVETMPS; + + SV *perl_tree = sv_newmortal(); + sv_setref_pv(perl_tree, "HTML::MyHTML::Tree", (void*)tree); + + SV *perl_node = sv_newmortal(); + sv_setref_pv(perl_node, "HTML::MyHTML::Tree::Node", (void*)node); + + PUSHMARK(sp); + XPUSHs(perl_tree); + XPUSHs(perl_node); + + if(perl_ctx->ctx) { + XPUSHs(perl_ctx->ctx); + } + PUTBACK; + + call_sv((SV *)perl_ctx->callback, G_SCALAR); + + FREETMPS; + LEAVE; + } +} //#### //# @@ -333,6 +437,7 @@ new_tree(myhtml, out_status = &PL_sv_undef) INCLUDE: xs/tree.xs INCLUDE: xs/tree_node.xs INCLUDE: xs/tree_attr.xs +INCLUDE: xs/token_node.xs #### # @@ -519,6 +624,7 @@ parse_chunk_end(myhtml, tree) OUTPUT: RETVAL + #************************************************************************************ # # MyHTML_TREE @@ -586,6 +692,20 @@ tree_destroy(tree) HTML::MyHTML::Tree tree; CODE: + if(tree) { + if(tree->callback_before_token_ctx) + free(tree->callback_before_token_ctx); + + if(tree->callback_after_token_ctx) + free(tree->callback_after_token_ctx); + + if(tree->callback_tree_node_insert_ctx) + free(tree->callback_tree_node_insert_ctx); + + if(tree->callback_tree_node_remove_ctx) + free(tree->callback_tree_node_remove_ctx); + } + RETVAL = myhtml_tree_destroy(tree); OUTPUT: RETVAL @@ -673,6 +793,145 @@ tree_print_node(tree, node, fh) CODE: myhtml_tree_print_node(tree, node, fh); +void +callback_before_token_done_set(tree, callback, ctx = &PL_sv_undef) + HTML::MyHTML::Tree tree; + SV* callback; + SV* ctx; + + CODE: + if(SvOK(callback)) { + myhtml_perl_callback_ctx_t *perl_ctx; + + if(tree->callback_before_token_ctx) { + perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_before_token_ctx; + } + else { + perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t)); + } + + setbuf(stdout, NULL); + + if(perl_ctx) + { + perl_ctx->callback = newSVsv(callback); + perl_ctx->ctx = newSVsv(ctx); + + tree->callback_before_token = myhtml_perl_callback_token_done; + tree->callback_before_token_ctx = perl_ctx; + } + } + else { + if(tree->callback_before_token_ctx) + free(tree->callback_before_token_ctx); + + tree->callback_before_token = NULL; + tree->callback_before_token_ctx = NULL; + } + +void +callback_after_token_done_set(tree, callback, ctx = &PL_sv_undef) + HTML::MyHTML::Tree tree; + SV* callback; + SV* ctx; + + CODE: + if(SvOK(callback)) { + myhtml_perl_callback_ctx_t *perl_ctx; + + if(tree->callback_after_token_ctx) { + perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_after_token_ctx; + } + else { + perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t)); + } + + if(perl_ctx) + { + perl_ctx->callback = newSVsv(callback); + perl_ctx->ctx = newSVsv(ctx); + + tree->callback_after_token = myhtml_perl_callback_token_done; + tree->callback_after_token_ctx = perl_ctx; + } + } + else { + if(tree->callback_after_token_ctx) + free(tree->callback_after_token_ctx); + + tree->callback_after_token = NULL; + tree->callback_after_token_ctx = NULL; + } + +void +callback_node_insert_set(tree, callback, ctx = &PL_sv_undef) + HTML::MyHTML::Tree tree; + SV* callback; + SV* ctx; + + CODE: + if(SvOK(callback)) { + myhtml_perl_callback_ctx_t *perl_ctx; + + if(tree->callback_tree_node_insert_ctx) { + perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_tree_node_insert_ctx; + } + else { + perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t)); + } + + if(perl_ctx) + { + perl_ctx->callback = newSVsv(callback); + perl_ctx->ctx = newSVsv(ctx); + + tree->callback_tree_node_insert = myhtml_perl_callback_node; + tree->callback_tree_node_insert_ctx = perl_ctx; + } + } + else { + if(tree->callback_tree_node_insert_ctx) + free(tree->callback_tree_node_insert_ctx); + + tree->callback_tree_node_insert = NULL; + tree->callback_tree_node_insert_ctx = NULL; + } + +void +callback_node_remove_set(tree, callback, ctx = &PL_sv_undef) + HTML::MyHTML::Tree tree; + SV* callback; + SV* ctx; + + CODE: + if(SvOK(callback)) { + myhtml_perl_callback_ctx_t *perl_ctx; + + if(tree->callback_tree_node_remove_ctx) { + perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_tree_node_remove_ctx; + } + else { + perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t)); + } + + if(perl_ctx) + { + perl_ctx->callback = newSVsv(callback); + perl_ctx->ctx = newSVsv(ctx); + + tree->callback_tree_node_remove = myhtml_perl_callback_node; + tree->callback_tree_node_remove_ctx = perl_ctx; + } + } + else { + if(tree->callback_tree_node_remove_ctx) + free(tree->callback_tree_node_remove_ctx); + + tree->callback_tree_node_remove = NULL; + tree->callback_tree_node_remove_ctx = NULL; + } + + #************************************************************************************ # # MyHTML_NODE @@ -5,7 +5,7 @@ DESCRIPTION Fast HTML Parser using Threads with no outside dependencies -This Parser based on MyHTML library (it includes version 0.6.1) +This Parser based on MyHTML library (it includes version 1.0.2) See https://github.com/lexborisov/myhtml @@ -401,6 +401,45 @@ Return: array list of elements HTML::MyHTML::Tree::Node Return: array list of elements HTML::MyHTML::Tree::Node +### callback_before_token_done_set + +Set callback for tokens before processing. + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + +```perl + $tree->callback_before_token_done_set($sub_callback [, $ctx]); +``` + +### callback_after_token_done_set + +Set callback for tokens after processing + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + +```perl + $tree->callback_after_token_done_set($sub_callback [, $ctx]); +``` + +### callback_node_insert_set + +Set callback for tree node after inserted + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + +```perl + $tree->callback_node_insert_set($sub_callback [, $ctx]); +``` + +### callback_node_remove_set + +Set callback for tree node after removed + +Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Build without threads or use methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single or create myhtml with MyHTML_OPTIONS_PARSE_MODE_SINGLE option; + +```perl + $tree->callback_node_remove_set($sub_callback [, $ctx]); +``` ## Attributes @@ -564,6 +603,16 @@ Get last child of node Return: HTML::MyHTML::Tree::Node if exists, otherwise an UNDEF value +### token + +Get token node + +```perl + my $token_node = $node->token(); +``` + +Return: HTML::MyHTML::Token::Node if exists, otherwise an UNDEF value + ### get_nodes_by_attribute_key Get nodes by attribute key of current node @@ -844,6 +893,95 @@ Print tree of a node. Print including current node $node->print_all($tree, $fh); ``` +## Token Node + +### info + +Get information of token node: tag name, tag id, attr + +```perl + my $res = $token_node->info($tree); +``` + +Return: hash ref + +### tag_id + +Get token node tag id + +```perl + my $tag_id = $token_node->tag_id(); +``` + +Return: tag_id + +### tag_name + +Get tag name of a token node + +```perl + my $res = $token_node->tag_name($tree); +``` + +Return: tag name + +### is_close_self + +Node has self-closing flag? + +```perl + my $bool = $token_node->is_close_self(); +``` + +Return: 1 (true) or 0 (false) + +### attr_first + +Get first attribute of a token node + +```perl + my $attr = $token_node->attr_first(); +``` + +Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value + +### attr_last + +Get last attribute of a token node + +```perl + my $attr = $token_node->attr_last(); +``` + +Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value + +### text + +Get text of a token node. Only for a MyHTML_TAG__TEXT or MyHTML_TAG__COMMENT tags + +```perl + my $res = $token_node->text(); +``` + +Return: text if exists, otherwise an UNDEF value + +### string + +Get myhtml_string_t object by token node + +```perl + my $string = $token_node->string(); +``` + +Return: HTML::MyHTML::String if exists, otherwise an NULL value + +### wait_for_done + +Wait for process token all parsing stage. Need if you use thread mode + +```perl + $token_node->wait_for_done(); +``` ## Detect encoding diff --git a/examples/callback.pl b/examples/callback.pl new file mode 100644 index 0000000..417716d --- /dev/null +++ b/examples/callback.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl -w + +use utf8; +use strict; +use Encode; + +use HTML::MyHTML; +use LWP::UserAgent; + +my $ua = LWP::UserAgent->new; +my $req = HTTP::Request->new(GET => "http://edition.cnn.com/2016/03/22/weather/great-barrier-reef-coral-bleaching/index.html"); +my $res = $ua->request($req); + +my $body = $res->content; + +# init +# is normally if parse thread only one, otherwise use single mode MyHTML_OPTIONS_PARSE_MODE_SINGLE +# or methods parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single +# for development use single mode, it will be easier to debug +my $myhtml = HTML::MyHTML->new(MyHTML_OPTIONS_DEFAULT, 1); +my $tree = $myhtml->new_tree(); + +# detect encoding +my $encoding; +$myhtml->encoding_detect($body, $encoding); + +my $args = {count => 0}; + +$tree->callback_before_token_done_set(sub { + my ($tree, $token_node, $ctx) = @_; + use bytes; + + $ctx->{count}++; + + my $info = $token_node->info($tree); + + my $str = substr $body, $info->{element_begin}, $info->{element_length}; + print $str, "\n"; + +}, $args); + +# parse +$myhtml->parse($tree, $encoding, $body); + +print "Total count: ", $args->{count}, "\n"; @@ -11,6 +11,7 @@ myhtml_string_t * T_PTROBJ myhtml_tree_attr_t * T_PTROBJ myhtml_tag_index_entry_t * T_PTROBJ myhtml_tag_index_node_t * T_PTROBJ +myhtml_token_node_t * T_PTROBJ myhtml_status_t T_IV myhtml_encoding_t T_IV @@ -33,3 +34,4 @@ HTML::MyHTML::Tag::Index::Node T_PTROBJ HTML::MyHTML::Collection T_PTROBJ HTML::MyHTML::String T_PTROBJ +HTML::MyHTML::Token::Node T_PTROBJ diff --git a/xs/token_node.xs b/xs/token_node.xs new file mode 100755 index 0000000..7431a79 --- /dev/null +++ b/xs/token_node.xs @@ -0,0 +1,118 @@ +#/* +# Copyright 2015-2016 Alexander Borisov +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: lex.borisov@gmail.com (Alexander Borisov) +#*/ + +MODULE = HTML::MyHTML::Token::Node PACKAGE = HTML::MyHTML::Token::Node +PROTOTYPES: DISABLE + +#=sort 1 + +SV* +info(token_node, tree) + HTML::MyHTML::Token::Node token_node; + HTML::MyHTML::Tree tree; + + CODE: + RETVAL = newRV_noinc((SV *)sm_get_token_node_info(tree, token_node)); + OUTPUT: + RETVAL + +myhtml_tag_id_t +tag_id(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + RETVAL = myhtml_token_node_tag_id(token_node); + OUTPUT: + RETVAL + +SV* +tag_name(token_node, tree) + HTML::MyHTML::Token::Node token_node; + HTML::MyHTML::Tree tree; + + CODE: + size_t length; + const char* name = myhtml_tag_name_by_id(tree, myhtml_token_node_tag_id(token_node), &length); + RETVAL = newSVpv(name, length); + OUTPUT: + RETVAL + +#=sort 14 + +bool +is_close_self(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + RETVAL = myhtml_token_node_is_close_self(token_node); + OUTPUT: + RETVAL + +#=sort 15 + +HTML::MyHTML::Tree::Attr +attr_first(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + RETVAL = myhtml_token_node_attribute_first(token_node); + OUTPUT: + RETVAL + +#=sort 16 + +HTML::MyHTML::Tree::Attr +attr_last(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + RETVAL = myhtml_token_node_attribute_last(token_node); + OUTPUT: + RETVAL + +SV* +text(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + size_t length; + const char* text = myhtml_token_node_text(token_node, &length); + RETVAL = newSVpv(text, length); + OUTPUT: + RETVAL + +#=sort 21 + +HTML::MyHTML::String +string(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + RETVAL = myhtml_token_node_string(token_node); + OUTPUT: + RETVAL + +#=sort 22 + +void +wait_for_done(token_node) + HTML::MyHTML::Token::Node token_node; + + CODE: + myhtml_token_node_wait_for_done(token_node); + diff --git a/xs/tree_node.xs b/xs/tree_node.xs index ee4fe28..6c564c9 100755 --- a/xs/tree_node.xs +++ b/xs/tree_node.xs @@ -103,6 +103,20 @@ last_child(node) #=sort 6 +HTML::MyHTML::Token::Node +token(node) + HTML::MyHTML::Tree::Node node; + + CODE: + RETVAL = myhtml_node_token(node); + OUTPUT: + RETVAL + POSTCALL: + if(RETVAL == NULL) + XSRETURN_UNDEF; + +#=sort 6 + SV* get_nodes_by_attribute_key(node, tree, key, out_status = &PL_sv_undef) HTML::MyHTML::Tree::Node node; |