Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/lexborisov/perl-html-myhtml.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlexborisov <lex.borisov@gmail.com>2016-07-19 20:53:21 +0300
committerlexborisov <lex.borisov@gmail.com>2016-07-19 20:53:21 +0300
commitf9f19e19a58dc24409e09ccfc97bf3830c237ac9 (patch)
tree31e6f02a7cba04235cb578ff52e8cb732ddd3544
parent1b5ac2c1d90ba926b8cc87a3588f8f245379dd94 (diff)
Synchronization with the latest versions of MyHTML
-rw-r--r--Changes6
-rw-r--r--MANIFEST2
-rwxr-xr-xMyHTML.pm126
-rwxr-xr-xMyHTML.xs267
-rw-r--r--README2
-rw-r--r--README.md138
-rw-r--r--examples/callback.pl45
-rwxr-xr-xtypemap2
-rwxr-xr-xxs/token_node.xs118
-rwxr-xr-xxs/tree_node.xs14
10 files changed, 713 insertions, 7 deletions
diff --git a/Changes b/Changes
index b02bbdf..d4acbd1 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,10 @@
-1.02 Mon Apr 04 2016 22:21:03 GMT+0300
+1.02
Update MyHTML source. up to 1.0.2 - Release version
Added method for set tree parse flags myhtml_tree_parse_flags_set
Added methods for Node get_nodes_by_attribute_key, get_nodes_by_attribute_value_whitespace_separated, get_nodes_by_attribute_value_begin, get_nodes_by_attribute_value_end, get_nodes_by_attribute_value_contain, get_nodes_by_attribute_value_hyphen_separated, get_nodes_by_tag_id
-
+ Added callback methods for Tree: callback_before_token_done_set, callback_after_token_done_set, callback_node_insert_set, callback_node_remove_set
+ Added example for callback: see examples/callback.pl
+ Added Token Node methods
0.35 Mon Apr 04 2016 22:21:03 GMT+0300
Fixes for build in NetBSD
diff --git a/MANIFEST b/MANIFEST
index 07c21c5..e9dc4fb 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -10,10 +10,12 @@ typemap
examples/detect_encoding.pl
examples/clear_text.pl
+examples/callback.pl
xs/tree.xs
xs/tree_attr.xs
xs/tree_node.xs
+xs/token_node.xs
source/myhtml/api.h
source/myhtml/api_encoding.h
diff --git a/MyHTML.pm b/MyHTML.pm
index a47a699..986ff90 100755
--- a/MyHTML.pm
+++ b/MyHTML.pm
@@ -466,6 +466,42 @@ Return: array list of elements HTML::MyHTML::Tree::Node
Return: array list of elements HTML::MyHTML::Tree::Node
+=head3 callback_before_token_done_set
+
+Set callback for tokens before processing.
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+ $tree->callback_before_token_done_set($sub_callback [, $ctx]);
+
+
+=head3 callback_after_token_done_set
+
+Set callback for tokens after processing
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+ $tree->callback_after_token_done_set($sub_callback [, $ctx]);
+
+
+=head3 callback_node_insert_set
+
+Set a callback that is called after a tree node has been inserted
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+ $tree->callback_node_insert_set($sub_callback [, $ctx]);
+
+
+=head3 callback_node_remove_set
+
+Set a callback that is called after a tree node has been removed
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+ $tree->callback_node_remove_set($sub_callback [, $ctx]);
+
+
=head2 Attributes
=head3 info
@@ -599,6 +635,15 @@ Get last child of node
Return: HTML::MyHTML::Tree::Node if exists, otherwise an UNDEF value
+=head3 token
+
+Get token node
+
+ my $token_node = $node->token();
+
+Return: HTML::MyHTML::Token::Node if exists, otherwise an UNDEF value
+
+
=head3 get_nodes_by_attribute_key
Get nodes by attribute key of current node
@@ -828,6 +873,87 @@ Print tree of a node. Print including current node
$node->print_all($tree, $fh);
+=head2 Token Node
+
+=head3 info
+
+Get information of token node: tag name, tag id, attr
+
+ my $res = $token_node->info($tree);
+
+Return: hash ref
+
+
+=head3 tag_id
+
+Get token node tag id
+
+ my $tag_id = $token_node->tag_id();
+
+Return: tag_id
+
+
+=head3 tag_name
+
+Get tag name of a token node
+
+ my $res = $token_node->tag_name($tree);
+
+Return: tag name
+
+
+=head3 is_close_self
+
+Check whether the token node has the self-closing flag
+
+ my $bool = $token_node->is_close_self();
+
+Return: 1 (true) or 0 (false)
+
+
+=head3 attr_first
+
+Get first attribute of a token node
+
+ my $attr = $token_node->attr_first();
+
+Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value
+
+
+=head3 attr_last
+
+Get last attribute of a token node
+
+ my $attr = $token_node->attr_last();
+
+Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value
+
+
+=head3 text
+
+Get text of a token node. Only for MyHTML_TAG__TEXT or MyHTML_TAG__COMMENT tags
+
+ my $res = $token_node->text();
+
+Return: text if exists, otherwise an UNDEF value
+
+
+=head3 string
+
+Get myhtml_string_t object by token node
+
+ my $string = $token_node->string();
+
+Return: HTML::MyHTML::String if exists, otherwise a NULL value
+
+
+=head3 wait_for_done
+
+Wait until the token has been processed by all parsing stages. Needed if you use thread mode
+
+ $token_node->wait_for_done();
+
+
=head2 Detect encoding
=head3 encoding_detect
diff --git a/MyHTML.xs b/MyHTML.xs
index 69f0f7d..7ee90a2 100755
--- a/MyHTML.xs
+++ b/MyHTML.xs
@@ -61,6 +61,13 @@ typedef myhtml_tag_index_t * HTML__MyHTML__Tag__Index;
typedef myhtml_tag_index_node_t * HTML__MyHTML__Tag__Index__Node;
typedef myhtml_collection_t * HTML__MyHTML__Collection;
typedef myhtml_string_t * HTML__MyHTML__String;
+typedef myhtml_token_node_t * HTML__MyHTML__Token__Node;
+
+struct myhtml_perl_callback_ctx {
+ SV* callback;
+ SV* ctx;
+}
+typedef myhtml_perl_callback_ctx_t;
typedef myhtml_collection_t* (*myhtml_perl_get_attr_by_val_f)(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive,
const char* key, size_t key_len, const char* value, size_t value_len, myhtml_status_t* status);
@@ -106,12 +113,10 @@ HV * sm_get_attr_info(myhtml_tree_attr_t* attr)
return hash;
}
-HV * sm_get_node_attr_info(myhtml_tree_node_t* node)
+HV * sm_get_node_attr_info(myhtml_tree_attr_t* attr)
{
HV* hash = newHV();
- myhtml_tree_attr_t* attr = myhtml_node_attribute_first(node);
-
while(attr)
{
size_t name_len, value_len;
@@ -161,9 +166,18 @@ HV * sm_get_node_info(myhtml_tree_t *tree, myhtml_tree_node_t *node)
size_t length;
const char* tag_name = myhtml_tag_name_by_id(tree, myhtml_node_tag_id(node), &length);
+ myhtml_position_t element_pos = myhtml_node_element_pasition(node);
+ myhtml_position_t raw_pos = myhtml_node_raw_pasition(node);
+
ha = hv_store(hash, "tag", 3, newSVpv(tag_name, length), 0);
ha = hv_store(hash, "tag_id", 6, newSViv(myhtml_node_tag_id(node)), 0);
+ ha = hv_store(hash, "element_begin", 13, newSViv(element_pos.begin), 0);
+ ha = hv_store(hash, "element_length", 14, newSViv(element_pos.length), 0);
+
+ ha = hv_store(hash, "raw_begin", 9, newSViv(raw_pos.begin), 0);
+ ha = hv_store(hash, "raw_length", 10, newSViv(raw_pos.length), 0);
+
switch (myhtml_node_namespace(node))
{
case MyHTML_NAMESPACE_SVG:
@@ -179,7 +193,32 @@ HV * sm_get_node_info(myhtml_tree_t *tree, myhtml_tree_node_t *node)
hv_store(hash, "namespace_id", 12, newSViv(myhtml_node_namespace(node)), 0);
- hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info(node)), 0);
+ hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info( myhtml_node_attribute_first(node) )), 0);
+
+ return hash;
+}
+
+/*
+ * Build a new Perl hash describing a token node and return it.
+ * Keys stored: "tag", "tag_id", "element_begin", "element_length",
+ * "raw_begin", "raw_length" and "attr". The "attr" value is a reference to
+ * the hash produced by sm_get_node_attr_info() starting from the token's
+ * first attribute.
+ */
+HV * sm_get_token_node_info(myhtml_tree_t *tree, myhtml_token_node_t *token_node)
+{
+ HV* hash = newHV();
+ /* Receives the hv_store() results below; it is assigned but never read. */
+ SV **ha;
+
+ size_t length;
+ const char* tag_name = myhtml_tag_name_by_id(tree, myhtml_token_node_tag_id(token_node), &length);
+
+ /* Two position records for the token: the "element" span and the "raw" span. */
+ myhtml_position_t element_pos = myhtml_token_node_element_pasition(token_node);
+ myhtml_position_t raw_pos = myhtml_token_node_raw_pasition(token_node);
+
+ ha = hv_store(hash, "tag", 3, newSVpv(tag_name, length), 0);
+ ha = hv_store(hash, "tag_id", 6, newSViv(myhtml_token_node_tag_id(token_node)), 0);
+
+ ha = hv_store(hash, "element_begin", 13, newSViv(element_pos.begin), 0);
+ ha = hv_store(hash, "element_length", 14, newSViv(element_pos.length), 0);
+
+ ha = hv_store(hash, "raw_begin", 9, newSViv(raw_pos.begin), 0);
+ ha = hv_store(hash, "raw_length", 10, newSViv(raw_pos.length), 0);
+
+ /* newRV_noinc: the reference takes over the attribute hash without bumping its refcount. */
+ hv_store(hash, "attr", 4, newRV_noinc((SV *)sm_get_node_attr_info( myhtml_token_node_attribute_first(token_node) )), 0);
return hash;
}
@@ -253,6 +292,71 @@ SV* sm_get_nodes_by_attribute_value(myhtml_tree_node_t* node, myhtml_tree_t* tre
return &PL_sv_undef;
}
+/*
+ * C-side trampoline used for the "before token" and "after token" callbacks.
+ *
+ * ctx points to a myhtml_perl_callback_ctx_t holding the Perl sub and the
+ * optional user context. The Perl sub is called as
+ *     $callback->($tree, $token_node, $ctx)
+ * and its return value is ignored. The ctx pointer is returned unchanged.
+ */
+void * myhtml_perl_callback_token_done(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx)
+{
+    myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t *)ctx;
+
+    {
+        dSP;
+
+        ENTER;
+        SAVETMPS;
+
+        /* Wrap the raw C pointers in mortal, blessed references for the Perl side. */
+        SV *perl_tree = sv_newmortal();
+        sv_setref_pv(perl_tree, "HTML::MyHTML::Tree", (void*)tree);
+
+        SV *perl_token = sv_newmortal();
+        sv_setref_pv(perl_token, "HTML::MyHTML::Token::Node", (void*)token);
+
+        PUSHMARK(sp);
+        XPUSHs(perl_tree);
+        XPUSHs(perl_token);
+
+        if(perl_ctx->ctx) {
+            XPUSHs(perl_ctx->ctx);
+        }
+        PUTBACK;
+
+        /*
+         * The result of the callback is never read. Without G_DISCARD the
+         * returned value stays on the Perl argument stack after every call
+         * (it was never popped), so the stack grows by one slot per token.
+         * G_SCALAR is kept so the sub still runs in scalar context.
+         */
+        call_sv((SV *)perl_ctx->callback, G_SCALAR | G_DISCARD);
+
+        FREETMPS;
+        LEAVE;
+    }
+
+    return ctx;
+}
+
+/*
+ * C-side trampoline used for the "node insert" and "node remove" callbacks.
+ *
+ * ctx points to a myhtml_perl_callback_ctx_t holding the Perl sub and the
+ * optional user context. The Perl sub is called as
+ *     $callback->($tree, $node, $ctx)
+ * and its return value is ignored.
+ */
+void myhtml_perl_callback_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, void* ctx)
+{
+    myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t *)ctx;
+
+    {
+        dSP;
+
+        ENTER;
+        SAVETMPS;
+
+        /* Wrap the raw C pointers in mortal, blessed references for the Perl side. */
+        SV *perl_tree = sv_newmortal();
+        sv_setref_pv(perl_tree, "HTML::MyHTML::Tree", (void*)tree);
+
+        SV *perl_node = sv_newmortal();
+        sv_setref_pv(perl_node, "HTML::MyHTML::Tree::Node", (void*)node);
+
+        PUSHMARK(sp);
+        XPUSHs(perl_tree);
+        XPUSHs(perl_node);
+
+        if(perl_ctx->ctx) {
+            XPUSHs(perl_ctx->ctx);
+        }
+        PUTBACK;
+
+        /*
+         * The result of the callback is never read. Without G_DISCARD the
+         * returned value stays on the Perl argument stack after every call
+         * (it was never popped), so the stack grows by one slot per node.
+         * G_SCALAR is kept so the sub still runs in scalar context.
+         */
+        call_sv((SV *)perl_ctx->callback, G_SCALAR | G_DISCARD);
+
+        FREETMPS;
+        LEAVE;
+    }
+}
//####
//#
@@ -333,6 +437,7 @@ new_tree(myhtml, out_status = &PL_sv_undef)
INCLUDE: xs/tree.xs
INCLUDE: xs/tree_node.xs
INCLUDE: xs/tree_attr.xs
+INCLUDE: xs/token_node.xs
####
#
@@ -519,6 +624,7 @@ parse_chunk_end(myhtml, tree)
OUTPUT:
RETVAL
+
#************************************************************************************
#
# MyHTML_TREE
@@ -586,6 +692,20 @@ tree_destroy(tree)
HTML::MyHTML::Tree tree;
CODE:
+ if(tree) {
+ if(tree->callback_before_token_ctx)
+ free(tree->callback_before_token_ctx);
+
+ if(tree->callback_after_token_ctx)
+ free(tree->callback_after_token_ctx);
+
+ if(tree->callback_tree_node_insert_ctx)
+ free(tree->callback_tree_node_insert_ctx);
+
+ if(tree->callback_tree_node_remove_ctx)
+ free(tree->callback_tree_node_remove_ctx);
+ }
+
RETVAL = myhtml_tree_destroy(tree);
OUTPUT:
RETVAL
@@ -673,6 +793,145 @@ tree_print_node(tree, node, fh)
CODE:
myhtml_tree_print_node(tree, node, fh);
+void
+callback_before_token_done_set(tree, callback, ctx = &PL_sv_undef)
+    HTML::MyHTML::Tree tree;
+    SV* callback;
+    SV* ctx;
+
+    CODE:
+        /*
+         * Install the Perl sub called before a token is processed, or remove
+         * it when callback is undef. callback and ctx are copied, so the tree
+         * owns its own SVs; ctx is later passed to the sub as its third
+         * argument. Calling this again replaces the previous registration.
+         */
+        myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_before_token_ctx;
+
+        if(SvOK(callback)) {
+            if(perl_ctx) {
+                /* Re-registration: release the copies made for the previous callback. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+            }
+            else {
+                perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t));
+            }
+
+            /* On allocation failure nothing is installed and the tree is left untouched. */
+            if(perl_ctx)
+            {
+                perl_ctx->callback = newSVsv(callback);
+                perl_ctx->ctx = newSVsv(ctx);
+
+                tree->callback_before_token = myhtml_perl_callback_token_done;
+                tree->callback_before_token_ctx = perl_ctx;
+            }
+        }
+        else {
+            if(perl_ctx) {
+                /* Drop the SV copies before freeing the struct that holds them. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+                free(perl_ctx);
+            }
+
+            tree->callback_before_token = NULL;
+            tree->callback_before_token_ctx = NULL;
+        }
+
+void
+callback_after_token_done_set(tree, callback, ctx = &PL_sv_undef)
+    HTML::MyHTML::Tree tree;
+    SV* callback;
+    SV* ctx;
+
+    CODE:
+        /*
+         * Install the Perl sub called after a token has been processed, or
+         * remove it when callback is undef. callback and ctx are copied, so
+         * the tree owns its own SVs; ctx is later passed to the sub as its
+         * third argument. Calling this again replaces the previous registration.
+         */
+        myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_after_token_ctx;
+
+        if(SvOK(callback)) {
+            if(perl_ctx) {
+                /* Re-registration: release the copies made for the previous callback. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+            }
+            else {
+                perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t));
+            }
+
+            /* On allocation failure nothing is installed and the tree is left untouched. */
+            if(perl_ctx)
+            {
+                perl_ctx->callback = newSVsv(callback);
+                perl_ctx->ctx = newSVsv(ctx);
+
+                tree->callback_after_token = myhtml_perl_callback_token_done;
+                tree->callback_after_token_ctx = perl_ctx;
+            }
+        }
+        else {
+            if(perl_ctx) {
+                /* Drop the SV copies before freeing the struct that holds them. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+                free(perl_ctx);
+            }
+
+            tree->callback_after_token = NULL;
+            tree->callback_after_token_ctx = NULL;
+        }
+
+void
+callback_node_insert_set(tree, callback, ctx = &PL_sv_undef)
+    HTML::MyHTML::Tree tree;
+    SV* callback;
+    SV* ctx;
+
+    CODE:
+        /*
+         * Install the Perl sub registered as the tree's node-insert callback,
+         * or remove it when callback is undef. callback and ctx are copied,
+         * so the tree owns its own SVs; ctx is later passed to the sub as its
+         * third argument. Calling this again replaces the previous registration.
+         */
+        myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_tree_node_insert_ctx;
+
+        if(SvOK(callback)) {
+            if(perl_ctx) {
+                /* Re-registration: release the copies made for the previous callback. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+            }
+            else {
+                perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t));
+            }
+
+            /* On allocation failure nothing is installed and the tree is left untouched. */
+            if(perl_ctx)
+            {
+                perl_ctx->callback = newSVsv(callback);
+                perl_ctx->ctx = newSVsv(ctx);
+
+                tree->callback_tree_node_insert = myhtml_perl_callback_node;
+                tree->callback_tree_node_insert_ctx = perl_ctx;
+            }
+        }
+        else {
+            if(perl_ctx) {
+                /* Drop the SV copies before freeing the struct that holds them. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+                free(perl_ctx);
+            }
+
+            tree->callback_tree_node_insert = NULL;
+            tree->callback_tree_node_insert_ctx = NULL;
+        }
+
+void
+callback_node_remove_set(tree, callback, ctx = &PL_sv_undef)
+    HTML::MyHTML::Tree tree;
+    SV* callback;
+    SV* ctx;
+
+    CODE:
+        /*
+         * Install the Perl sub registered as the tree's node-remove callback,
+         * or remove it when callback is undef. callback and ctx are copied,
+         * so the tree owns its own SVs; ctx is later passed to the sub as its
+         * third argument. Calling this again replaces the previous registration.
+         */
+        myhtml_perl_callback_ctx_t *perl_ctx = (myhtml_perl_callback_ctx_t*)tree->callback_tree_node_remove_ctx;
+
+        if(SvOK(callback)) {
+            if(perl_ctx) {
+                /* Re-registration: release the copies made for the previous callback. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+            }
+            else {
+                perl_ctx = (myhtml_perl_callback_ctx_t*)calloc(1, sizeof(myhtml_perl_callback_ctx_t));
+            }
+
+            /* On allocation failure nothing is installed and the tree is left untouched. */
+            if(perl_ctx)
+            {
+                perl_ctx->callback = newSVsv(callback);
+                perl_ctx->ctx = newSVsv(ctx);
+
+                tree->callback_tree_node_remove = myhtml_perl_callback_node;
+                tree->callback_tree_node_remove_ctx = perl_ctx;
+            }
+        }
+        else {
+            if(perl_ctx) {
+                /* Drop the SV copies before freeing the struct that holds them. */
+                SvREFCNT_dec(perl_ctx->callback);
+                SvREFCNT_dec(perl_ctx->ctx);
+                free(perl_ctx);
+            }
+
+            tree->callback_tree_node_remove = NULL;
+            tree->callback_tree_node_remove_ctx = NULL;
+        }
+
+
#************************************************************************************
#
# MyHTML_NODE
diff --git a/README b/README
index fb2b784..8dcf5c2 100644
--- a/README
+++ b/README
@@ -5,7 +5,7 @@ DESCRIPTION
Fast HTML Parser using Threads with no outside dependencies
-This Parser based on MyHTML library (it includes version 0.6.1)
+This parser is based on the MyHTML library (it includes version 1.0.2)
See https://github.com/lexborisov/myhtml
diff --git a/README.md b/README.md
index 6832b06..645e746 100644
--- a/README.md
+++ b/README.md
@@ -401,6 +401,45 @@ Return: array list of elements HTML::MyHTML::Tree::Node
Return: array list of elements HTML::MyHTML::Tree::Node
+### callback_before_token_done_set
+
+Set callback for tokens before processing.
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+```perl
+ $tree->callback_before_token_done_set($sub_callback [, $ctx]);
+```
+
+### callback_after_token_done_set
+
+Set callback for tokens after processing
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+```perl
+ $tree->callback_after_token_done_set($sub_callback [, $ctx]);
+```
+
+### callback_node_insert_set
+
+Set a callback that is called after a tree node has been inserted
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+```perl
+ $tree->callback_node_insert_set($sub_callback [, $ctx]);
+```
+
+### callback_node_remove_set
+
+Set a callback that is called after a tree node has been removed
+
+Important (Perl binding only): do not use this callback with multi-threaded parsing. Either build without threads, use the methods parse_single, parse_fragment_single, parse_chunk_single or parse_chunk_fragment_single, or create myhtml with the MyHTML_OPTIONS_PARSE_MODE_SINGLE option.
+
+```perl
+ $tree->callback_node_remove_set($sub_callback [, $ctx]);
+```
## Attributes
@@ -564,6 +603,16 @@ Get last child of node
Return: HTML::MyHTML::Tree::Node if exists, otherwise an UNDEF value
+### token
+
+Get token node
+
+```perl
+ my $token_node = $node->token();
+```
+
+Return: HTML::MyHTML::Token::Node if exists, otherwise an UNDEF value
+
### get_nodes_by_attribute_key
Get nodes by attribute key of current node
@@ -844,6 +893,95 @@ Print tree of a node. Print including current node
$node->print_all($tree, $fh);
```
+## Token Node
+
+### info
+
+Get information of token node: tag name, tag id, attr
+
+```perl
+ my $res = $token_node->info($tree);
+```
+
+Return: hash ref
+
+### tag_id
+
+Get token node tag id
+
+```perl
+ my $tag_id = $token_node->tag_id();
+```
+
+Return: tag_id
+
+### tag_name
+
+Get tag name of a token node
+
+```perl
+ my $res = $token_node->tag_name($tree);
+```
+
+Return: tag name
+
+### is_close_self
+
+Check whether the token node has the self-closing flag
+
+```perl
+ my $bool = $token_node->is_close_self();
+```
+
+Return: 1 (true) or 0 (false)
+
+### attr_first
+
+Get first attribute of a token node
+
+```perl
+ my $attr = $token_node->attr_first();
+```
+
+Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value
+
+### attr_last
+
+Get last attribute of a token node
+
+```perl
+ my $attr = $token_node->attr_last();
+```
+
+Return: HTML::MyHTML::Tree::Attr if exists, otherwise an UNDEF value
+
+### text
+
+Get text of a token node. Only for MyHTML_TAG__TEXT or MyHTML_TAG__COMMENT tags
+
+```perl
+ my $res = $token_node->text();
+```
+
+Return: text if exists, otherwise an UNDEF value
+
+### string
+
+Get myhtml_string_t object by token node
+
+```perl
+ my $string = $token_node->string();
+```
+
+Return: HTML::MyHTML::String if exists, otherwise a NULL value
+
+### wait_for_done
+
+Wait until the token has been processed by all parsing stages. Needed if you use thread mode
+
+```perl
+ $token_node->wait_for_done();
+```
## Detect encoding
diff --git a/examples/callback.pl b/examples/callback.pl
new file mode 100644
index 0000000..417716d
--- /dev/null
+++ b/examples/callback.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl -w
+
+# Example: register a "before token done" callback that counts tokens and
+# prints the slice of the downloaded document each token was built from.
+
+use utf8;
+use strict;
+use warnings;
+use Encode;
+
+use HTML::MyHTML;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new;
+my $req = HTTP::Request->new(GET => "http://edition.cnn.com/2016/03/22/weather/great-barrier-reef-coral-bleaching/index.html");
+my $res = $ua->request($req);
+
+# Stop here on a failed download instead of parsing an error page or an empty body.
+die "Request failed: ", $res->status_line, "\n" unless $res->is_success;
+
+# Raw (undecoded) response body; the callback below slices it by offset.
+my $body = $res->content;
+
+# init
+# Callbacks are fine when there is only one parse thread (as here); otherwise
+# use single mode (MyHTML_OPTIONS_PARSE_MODE_SINGLE) or the methods
+# parse_single, parse_fragment_single, parse_chunk_single, parse_chunk_fragment_single.
+# For development use single mode, it is easier to debug.
+my $myhtml = HTML::MyHTML->new(MyHTML_OPTIONS_DEFAULT, 1);
+my $tree = $myhtml->new_tree();
+
+# detect encoding
+my $encoding;
+$myhtml->encoding_detect($body, $encoding);
+
+# Shared state handed to every callback invocation as its third argument.
+my $args = {count => 0};
+
+$tree->callback_before_token_done_set(sub {
+    my ($tree, $token_node, $ctx) = @_;
+    # Byte semantics for substr: the offsets are presumably byte offsets
+    # into the raw body (NOTE: confirm against the MyHTML position API).
+    use bytes;
+
+    $ctx->{count}++;
+
+    my $info = $token_node->info($tree);
+
+    my $str = substr $body, $info->{element_begin}, $info->{element_length};
+    print $str, "\n";
+
+}, $args);
+
+# parse
+$myhtml->parse($tree, $encoding, $body);
+
+print "Total count: ", $args->{count}, "\n";
diff --git a/typemap b/typemap
index c184fb5..4d231c4 100755
--- a/typemap
+++ b/typemap
@@ -11,6 +11,7 @@ myhtml_string_t * T_PTROBJ
myhtml_tree_attr_t * T_PTROBJ
myhtml_tag_index_entry_t * T_PTROBJ
myhtml_tag_index_node_t * T_PTROBJ
+myhtml_token_node_t * T_PTROBJ
myhtml_status_t T_IV
myhtml_encoding_t T_IV
@@ -33,3 +34,4 @@ HTML::MyHTML::Tag::Index::Node T_PTROBJ
HTML::MyHTML::Collection T_PTROBJ
HTML::MyHTML::String T_PTROBJ
+HTML::MyHTML::Token::Node T_PTROBJ
diff --git a/xs/token_node.xs b/xs/token_node.xs
new file mode 100755
index 0000000..7431a79
--- /dev/null
+++ b/xs/token_node.xs
@@ -0,0 +1,118 @@
+#/*
+# Copyright 2015-2016 Alexander Borisov
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: lex.borisov@gmail.com (Alexander Borisov)
+#*/
+
+MODULE = HTML::MyHTML::Token::Node PACKAGE = HTML::MyHTML::Token::Node
+PROTOTYPES: DISABLE
+
+#=sort 1
+
+SV*
+info(token_node, tree)
+ HTML::MyHTML::Token::Node token_node;
+ HTML::MyHTML::Tree tree;
+
+ CODE:
+ /* Return a reference to the hash built by sm_get_token_node_info() for this token node. */
+ RETVAL = newRV_noinc((SV *)sm_get_token_node_info(tree, token_node));
+ OUTPUT:
+ RETVAL
+
+myhtml_tag_id_t
+tag_id(token_node)
+ HTML::MyHTML::Token::Node token_node;
+
+ CODE:
+ /* Return the tag id reported by myhtml_token_node_tag_id() for this token node. */
+ RETVAL = myhtml_token_node_tag_id(token_node);
+ OUTPUT:
+ RETVAL
+
+SV*
+tag_name(token_node, tree)
+    HTML::MyHTML::Token::Node token_node;
+    HTML::MyHTML::Tree tree;
+
+    CODE:
+        /*
+         * Return the tag name of the token node as a Perl string, or undef
+         * when the tree has no name for the token's tag id.
+         */
+        size_t length = 0;
+        const char* name = myhtml_tag_name_by_id(tree, myhtml_token_node_tag_id(token_node), &length);
+
+        if(name == NULL)
+            XSRETURN_UNDEF;
+
+        /* newSVpvn takes the length literally; newSVpv would treat 0 as "call strlen". */
+        RETVAL = newSVpvn(name, length);
+    OUTPUT:
+        RETVAL
+
+#=sort 14
+
+bool
+is_close_self(token_node)
+ HTML::MyHTML::Token::Node token_node;
+
+ CODE:
+ /* Return the result of myhtml_token_node_is_close_self() for this token node. */
+ RETVAL = myhtml_token_node_is_close_self(token_node);
+ OUTPUT:
+ RETVAL
+
+#=sort 15
+
+HTML::MyHTML::Tree::Attr
+attr_first(token_node)
+    HTML::MyHTML::Token::Node token_node;
+
+    CODE:
+        /*
+         * Return the first attribute of the token node, or undef when the
+         * token has no attributes (instead of an object wrapping NULL).
+         */
+        RETVAL = myhtml_token_node_attribute_first(token_node);
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+#=sort 16
+
+HTML::MyHTML::Tree::Attr
+attr_last(token_node)
+    HTML::MyHTML::Token::Node token_node;
+
+    CODE:
+        /*
+         * Return the last attribute of the token node, or undef when the
+         * token has no attributes (instead of an object wrapping NULL).
+         */
+        RETVAL = myhtml_token_node_attribute_last(token_node);
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+SV*
+text(token_node)
+    HTML::MyHTML::Token::Node token_node;
+
+    CODE:
+        /*
+         * Return the text of the token node as a Perl string, or undef when
+         * the token carries no text.
+         */
+        size_t length = 0;
+        const char* text = myhtml_token_node_text(token_node, &length);
+
+        if(text == NULL)
+            XSRETURN_UNDEF;
+
+        /* newSVpvn takes the length literally; newSVpv would treat 0 as "call strlen". */
+        RETVAL = newSVpvn(text, length);
+    OUTPUT:
+        RETVAL
+
+#=sort 21
+
+HTML::MyHTML::String
+string(token_node)
+ HTML::MyHTML::Token::Node token_node;
+
+ CODE:
+ /* Return the pointer from myhtml_token_node_string() as-is; there is no NULL check here. */
+ RETVAL = myhtml_token_node_string(token_node);
+ OUTPUT:
+ RETVAL
+
+#=sort 22
+
+void
+wait_for_done(token_node)
+ HTML::MyHTML::Token::Node token_node;
+
+ CODE:
+ /* Delegates to myhtml_token_node_wait_for_done(); nothing is returned to Perl. */
+ myhtml_token_node_wait_for_done(token_node);
+
diff --git a/xs/tree_node.xs b/xs/tree_node.xs
index ee4fe28..6c564c9 100755
--- a/xs/tree_node.xs
+++ b/xs/tree_node.xs
@@ -103,6 +103,20 @@ last_child(node)
#=sort 6
+HTML::MyHTML::Token::Node
+token(node)
+    HTML::MyHTML::Tree::Node node;
+
+    CODE:
+        /* Fetch the token node behind this tree node; return undef when there is none. */
+        RETVAL = myhtml_node_token(node);
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+#=sort 6
+
SV*
get_nodes_by_attribute_key(node, tree, key, out_status = &PL_sv_undef)
HTML::MyHTML::Tree::Node node;