Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/lexborisov/perl-html-myhtml.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author lexborisov <lex.borisov@gmail.com> 2016-07-19 20:53:21 +0300
committer lexborisov <lex.borisov@gmail.com> 2016-07-19 20:53:21 +0300
commit f9f19e19a58dc24409e09ccfc97bf3830c237ac9 (patch)
tree 31e6f02a7cba04235cb578ff52e8cb732ddd3544 /examples
parent 1b5ac2c1d90ba926b8cc87a3588f8f245379dd94 (diff)
Synchronization with the latest versions of MyHTML
Diffstat (limited to 'examples')
-rw-r--r-- examples/callback.pl 45
1 files changed, 45 insertions, 0 deletions
diff --git a/examples/callback.pl b/examples/callback.pl
new file mode 100644
index 0000000..417716d
--- /dev/null
+++ b/examples/callback.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl -w
+# Example: download a page, run it through HTML::MyHTML and print the source text of each token seen.
+use utf8;
+use strict;
+use Encode;
+
+use HTML::MyHTML;
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new;
+my $req = HTTP::Request->new(GET => "http://edition.cnn.com/2016/03/22/weather/great-barrier-reef-coral-bleaching/index.html");
+my $res = $ua->request($req);
+die "Request failed: ", $res->status_line, "\n" unless $res->is_success; # never parse an error page
+my $body = $res->content;
+
+# init: this setup is fine when only one parse thread is used; otherwise use single mode
+# (MyHTML_OPTIONS_PARSE_MODE_SINGLE) or the methods parse_single, parse_fragment_single,
+# parse_chunk_single, parse_chunk_fragment_single.
+# During development prefer single mode: it is easier to debug.
+my $myhtml = HTML::MyHTML->new(MyHTML_OPTIONS_DEFAULT, 1);
+my $tree = $myhtml->new_tree();
+
+# detect encoding; encoding_detect is expected to fill in $encoding, which parse() receives below
+my $encoding;
+$myhtml->encoding_detect($body, $encoding);
+
+# context registered together with the callback (read there as $ctx); holds the token counter
+my $args = {count => 0};
+$tree->callback_before_token_done_set(sub {
+    my ($tree, $token_node, $ctx) = @_;
+    use bytes;
+
+    $ctx->{count}++;
+
+    # NOTE(review): element_begin/element_length are used as offsets into $body - presumably bytes; confirm
+    my $info = $token_node->info($tree);
+    my $str = substr $body, $info->{element_begin}, $info->{element_length};
+    print $str, "\n";
+
+}, $args);
+
+# parse
+$myhtml->parse($tree, $encoding, $body);
+
+print "Total count: ", $args->{count}, "\n";