From 1c95dc8fd5f9f39e2c74a44233643df179831d0e Mon Sep 17 00:00:00 2001 From: lexborisov Date: Wed, 20 Jul 2016 10:28:10 +0400 Subject: Synchronization with the latest versions of MyHTML --- Changes | 5 ++- MyHTML.pm | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ MyHTML.xs | 45 +++++++++++++++++++ README.md | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ source/myhtml/api.h | 2 +- typemap | 4 +- xs/incoming_buffer.xs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++ xs/tree.xs | 9 ++++ 8 files changed, 409 insertions(+), 3 deletions(-) create mode 100755 xs/incoming_buffer.xs diff --git a/Changes b/Changes index d4acbd1..9020d30 100644 --- a/Changes +++ b/Changes @@ -1,10 +1,13 @@ -1.02 +1.02 Wed Jul 20 2016 09:25:45 GMT+0300 Update MyHTML source. up to 1.0.2 - Release version Added method for set tree parse flags myhtml_tree_parse_flags_set Added methods for Node get_nodes_by_attribute_key, get_nodes_by_attribute_value_whitespace_separated, get_nodes_by_attribute_value_begin, get_nodes_by_attribute_value_end, get_nodes_by_attribute_value_contain, get_nodes_by_attribute_value_hyphen_separated, get_nodes_by_tag_id Added callback methods for Tree: callback_before_token_done_set, callback_after_token_done_set, callback_node_insert_set, callback_node_remove_set Added example for callback: see examples/callback.pl Added Token Node methods + Added method for set Tree parse flags: parse_flags_set; MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE + Added namespace functions: namespace_name_by_id, namespace_id_by_name + Added Incoming Buffer methods 0.35 Mon Apr 04 2016 22:21:03 GMT+0300 Fixes for build in NetBSD diff --git a/MyHTML.pm b/MyHTML.pm index 986ff90..018693b 100755 --- a/MyHTML.pm +++ b/MyHTML.pm @@ -103,6 +103,8 @@ BEGIN { MyHTML_STATUS_MCOBJECT_ERROR_CACHE_REALLOC MyHTML_OPTIONS_DEFAULT MyHTML_OPTIONS_PARSE_MODE_SINGLE MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE MyHTML_OPTIONS_PARSE_MODE_SEPARATELY + + namespace_name_by_id namespace_id_by_name ); }; @@ -502,6 +504,15 @@ Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Bui $tree->callback_node_remove_set($sub_callback [, $ctx]); +=head3 incoming_buffer_first + +Get first Incoming Buffer + + my $incoming_buffer = $tree->incoming_buffer_first(); + +Return: HTML::Incoming::Buffer if exists, otherwise an UNDEF value + + =head2 Attributes =head3 info @@ -1009,6 +1020,109 @@ Now available for detect UTF-8, UTF-16LE, UTF-16BE Return: 1 (true) if encoding found, otherwise 0 (false) +=head2 Incoming Buffer + +=head3 find_by_position + +Get Incoming Buffer by position + + my $incoming_buffer = $incoming_buffer->find_by_position($begin_position); + +Return: HTML::Incoming::Buffer if successful, otherwise a UNDEF value + + +=head3 data + +Get data of Incoming Buffer + + my $data = $incoming_buffer->data(); + +Return: text scalar if successful, otherwise a UNDEF value + + +=head3 length + +Get data length of Incoming Buffer + + my $length = $incoming_buffer->length(); + +Return: scalar length + + +=head3 size + +Get data size of Incoming Buffer + + my $size = $incoming_buffer->size(); + +Return: scalar size + + +=head3 offset + +Get data offset of Incoming Buffer. Global position of begin Incoming Buffer. + + my $offset = $incoming_buffer->offset(); + +Return: scalar offset + + +=head3 relative_begin + +Get Relative Position for Incoming Buffer. Incoming Buffer should be prepared by find_by_position. + + my $relative_begin = $incoming_buffer->relative_begin(); + +Return: scalar relative begin + + +=head3 available_length + +This function returns number of available data by Incoming Buffer. Incoming buffer may be incomplete. See next. + + my $available_length = $incoming_buffer->available_length(); + +Return: scalar available length + + +=head3 next + +Get next buffer + + my $next_incoming_buffer = $incoming_buffer->next(); + +Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value + + +=head3 prev + +Get prev buffer + + my $prev_incoming_buffer = $incoming_buffer->prev(); + +Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value + + +=head2 Namespace + +=head3 namespace_name_by_id + +Get namespace text by namespace type (id) + + my $namespace_name = namespace_name_by_id($namespace_id); + +Return: text if successful, otherwise a UNDEF value + + +=head3 namespace_id_by_name + +Get namespace type (id) by namespace text + + my $namespace_id = namespace_id_by_name($namespace_name); + +Return: namespace id + + =head1 Constants =head2 Tags diff --git a/MyHTML.xs b/MyHTML.xs index 7ee90a2..42cbd70 100755 --- a/MyHTML.xs +++ b/MyHTML.xs @@ -62,6 +62,7 @@ typedef myhtml_tag_index_node_t * HTML__MyHTML__Tag__Index__Node; typedef myhtml_collection_t * HTML__MyHTML__Collection; typedef myhtml_string_t * HTML__MyHTML__String; typedef myhtml_token_node_t * HTML__MyHTML__Token__Node; +typedef myhtml_incoming_buffer_t * HTML__Incoming__Buffer; struct myhtml_perl_callback_ctx { SV* callback; @@ -438,6 +439,7 @@ INCLUDE: xs/tree.xs INCLUDE: xs/tree_node.xs INCLUDE: xs/tree_attr.xs INCLUDE: xs/token_node.xs +INCLUDE: xs/incoming_buffer.xs #### # @@ -1738,6 +1740,49 @@ string_length(str) MODULE = HTML::MyHTML PACKAGE = HTML::MyHTML PROTOTYPES: DISABLE +#************************************************************************************ +# +# MyHTML_NAMESPACE +# +#************************************************************************************ + +SV* +namespace_name_by_id(ns) + SV* ns; + + CODE: + size_t length = 0; + const char *ns_name = myhtml_namespace_name_by_id(SvIV(ns), &length); + + if(ns_name == NULL || length == 0) { + RETVAL = newSVpv("", 0); + } + else { + RETVAL = newSVpv(ns_name, length); + } + OUTPUT: + RETVAL + +SV* +namespace_id_by_name(name) + SV* name; + + PREINIT: + STRLEN len; + CODE: + const char *char_name = NULL; + myhtml_namespace_t ns = MyHTML_NAMESPACE_UNDEF; + + if(SvOK(name)) { + char_name = SvPV(name, len); + myhtml_namespace_id_by_name(char_name, len, &ns); + } + + RETVAL = newSViv(ns); + OUTPUT: + RETVAL + + #************************************************************************************ # # MyHTML_PARSE_FLAGS constants diff --git a/README.md b/README.md index 645e746..b7e9109 100644 --- a/README.md +++ b/README.md @@ -1046,6 +1046,122 @@ Now available for detect UTF-8, UTF-16LE, UTF-16BE Return: 1 (true) if encoding found, otherwise 0 (false) +## Incoming Buffer + +### find_by_position + +Get Incoming Buffer by position + +```perl + my $incoming_buffer = $incoming_buffer->find_by_position($begin_position); +``` + +Return: HTML::Incoming::Buffer if successful, otherwise a UNDEF value + +### data + +Get data of Incoming Buffer + +```perl + my $data = $incoming_buffer->data(); +``` + +Return: text scalar if successful, otherwise a UNDEF value + +### length + +Get data length of Incoming Buffer + +```perl + my $length = $incoming_buffer->length(); +``` + +Return: scalar length + +### size + +Get data size of Incoming Buffer + +```perl + my $size = $incoming_buffer->size(); +``` + +Return: scalar size + +### offset + +Get data offset of Incoming Buffer. Global position of begin Incoming Buffer. + +```perl + my $offset = $incoming_buffer->offset(); +``` + +Return: scalar offset + +### relative_begin + +Get Relative Position for Incoming Buffer. Incoming Buffer should be prepared by find_by_position. + +```perl + my $relative_begin = $incoming_buffer->relative_begin(); +``` + +Return: scalar relative begin + +### available_length + +This function returns number of available data by Incoming Buffer. Incoming buffer may be incomplete. See next. + +```perl + my $available_length = $incoming_buffer->available_length(); +``` + +Return: scalar available length + +### next + +Get next buffer + +```perl + my $next_incoming_buffer = $incoming_buffer->next(); +``` + +Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value + +### prev + +Get prev buffer + +```perl + my $prev_incoming_buffer = $incoming_buffer->prev(); +``` + +Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value + + +## Namespace + +### namespace_name_by_id + +Get namespace text by namespace type (id) + +```perl + my $namespace_name = namespace_name_by_id($namespace_id); +``` + +Return: text if successful, otherwise a UNDEF value + +### namespace_id_by_name + +Get namespace type (id) by namespace text + +```perl + my $namespace_id = namespace_id_by_name($namespace_name); +``` + +Return: namespace id + + # Constants ## Tags diff --git a/source/myhtml/api.h b/source/myhtml/api.h index 41e3b31..d94b32b 100644 --- a/source/myhtml/api.h +++ b/source/myhtml/api.h @@ -2526,7 +2526,7 @@ void myhtml_callback_before_token_done_set(myhtml_tree_t* tree, myhtml_callback_token_f func, void* ctx); /** - * Set callback for tokens before processing + * Set callback for tokens after processing * * Warning! * If you using thread mode parsing then this callback calls from thread (not Main thread) diff --git a/typemap b/typemap index 4d231c4..bd3de59 100755 --- a/typemap +++ b/typemap @@ -12,6 +12,7 @@ myhtml_tree_attr_t * T_PTROBJ myhtml_tag_index_entry_t * T_PTROBJ myhtml_tag_index_node_t * T_PTROBJ myhtml_token_node_t * T_PTROBJ +myhtml_incoming_buffer_t * T_PTROBJ myhtml_status_t T_IV myhtml_encoding_t T_IV @@ -26,6 +27,7 @@ HTML::MyHTML T_PTROBJ HTML::MyHTML::Tree T_PTROBJ HTML::MyHTML::Tree::Node T_PTROBJ HTML::MyHTML::Tree::Attr T_PTROBJ +HTML::MyHTML::Token::Node T_PTROBJ HTML::MyHTML::Tag T_PTROBJ HTML::MyHTML::Tag::Index T_PTROBJ @@ -34,4 +36,4 @@ HTML::MyHTML::Tag::Index::Node T_PTROBJ HTML::MyHTML::Collection T_PTROBJ HTML::MyHTML::String T_PTROBJ -HTML::MyHTML::Token::Node T_PTROBJ +HTML::Incoming::Buffer T_PTROBJ diff --git a/xs/incoming_buffer.xs b/xs/incoming_buffer.xs new file mode 100755 index 0000000..0a2422d --- /dev/null +++ b/xs/incoming_buffer.xs @@ -0,0 +1,117 @@ +#/* +# Copyright 2015-2016 Alexander Borisov +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: lex.borisov@gmail.com (Alexander Borisov) +#*/ + +MODULE = HTML::Incoming::Buffer PACKAGE = HTML::Incoming::Buffer +PROTOTYPES: DISABLE + + +HTML::Incoming::Buffer +find_by_position(inc_buffer, begin) + HTML::Incoming::Buffer inc_buffer; + SV* begin; + + CODE: + RETVAL = myhtml_incoming_buffer_find_by_position(inc_buffer, SvIV(begin)); + OUTPUT: + RETVAL + POSTCALL: + if(RETVAL == NULL) + XSRETURN_UNDEF; + +SV* +data(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + const char *data = myhtml_incoming_buffer_data(inc_buffer); + RETVAL = newSVpv(data, myhtml_incoming_buffer_size(inc_buffer)); + OUTPUT: + RETVAL + +SV* +length(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + RETVAL = newSViv(myhtml_incoming_buffer_length(inc_buffer)); + OUTPUT: + RETVAL + +SV* +size(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + RETVAL = newSViv(myhtml_incoming_buffer_size(inc_buffer)); + OUTPUT: + RETVAL + +SV* +offset(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + RETVAL = newSViv(myhtml_incoming_buffer_offset(inc_buffer)); + OUTPUT: + RETVAL + +SV* +relative_begin(inc_buffer, begin) + HTML::Incoming::Buffer inc_buffer; + SV* begin; + + CODE: + RETVAL = newSViv(myhtml_incoming_buffer_relative_begin(inc_buffer, SvIV(begin))); + OUTPUT: + RETVAL + +SV* +available_length(inc_buffer, relative_begin, length) + HTML::Incoming::Buffer inc_buffer; + SV* relative_begin; + SV* length; + + CODE: + RETVAL = newSViv(myhtml_incoming_buffer_available_length(inc_buffer, SvIV(relative_begin), SvIV(length))); + OUTPUT: + RETVAL + +HTML::Incoming::Buffer +next(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + RETVAL = myhtml_incoming_buffer_next(inc_buffer); + OUTPUT: + RETVAL + POSTCALL: + if(RETVAL == NULL) + XSRETURN_UNDEF; + +HTML::Incoming::Buffer +prev(inc_buffer) + HTML::Incoming::Buffer inc_buffer; + + CODE: + RETVAL = myhtml_incoming_buffer_prev(inc_buffer); + OUTPUT: + RETVAL + POSTCALL: + if(RETVAL == NULL) + XSRETURN_UNDEF; + diff --git a/xs/tree.xs b/xs/tree.xs index 784950b..8ae615b 100755 --- a/xs/tree.xs +++ b/xs/tree.xs @@ -195,4 +195,13 @@ get_elements_by_tag_name(tree, tag_name) OUTPUT: RETVAL +HTML::Incoming::Buffer +incoming_buffer_first(tree) + HTML::MyHTML::Tree tree; + + CODE: + RETVAL = myhtml_tree_incoming_buffer_first(tree); + OUTPUT: + RETVAL + -- cgit v1.2.3