Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/lexborisov/perl-html-myhtml.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlexborisov <lex.borisov@gmail.com>2016-07-20 09:28:10 +0300
committerlexborisov <lex.borisov@gmail.com>2016-07-20 09:28:10 +0300
commit1c95dc8fd5f9f39e2c74a44233643df179831d0e (patch)
tree578f5a0a250363ccdb97d120f6ff82f4ba90f27f
parentf9f19e19a58dc24409e09ccfc97bf3830c237ac9 (diff)
Synchronization with the latest versions of MyHTML
-rw-r--r--Changes5
-rwxr-xr-xMyHTML.pm114
-rwxr-xr-xMyHTML.xs45
-rw-r--r--README.md116
-rw-r--r--source/myhtml/api.h2
-rwxr-xr-xtypemap4
-rwxr-xr-xxs/incoming_buffer.xs117
-rwxr-xr-xxs/tree.xs9
8 files changed, 409 insertions, 3 deletions
diff --git a/Changes b/Changes
index d4acbd1..9020d30 100644
--- a/Changes
+++ b/Changes
@@ -1,10 +1,13 @@
-1.02
+1.02 Wed Jul 20 2016 09:25:45 GMT+0300
Update MyHTML source. up to 1.0.2 - Release version
Added method for set tree parse flags myhtml_tree_parse_flags_set
Added methods for Node get_nodes_by_attribute_key, get_nodes_by_attribute_value_whitespace_separated, get_nodes_by_attribute_value_begin, get_nodes_by_attribute_value_end, get_nodes_by_attribute_value_contain, get_nodes_by_attribute_value_hyphen_separated, get_nodes_by_tag_id
Added callback methods for Tree: callback_before_token_done_set, callback_after_token_done_set, callback_node_insert_set, callback_node_remove_set
Added example for callback: see examples/callback.pl
Added Token Node methods
+ Added method for set Tree parse flags: parse_flags_set; MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE
+ Added namespace functions: namespace_name_by_id, namespace_id_by_name
+ Added Incoming Buffer methods
0.35 Mon Apr 04 2016 22:21:03 GMT+0300
Fixes for build in NetBSD
diff --git a/MyHTML.pm b/MyHTML.pm
index 986ff90..018693b 100755
--- a/MyHTML.pm
+++ b/MyHTML.pm
@@ -103,6 +103,8 @@ BEGIN {
MyHTML_STATUS_MCOBJECT_ERROR_CACHE_REALLOC
MyHTML_OPTIONS_DEFAULT MyHTML_OPTIONS_PARSE_MODE_SINGLE MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE MyHTML_OPTIONS_PARSE_MODE_SEPARATELY
+
+ namespace_name_by_id namespace_id_by_name
);
};
@@ -502,6 +504,15 @@ Important!!! Only for Perl! Do not use this callback in Thread mode parsing; Bui
$tree->callback_node_remove_set($sub_callback [, $ctx]);
+=head3 incoming_buffer_first
+
+Get first Incoming Buffer
+
+ my $incoming_buffer = $tree->incoming_buffer_first();
+
+Return: HTML::Incoming::Buffer if exists, otherwise an UNDEF value
+
+
=head2 Attributes
=head3 info
@@ -1009,6 +1020,109 @@ Now available for detect UTF-8, UTF-16LE, UTF-16BE
Return: 1 (true) if encoding found, otherwise 0 (false)
+=head2 Incoming Buffer
+
+=head3 find_by_position
+
+Get Incoming Buffer by position
+
+ my $incoming_buffer = $incoming_buffer->find_by_position($begin_position);
+
+Return: HTML::Incoming::Buffer if successful, otherwise a UNDEF value
+
+
+=head3 data
+
+Get data of Incoming Buffer
+
+ my $data = $incoming_buffer->data();
+
+Return: text scalar if successful, otherwise a UNDEF value
+
+
+=head3 length
+
+Get data length of Incoming Buffer
+
+ my $length = $incoming_buffer->length();
+
+Return: scalar length
+
+
+=head3 size
+
+Get data size of Incoming Buffer
+
+ my $size = $incoming_buffer->size();
+
+Return: scalar size
+
+
+=head3 offset
+
+Get data offset of Incoming Buffer. Global position of begin Incoming Buffer.
+
+ my $offset = $incoming_buffer->offset();
+
+Return: scalar offset
+
+
+=head3 relative_begin
+
+Get Relative Position for Incoming Buffer. Incoming Buffer should be prepared by find_by_position.
+
+ my $relative_begin = $incoming_buffer->relative_begin($begin);
+
+Return: scalar relative begin
+
+
+=head3 available_length
+
+Get the amount of data available in the Incoming Buffer for the given relative begin position and length. The Incoming Buffer may be incomplete; see next.
+
+ my $available_length = $incoming_buffer->available_length($relative_begin, $length);
+
+Return: scalar available length
+
+
+=head3 next
+
+Get next buffer
+
+ my $next_incoming_buffer = $incoming_buffer->next();
+
+Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value
+
+
+=head3 prev
+
+Get prev buffer
+
+ my $prev_incoming_buffer = $incoming_buffer->prev();
+
+Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value
+
+
+=head2 Namespace
+
+=head3 namespace_name_by_id
+
+Get namespace text by namespace type (id)
+
+ my $namespace_name = namespace_name_by_id($namespace_id);
+
+Return: text if successful, otherwise an empty string
+
+
+=head3 namespace_id_by_name
+
+Get namespace type (id) by namespace text
+
+ my $namespace_id = namespace_id_by_name($namespace_name);
+
+Return: namespace id
+
+
=head1 Constants
=head2 Tags
diff --git a/MyHTML.xs b/MyHTML.xs
index 7ee90a2..42cbd70 100755
--- a/MyHTML.xs
+++ b/MyHTML.xs
@@ -62,6 +62,7 @@ typedef myhtml_tag_index_node_t * HTML__MyHTML__Tag__Index__Node;
typedef myhtml_collection_t * HTML__MyHTML__Collection;
typedef myhtml_string_t * HTML__MyHTML__String;
typedef myhtml_token_node_t * HTML__MyHTML__Token__Node;
+typedef myhtml_incoming_buffer_t * HTML__Incoming__Buffer;
struct myhtml_perl_callback_ctx {
SV* callback;
@@ -438,6 +439,7 @@ INCLUDE: xs/tree.xs
INCLUDE: xs/tree_node.xs
INCLUDE: xs/tree_attr.xs
INCLUDE: xs/token_node.xs
+INCLUDE: xs/incoming_buffer.xs
####
#
@@ -1740,6 +1742,49 @@ PROTOTYPES: DISABLE
#************************************************************************************
#
+# MyHTML_NAMESPACE
+#
+#************************************************************************************
+
+# Look up the textual name of a namespace from its numeric id.
+# Returns the name as a string scalar. When the lookup yields no name
+# (NULL pointer or zero length) an empty string is returned.
+SV*
+namespace_name_by_id(ns)
+    SV* ns;
+
+    PREINIT:
+        size_t name_len = 0;
+        const char *name;
+    CODE:
+        name = myhtml_namespace_name_by_id(SvIV(ns), &name_len);
+
+        /* A missing or empty name is reported as "" rather than undef. */
+        RETVAL = (name != NULL && name_len != 0)
+            ? newSVpv(name, name_len)
+            : newSVpv("", 0);
+    OUTPUT:
+        RETVAL
+
+# Resolve a namespace name to its numeric namespace id.
+# An undefined name skips the lookup, so MyHTML_NAMESPACE_UNDEF is returned.
+SV*
+namespace_id_by_name(name)
+    SV* name;
+
+    PREINIT:
+        STRLEN name_len;
+        const char *name_str = NULL;
+        myhtml_namespace_t found = MyHTML_NAMESPACE_UNDEF;
+    CODE:
+        /* Only consult MyHTML when the caller passed a defined value. */
+        if(SvOK(name)) {
+            name_str = SvPV(name, name_len);
+            myhtml_namespace_id_by_name(name_str, name_len, &found);
+        }
+
+        RETVAL = newSViv(found);
+    OUTPUT:
+        RETVAL
+
+
+#************************************************************************************
+#
# MyHTML_PARSE_FLAGS constants
#
#************************************************************************************
diff --git a/README.md b/README.md
index 645e746..b7e9109 100644
--- a/README.md
+++ b/README.md
@@ -1046,6 +1046,122 @@ Now available for detect UTF-8, UTF-16LE, UTF-16BE
Return: 1 (true) if encoding found, otherwise 0 (false)
+## Incoming Buffer
+
+### find_by_position
+
+Get Incoming Buffer by position
+
+```perl
+ my $incoming_buffer = $incoming_buffer->find_by_position($begin_position);
+```
+
+Return: HTML::Incoming::Buffer if successful, otherwise a UNDEF value
+
+### data
+
+Get data of Incoming Buffer
+
+```perl
+ my $data = $incoming_buffer->data();
+```
+
+Return: text scalar if successful, otherwise a UNDEF value
+
+### length
+
+Get data length of Incoming Buffer
+
+```perl
+ my $length = $incoming_buffer->length();
+```
+
+Return: scalar length
+
+### size
+
+Get data size of Incoming Buffer
+
+```perl
+ my $size = $incoming_buffer->size();
+```
+
+Return: scalar size
+
+### offset
+
+Get data offset of Incoming Buffer. Global position of begin Incoming Buffer.
+
+```perl
+ my $offset = $incoming_buffer->offset();
+```
+
+Return: scalar offset
+
+### relative_begin
+
+Get Relative Position for Incoming Buffer. Incoming Buffer should be prepared by find_by_position.
+
+```perl
+ my $relative_begin = $incoming_buffer->relative_begin($begin);
+```
+
+Return: scalar relative begin
+
+### available_length
+
+Get the amount of data available in the Incoming Buffer for the given relative begin position and length. The Incoming Buffer may be incomplete; see next.
+
+```perl
+ my $available_length = $incoming_buffer->available_length($relative_begin, $length);
+```
+
+Return: scalar available length
+
+### next
+
+Get next buffer
+
+```perl
+ my $next_incoming_buffer = $incoming_buffer->next();
+```
+
+Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value
+
+### prev
+
+Get prev buffer
+
+```perl
+ my $prev_incoming_buffer = $incoming_buffer->prev();
+```
+
+Return: HTML::Incoming::Buffer if exists, otherwise a UNDEF value
+
+
+## Namespace
+
+### namespace_name_by_id
+
+Get namespace text by namespace type (id)
+
+```perl
+ my $namespace_name = namespace_name_by_id($namespace_id);
+```
+
+Return: text if successful, otherwise an empty string
+
+### namespace_id_by_name
+
+Get namespace type (id) by namespace text
+
+```perl
+ my $namespace_id = namespace_id_by_name($namespace_name);
+```
+
+Return: namespace id
+
+
# Constants
## Tags
diff --git a/source/myhtml/api.h b/source/myhtml/api.h
index 41e3b31..d94b32b 100644
--- a/source/myhtml/api.h
+++ b/source/myhtml/api.h
@@ -2526,7 +2526,7 @@ void
myhtml_callback_before_token_done_set(myhtml_tree_t* tree, myhtml_callback_token_f func, void* ctx);
/**
- * Set callback for tokens before processing
+ * Set callback for tokens after processing
*
* Warning!
* If you are using thread mode parsing, this callback is called from a thread (not the Main thread)
diff --git a/typemap b/typemap
index 4d231c4..bd3de59 100755
--- a/typemap
+++ b/typemap
@@ -12,6 +12,7 @@ myhtml_tree_attr_t * T_PTROBJ
myhtml_tag_index_entry_t * T_PTROBJ
myhtml_tag_index_node_t * T_PTROBJ
myhtml_token_node_t * T_PTROBJ
+myhtml_incoming_buffer_t * T_PTROBJ
myhtml_status_t T_IV
myhtml_encoding_t T_IV
@@ -26,6 +27,7 @@ HTML::MyHTML T_PTROBJ
HTML::MyHTML::Tree T_PTROBJ
HTML::MyHTML::Tree::Node T_PTROBJ
HTML::MyHTML::Tree::Attr T_PTROBJ
+HTML::MyHTML::Token::Node T_PTROBJ
HTML::MyHTML::Tag T_PTROBJ
HTML::MyHTML::Tag::Index T_PTROBJ
@@ -34,4 +36,4 @@ HTML::MyHTML::Tag::Index::Node T_PTROBJ
HTML::MyHTML::Collection T_PTROBJ
HTML::MyHTML::String T_PTROBJ
-HTML::MyHTML::Token::Node T_PTROBJ
+HTML::Incoming::Buffer T_PTROBJ
diff --git a/xs/incoming_buffer.xs b/xs/incoming_buffer.xs
new file mode 100755
index 0000000..0a2422d
--- /dev/null
+++ b/xs/incoming_buffer.xs
@@ -0,0 +1,117 @@
+#/*
+# Copyright 2015-2016 Alexander Borisov
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: lex.borisov@gmail.com (Alexander Borisov)
+#*/
+
+MODULE = HTML::Incoming::Buffer PACKAGE = HTML::Incoming::Buffer
+PROTOTYPES: DISABLE
+
+
+# Calls myhtml_incoming_buffer_find_by_position() with inc_buffer and the
+# integer value of begin.
+# Returns the resulting HTML::Incoming::Buffer, or undef when MyHTML
+# returns NULL.
+HTML::Incoming::Buffer
+find_by_position(inc_buffer, begin)
+    HTML::Incoming::Buffer inc_buffer;
+    SV* begin;
+
+    CODE:
+        RETVAL = myhtml_incoming_buffer_find_by_position(inc_buffer, SvIV(begin));
+
+        /* The NULL check lives in CODE: (POSTCALL: is not meant to follow
+           OUTPUT: or to be combined with CODE:). A NULL pointer must not be
+           blessed into an object, so hand back undef instead. */
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+# Returns the buffer contents as a string scalar of exactly
+# myhtml_incoming_buffer_size() bytes, or undef when the buffer has no
+# data pointer.
+SV*
+data(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    PREINIT:
+        const char *buf;
+        size_t buf_size;
+    CODE:
+        buf = myhtml_incoming_buffer_data(inc_buffer);
+
+        /* No data pointer: report undef, as the documentation promises. */
+        if(buf == NULL)
+            XSRETURN_UNDEF;
+
+        buf_size = myhtml_incoming_buffer_size(inc_buffer);
+
+        /* newSVpvn() copies exactly buf_size bytes. newSVpv() would treat a
+           size of 0 as "compute the length with strlen()", which could read
+           past the end of the buffer. */
+        RETVAL = newSVpvn(buf, buf_size);
+    OUTPUT:
+        RETVAL
+
+# Returns the value of myhtml_incoming_buffer_length() for this buffer
+# as an integer scalar.
+SV*
+length(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    PREINIT:
+        IV buf_length;
+    CODE:
+        buf_length = myhtml_incoming_buffer_length(inc_buffer);
+        RETVAL = newSViv(buf_length);
+    OUTPUT:
+        RETVAL
+
+# Returns the value of myhtml_incoming_buffer_size() for this buffer
+# as an integer scalar.
+SV*
+size(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    PREINIT:
+        IV buf_size;
+    CODE:
+        buf_size = myhtml_incoming_buffer_size(inc_buffer);
+        RETVAL = newSViv(buf_size);
+    OUTPUT:
+        RETVAL
+
+# Returns the value of myhtml_incoming_buffer_offset() for this buffer
+# as an integer scalar.
+SV*
+offset(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    PREINIT:
+        IV buf_offset;
+    CODE:
+        buf_offset = myhtml_incoming_buffer_offset(inc_buffer);
+        RETVAL = newSViv(buf_offset);
+    OUTPUT:
+        RETVAL
+
+# Passes inc_buffer and the integer value of begin to
+# myhtml_incoming_buffer_relative_begin() and returns the result
+# as an integer scalar.
+SV*
+relative_begin(inc_buffer, begin)
+    HTML::Incoming::Buffer inc_buffer;
+    SV* begin;
+
+    PREINIT:
+        IV relative;
+    CODE:
+        relative = myhtml_incoming_buffer_relative_begin(inc_buffer, SvIV(begin));
+        RETVAL = newSViv(relative);
+    OUTPUT:
+        RETVAL
+
+# Passes inc_buffer plus the integer values of relative_begin and length to
+# myhtml_incoming_buffer_available_length() and returns the result
+# as an integer scalar.
+SV*
+available_length(inc_buffer, relative_begin, length)
+    HTML::Incoming::Buffer inc_buffer;
+    SV* relative_begin;
+    SV* length;
+
+    PREINIT:
+        IV available;
+    CODE:
+        available = myhtml_incoming_buffer_available_length(inc_buffer, SvIV(relative_begin), SvIV(length));
+        RETVAL = newSViv(available);
+    OUTPUT:
+        RETVAL
+
+# Returns the buffer obtained from myhtml_incoming_buffer_next(), or undef
+# when MyHTML returns NULL.
+HTML::Incoming::Buffer
+next(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    CODE:
+        RETVAL = myhtml_incoming_buffer_next(inc_buffer);
+
+        /* The NULL check lives in CODE: (POSTCALL: is not meant to follow
+           OUTPUT: or to be combined with CODE:). A NULL pointer must not be
+           blessed into an object, so hand back undef instead. */
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+# Returns the buffer obtained from myhtml_incoming_buffer_prev(), or undef
+# when MyHTML returns NULL.
+HTML::Incoming::Buffer
+prev(inc_buffer)
+    HTML::Incoming::Buffer inc_buffer;
+
+    CODE:
+        RETVAL = myhtml_incoming_buffer_prev(inc_buffer);
+
+        /* The NULL check lives in CODE: (POSTCALL: is not meant to follow
+           OUTPUT: or to be combined with CODE:). A NULL pointer must not be
+           blessed into an object, so hand back undef instead. */
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
diff --git a/xs/tree.xs b/xs/tree.xs
index 784950b..8ae615b 100755
--- a/xs/tree.xs
+++ b/xs/tree.xs
@@ -195,4 +195,13 @@ get_elements_by_tag_name(tree, tag_name)
OUTPUT:
RETVAL
+# Returns the buffer obtained from myhtml_tree_incoming_buffer_first(tree),
+# or undef when MyHTML returns NULL.
+HTML::Incoming::Buffer
+incoming_buffer_first(tree)
+    HTML::MyHTML::Tree tree;
+
+    CODE:
+        RETVAL = myhtml_tree_incoming_buffer_first(tree);
+
+        /* Without this check a NULL pointer would be blessed into an
+           HTML::Incoming::Buffer object; return undef as documented. */
+        if(RETVAL == NULL)
+            XSRETURN_UNDEF;
+    OUTPUT:
+        RETVAL
+
+