From 7024458c55c0baedfb29c2f9db213d3b0474c851 Mon Sep 17 00:00:00 2001 From: lexborisov Date: Tue, 21 Mar 2017 09:56:42 +0300 Subject: Merge branch 'master' into coverity_scan --- source/myhtml/Makefile.mk | 21 +- source/myhtml/api.h | 578 +- source/myhtml/api_encoding.h | 71 - source/myhtml/callback.c | 2 +- source/myhtml/callback.h | 2 +- source/myhtml/charef.c | 2 +- source/myhtml/charef.h | 2 +- source/myhtml/charef_resource.h | 2 +- source/myhtml/data_process.c | 54 +- source/myhtml/data_process.h | 24 +- source/myhtml/def.h | 2 +- source/myhtml/encoding.c | 1222 ---- source/myhtml/encoding.h | 163 - source/myhtml/encoding_detect.c | 948 --- source/myhtml/encoding_detect_resource.h | 5626 ------------------ source/myhtml/encoding_resource.h | 9330 ------------------------------ source/myhtml/incoming.c | 232 - source/myhtml/incoming.h | 68 - source/myhtml/myhtml.c | 409 +- source/myhtml/myhtml.h | 97 +- source/myhtml/mynamespace.c | 10 +- source/myhtml/mynamespace.h | 4 +- source/myhtml/mynamespace_resource.h | 2 +- source/myhtml/myosi.c | 50 - source/myhtml/myosi.h | 260 +- source/myhtml/mystring.c | 449 +- source/myhtml/mystring.h | 128 +- source/myhtml/parser.c | 66 +- source/myhtml/parser.h | 8 +- source/myhtml/perf.c | 288 - source/myhtml/rules.c | 69 +- source/myhtml/rules.h | 6 +- source/myhtml/serialization.c | 326 +- source/myhtml/serialization.h | 14 +- source/myhtml/stream.c | 26 +- source/myhtml/stream.h | 8 +- source/myhtml/tag.c | 45 +- source/myhtml/tag.h | 20 +- source/myhtml/tag_const.h | 2 +- source/myhtml/tag_init.c | 12 +- source/myhtml/thread.c | 1185 ---- source/myhtml/thread.h | 224 - source/myhtml/token.c | 294 +- source/myhtml/token.h | 26 +- source/myhtml/tokenizer.c | 122 +- source/myhtml/tokenizer.h | 18 +- source/myhtml/tokenizer_doctype.c | 6 +- source/myhtml/tokenizer_doctype.h | 4 +- source/myhtml/tokenizer_end.c | 2 +- source/myhtml/tokenizer_end.h | 2 +- source/myhtml/tokenizer_script.c | 18 +- source/myhtml/tokenizer_script.h | 4 +- source/myhtml/tree.c | 440 +- source/myhtml/tree.h | 46 +- source/myhtml/utils.c | 201 - source/myhtml/utils.h | 49 - source/myhtml/utils/avl_tree.c | 353 -- source/myhtml/utils/avl_tree.h | 68 - source/myhtml/utils/mchar_async.c | 770 --- source/myhtml/utils/mchar_async.h | 131 - source/myhtml/utils/mcobject.c | 194 - source/myhtml/utils/mcobject.h | 70 - source/myhtml/utils/mcobject_async.c | 475 -- source/myhtml/utils/mcobject_async.h | 114 - source/myhtml/utils/mcsimple.c | 125 - source/myhtml/utils/mcsimple.h | 61 - source/myhtml/utils/mcsync.c | 155 - source/myhtml/utils/mcsync.h | 81 - source/myhtml/utils/mctree.c | 285 - source/myhtml/utils/mctree.h | 97 - source/myhtml/utils/mhash.c | 267 - source/myhtml/utils/mhash.h | 65 - source/myhtml/utils/resources.h | 217 - 73 files changed, 1408 insertions(+), 25409 deletions(-) delete mode 100644 source/myhtml/api_encoding.h delete mode 100644 source/myhtml/encoding.c delete mode 100644 source/myhtml/encoding.h delete mode 100644 source/myhtml/encoding_detect.c delete mode 100644 source/myhtml/encoding_detect_resource.h delete mode 100644 source/myhtml/encoding_resource.h delete mode 100644 source/myhtml/incoming.c delete mode 100644 source/myhtml/incoming.h delete mode 100644 source/myhtml/myosi.c delete mode 100644 source/myhtml/perf.c delete mode 100644 source/myhtml/thread.c delete mode 100644 source/myhtml/thread.h delete mode 100644 source/myhtml/utils.c delete mode 100644 source/myhtml/utils.h delete mode 100644 source/myhtml/utils/avl_tree.c delete mode 100644 source/myhtml/utils/avl_tree.h delete mode 100644 source/myhtml/utils/mchar_async.c delete mode 100644 source/myhtml/utils/mchar_async.h delete mode 100644 source/myhtml/utils/mcobject.c delete mode 100644 source/myhtml/utils/mcobject.h delete mode 100644 source/myhtml/utils/mcobject_async.c delete mode 100644 source/myhtml/utils/mcobject_async.h delete mode 100644 source/myhtml/utils/mcsimple.c delete mode 100644 source/myhtml/utils/mcsimple.h delete mode 100644 source/myhtml/utils/mcsync.c delete mode 100644 source/myhtml/utils/mcsync.h delete mode 100644 source/myhtml/utils/mctree.c delete mode 100644 source/myhtml/utils/mctree.h delete mode 100644 source/myhtml/utils/mhash.c delete mode 100644 source/myhtml/utils/mhash.h delete mode 100644 source/myhtml/utils/resources.h (limited to 'source/myhtml') diff --git a/source/myhtml/Makefile.mk b/source/myhtml/Makefile.mk index 81d5d63..b4b30cb 100644 --- a/source/myhtml/Makefile.mk +++ b/source/myhtml/Makefile.mk @@ -1,14 +1,13 @@ -find_files_h = $(wildcard $(SRCDIR)/myhtml/$(dir)/*.h) -find_files_c = $(wildcard $(SRCDIR)/myhtml/$(dir)/*.c) +myhtml_dirs := . +myhtml_objs := $(call MODEST_UTILS_OBJS,myhtml,$(myhtml_dirs)) -SUBDIRS := . utils -HDRS += $(foreach dir,$(SUBDIRS),$(find_files_h)) -SRCS += $(foreach dir,$(SUBDIRS),$(find_files_c)) +myhtml_description := HTML parser by https://html.spec.whatwg.org/multipage/ +myhtml_dependencies := mycore myencoding myport -myhtml_clone: MyHTML_DIR_$(SUBDIRS) - mkdir -p $(INCLUDE_TMP)/myhtml/utils - cp $(SRCDIR)/myhtml/utils/*.h $(INCLUDE_TMP)/myhtml/utils +myhtml_all: $(myhtml_objs) -MyHTML_DIR_$(SUBDIRS): - mkdir -p $(INCLUDE_TMP)/myhtml/$(patsubst MyHTML_DIR_%,%,$@) - cp $(SRCDIR)/myhtml/$(patsubst MyHTML_DIR_%,%,$@)/*.h $(INCLUDE_TMP)/myhtml/$(patsubst MyHTML_DIR_%,%,$@)/ +myhtml_clean: + rm -f $(myhtml_objs) + +myhtml_clone: + $(call MODEST_UTILS_HDRS_CLONE,myhtml,$(myhtml_dirs)) diff --git a/source/myhtml/api.h b/source/myhtml/api.h index e7eb4a6..9599750 100755 --- a/source/myhtml/api.h +++ b/source/myhtml/api.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -34,14 +34,22 @@ * */ -#define MyHTML_VERSION_MAJOR 3 +#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_PATCH 0 +#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) + #include #include #include +#include +#include +#include +#include +#include + #if defined(_MSC_VER) # define MyHTML_DEPRECATED(func) __declspec(deprecated) func #elif defined(__GNUC__) || defined(__INTEL_COMPILER) @@ -54,56 +62,6 @@ extern "C" { #endif -/** - * encodings type - */ -enum myhtml_encoding_list { - MyHTML_ENCODING_DEFAULT = 0x00, -// MyHTML_ENCODING_AUTO = 0x01, // future - MyHTML_ENCODING_NOT_DETERMINED = 0x02, - MyHTML_ENCODING_UTF_8 = 0x00, // default encoding - MyHTML_ENCODING_UTF_16LE = 0x04, - MyHTML_ENCODING_UTF_16BE = 0x05, - MyHTML_ENCODING_X_USER_DEFINED = 0x06, - MyHTML_ENCODING_BIG5 = 0x07, - MyHTML_ENCODING_EUC_JP = 0x08, - MyHTML_ENCODING_EUC_KR = 0x09, - MyHTML_ENCODING_GB18030 = 0x0a, - MyHTML_ENCODING_GBK = 0x0b, - MyHTML_ENCODING_IBM866 = 0x0c, - MyHTML_ENCODING_ISO_2022_JP = 0x0d, - MyHTML_ENCODING_ISO_8859_10 = 0x0e, - MyHTML_ENCODING_ISO_8859_13 = 0x0f, - MyHTML_ENCODING_ISO_8859_14 = 0x10, - MyHTML_ENCODING_ISO_8859_15 = 0x11, - MyHTML_ENCODING_ISO_8859_16 = 0x12, - MyHTML_ENCODING_ISO_8859_2 = 0x13, - MyHTML_ENCODING_ISO_8859_3 = 0x14, - MyHTML_ENCODING_ISO_8859_4 = 0x15, - MyHTML_ENCODING_ISO_8859_5 = 0x16, - MyHTML_ENCODING_ISO_8859_6 = 0x17, - MyHTML_ENCODING_ISO_8859_7 = 0x18, - MyHTML_ENCODING_ISO_8859_8 = 0x19, - MyHTML_ENCODING_ISO_8859_8_I = 0x1a, - MyHTML_ENCODING_KOI8_R = 0x1b, - MyHTML_ENCODING_KOI8_U = 0x1c, - MyHTML_ENCODING_MACINTOSH = 0x1d, - MyHTML_ENCODING_SHIFT_JIS = 0x1e, - MyHTML_ENCODING_WINDOWS_1250 = 0x1f, - MyHTML_ENCODING_WINDOWS_1251 = 0x20, - MyHTML_ENCODING_WINDOWS_1252 = 0x21, - MyHTML_ENCODING_WINDOWS_1253 = 0x22, - MyHTML_ENCODING_WINDOWS_1254 = 0x23, - MyHTML_ENCODING_WINDOWS_1255 = 0x24, - MyHTML_ENCODING_WINDOWS_1256 = 0x25, - MyHTML_ENCODING_WINDOWS_1257 = 0x26, - MyHTML_ENCODING_WINDOWS_1258 = 0x27, - MyHTML_ENCODING_WINDOWS_874 = 0x28, - MyHTML_ENCODING_X_MAC_CYRILLIC = 0x29, - MyHTML_ENCODING_LAST_ENTRY = 0x2a -} -typedef myhtml_encoding_t; - /** * @struct basic tag ids */ @@ -384,54 +342,27 @@ enum myhtml_status { MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_ERROR = 0x0001, MyHTML_STATUS_ERROR_MEMORY_ALLOCATION = 0x0002, - MyHTML_STATUS_THREAD_ERROR_MEMORY_ALLOCATION = 0x0009, - MyHTML_STATUS_THREAD_ERROR_LIST_INIT = 0x000a, - MyHTML_STATUS_THREAD_ERROR_ATTR_MALLOC = 0x000b, - MyHTML_STATUS_THREAD_ERROR_ATTR_INIT = 0x000c, - MyHTML_STATUS_THREAD_ERROR_ATTR_SET = 0x000d, - MyHTML_STATUS_THREAD_ERROR_ATTR_DESTROY = 0x000e, - MyHTML_STATUS_THREAD_ERROR_NO_SLOTS = 0x000f, - MyHTML_STATUS_THREAD_ERROR_BATCH_INIT = 0x0010, - MyHTML_STATUS_THREAD_ERROR_WORKER_MALLOC = 0x0011, - MyHTML_STATUS_THREAD_ERROR_WORKER_SEM_CREATE = 0x0012, - MyHTML_STATUS_THREAD_ERROR_WORKER_THREAD_CREATE = 0x0013, - MyHTML_STATUS_THREAD_ERROR_MASTER_THREAD_CREATE = 0x0014, - MyHTML_STATUS_THREAD_ERROR_SEM_PREFIX_MALLOC = 0x0032, - MyHTML_STATUS_THREAD_ERROR_SEM_CREATE = 0x0033, - MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC = 0x003c, - MyHTML_STATUS_THREAD_ERROR_QUEUE_NODES_MALLOC = 0x003d, - MyHTML_STATUS_THREAD_ERROR_QUEUE_NODE_MALLOC = 0x003e, - MyHTML_STATUS_THREAD_ERROR_MUTEX_MALLOC = 0x0046, - MyHTML_STATUS_THREAD_ERROR_MUTEX_INIT = 0x0047, - MyHTML_STATUS_THREAD_ERROR_MUTEX_LOCK = 0x0048, - MyHTML_STATUS_THREAD_ERROR_MUTEX_UNLOCK = 0x0049, - MyHTML_STATUS_RULES_ERROR_MEMORY_ALLOCATION = 0x0064, - MyHTML_STATUS_PERF_ERROR_COMPILED_WITHOUT_PERF = 0x00c8, - MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK = 0x00c9, - MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION = 0x012c, - MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT = 0x012d, - MyHTML_STATUS_TAGS_ERROR_MEMORY_ALLOCATION = 0x0190, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE = 0x0191, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_MALLOC = 0x0192, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE_NODE = 0x0193, - MyHTML_STATUS_TAGS_ERROR_CACHE_MEMORY_ALLOCATION = 0x0194, - MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION = 0x0195, - MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION = 0x01f4, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE = 0x01f5, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_INIT = 0x01f6, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE = 0x01f7, - MyHTML_STATUS_TREE_ERROR_INCOMING_BUFFER_CREATE = 0x01f8, - MyHTML_STATUS_ATTR_ERROR_ALLOCATION = 0x0258, - MyHTML_STATUS_ATTR_ERROR_CREATE = 0x0259, - MyHTML_STATUS_STREAM_BUFFER_ERROR_CREATE = 0x0300, - MyHTML_STATUS_STREAM_BUFFER_ERROR_INIT = 0x0301, - MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_CREATE = 0x0302, - MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_INIT = 0x0303, - MyHTML_STATUS_STREAM_BUFFER_ERROR_ADD_ENTRY = 0x0304, - MyHTML_STATUS_MCOBJECT_ERROR_CACHE_CREATE = 0x0340, - MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_CREATE = 0x0341, - MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_INIT = 0x0342, - MyHTML_STATUS_MCOBJECT_ERROR_CACHE_REALLOC = 0x0343 + MyHTML_STATUS_RULES_ERROR_MEMORY_ALLOCATION = 0x9064, + MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION = 0x912c, + MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT = 0x912d, + MyHTML_STATUS_TAGS_ERROR_MEMORY_ALLOCATION = 0x9190, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE = 0x9191, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_MALLOC = 0x9192, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE_NODE = 0x9193, + MyHTML_STATUS_TAGS_ERROR_CACHE_MEMORY_ALLOCATION = 0x9194, + MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION = 0x9195, + MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION = 0x91f4, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE = 0x91f5, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_INIT = 0x91f6, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE = 0x91f7, + MyHTML_STATUS_TREE_ERROR_INCOMING_BUFFER_CREATE = 0x91f8, + MyHTML_STATUS_ATTR_ERROR_ALLOCATION = 0x9258, + MyHTML_STATUS_ATTR_ERROR_CREATE = 0x9259, + MyHTML_STATUS_STREAM_BUFFER_ERROR_CREATE = 0x9300, + MyHTML_STATUS_STREAM_BUFFER_ERROR_INIT = 0x9301, + MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_CREATE = 0x9302, + MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_INIT = 0x9303, + MyHTML_STATUS_STREAM_BUFFER_ERROR_ADD_ENTRY = 0x9304 } typedef myhtml_status_t; @@ -492,6 +423,11 @@ typedef struct myhtml myhtml_t; */ typedef struct myhtml_tree myhtml_tree_t; +/** + * @struct myhtml_token_t MyHTML_TOKEN + */ +typedef struct myhtml_token myhtml_token_t; + typedef struct myhtml_token_attr myhtml_tree_attr_t; typedef struct myhtml_tree_node myhtml_tree_node_t; @@ -502,39 +438,6 @@ typedef struct myhtml_tree_node myhtml_tree_node_t; typedef size_t myhtml_tag_id_t; typedef struct myhtml_tag myhtml_tag_t; -/** - * MCHAR_ASYNC structures - * - */ -typedef struct mchar_async mchar_async_t; - -/** - * MyHTML_INCOMING structures - * - */ -typedef struct myhtml_incoming_buffer myhtml_incoming_buffer_t; - -/** - * MyHTML_STRING structures - * - */ -struct myhtml_string { - char* data; - size_t size; - size_t length; - - mchar_async_t *mchar; - size_t node_idx; -} -typedef myhtml_string_t; - -struct myhtml_string_raw { - char* data; - size_t size; - size_t length; -} -typedef myhtml_string_raw_t; - /** * @struct myhtml_collection_t */ @@ -572,7 +475,6 @@ typedef myhtml_version_t; // callback functions typedef void* (*myhtml_callback_token_f)(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx); typedef void (*myhtml_callback_tree_node_f)(myhtml_tree_t* tree, myhtml_tree_node_t* node, void* ctx); -typedef void (*myhtml_callback_serialize_f)(const char* buffer, size_t size, void* ctx); /*********************************************************************************** * @@ -602,7 +504,7 @@ myhtml_create(void); * * @return MyHTML_STATUS_OK if successful, otherwise an error status value. */ -myhtml_status_t +mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size); @@ -627,7 +529,7 @@ myhtml_destroy(myhtml_t* myhtml); * Parsing HTML * * @param[in] previously created structure myhtml_tree_t* - * @param[in] Input character encoding; Default: MyHTML_ENCODING_UTF_8 or MyHTML_ENCODING_DEFAULT or 0 + * @param[in] Input character encoding; Default: MyENCODING_UTF_8 or MyENCODING_DEFAULT or 0 * @param[in] HTML * @param[in] HTML size * @@ -635,15 +537,15 @@ myhtml_destroy(myhtml_t* myhtml); * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t -myhtml_parse(myhtml_tree_t* tree, myhtml_encoding_t encoding, +mystatus_t +myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size); /** * Parsing fragment of HTML * * @param[in] previously created structure myhtml_tree_t* - * @param[in] Input character encoding; Default: MyHTML_ENCODING_UTF_8 or MyHTML_ENCODING_DEFAULT or 0 + * @param[in] Input character encoding; Default: MyENCODING_UTF_8 or MyENCODING_DEFAULT or 0 * @param[in] HTML * @param[in] HTML size * @param[in] fragment base (root) tag id. Default: MyHTML_TAG_DIV if set 0 @@ -653,8 +555,8 @@ myhtml_parse(myhtml_tree_t* tree, myhtml_encoding_t encoding, * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t -myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t encoding, +mystatus_t +myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); @@ -663,7 +565,7 @@ myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t encoding, * No matter what was said during initialization MyHTML * * @param[in] previously created structure myhtml_tree_t* - * @param[in] Input character encoding; Default: MyHTML_ENCODING_UTF_8 or MyHTML_ENCODING_DEFAULT or 0 + * @param[in] Input character encoding; Default: MyENCODING_UTF_8 or MyENCODING_DEFAULT or 0 * @param[in] HTML * @param[in] HTML size * @@ -671,8 +573,8 @@ myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t encoding, * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t -myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, +mystatus_t +myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size); /** @@ -680,7 +582,7 @@ myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, * No matter what was said during initialization MyHTML * * @param[in] previously created structure myhtml_tree_t* - * @param[in] Input character encoding; Default: MyHTML_ENCODING_UTF_8 or MyHTML_ENCODING_DEFAULT or 0 + * @param[in] Input character encoding; Default: MyENCODING_UTF_8 or MyENCODING_DEFAULT or 0 * @param[in] HTML * @param[in] HTML size * @param[in] fragment base (root) tag id. Default: MyHTML_TAG_DIV if set 0 @@ -690,8 +592,8 @@ myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t -myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, +mystatus_t +myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); @@ -704,7 +606,7 @@ myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size); /** @@ -718,7 +620,7 @@ myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size); * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html,size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); @@ -732,7 +634,7 @@ myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html,size_t html_si * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size); /** @@ -747,7 +649,7 @@ myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_siz * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); @@ -758,7 +660,7 @@ myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_parse_chunk_end(myhtml_tree_t* tree); /*********************************************************************************** @@ -783,7 +685,7 @@ myhtml_tree_create(void); * * @return MyHTML_STATUS_OK if successful, otherwise an error status */ -myhtml_status_t +mystatus_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml); /** @@ -935,48 +837,14 @@ myhtml_tree_get_mchar(myhtml_tree_t* tree); size_t myhtml_tree_get_mchar_node_id(myhtml_tree_t* tree); -/** - * Print tree of a node. Print including current node - * - * @param[in] myhtml_tree_t* - * @param[in] myhtml_tree_node_t* - * @param[in] file handle, for example use stdout - * @param[in] tab (\t) increment for pretty print, set 0 - */ -void -myhtml_tree_print_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, - FILE* out, size_t inc); - -/** - * Print tree of a node. Print excluding current node - * - * @param[in] myhtml_tree_t* - * @param[in] myhtml_tree_node_t* - * @param[in] file handle, for example use stdout - * @param[in] tab (\t) increment for pretty print, set 0 - */ -void -myhtml_tree_print_node_children(myhtml_tree_t* tree, myhtml_tree_node_t* node, - FILE* out, size_t inc); - -/** - * Print a node - * - * @param[in] myhtml_tree_t* - * @param[in] myhtml_tree_node_t* - * @param[in] file handle, for example use stdout - */ -void -myhtml_tree_print_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out); - /** * Get first Incoming Buffer * * @param[in] myhtml_tree_t* * - * @return myhtml_incoming_buffer_t* if successful, otherwise a NULL value + * @return mycore_incoming_buffer_t* if successful, otherwise a NULL value */ -myhtml_incoming_buffer_t* +mycore_incoming_buffer_t* myhtml_tree_incoming_buffer_first(myhtml_tree_t *tree); /*********************************************************************************** @@ -1007,7 +875,7 @@ myhtml_node_first(myhtml_tree_t* tree); */ myhtml_collection_t* myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, - myhtml_tag_id_t tag_id, myhtml_status_t *status); + myhtml_tag_id_t tag_id, mystatus_t *status); /** * Get nodes by tag name @@ -1022,7 +890,7 @@ myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, */ myhtml_collection_t* myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, - const char* name, size_t length, myhtml_status_t *status); + const char* name, size_t length, mystatus_t *status); /** * Get nodes by attribute key @@ -1039,7 +907,7 @@ myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_collection_t* myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, - const char* key, size_t key_len, myhtml_status_t* status); + const char* key, size_t key_len, mystatus_t* status); /** * Get nodes by attribute value; exactly equal; like a [foo="bar"] @@ -1063,7 +931,7 @@ myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** * Get nodes by attribute value; whitespace separated; like a [foo~="bar"] @@ -1089,7 +957,7 @@ myhtml_get_nodes_by_attribute_value_whitespace_separated(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** * Get nodes by attribute value; value begins exactly with the string; like a [foo^="bar"] @@ -1115,7 +983,7 @@ myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** @@ -1142,7 +1010,7 @@ myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** * Get nodes by attribute value; value contains the substring; like a [foo*="bar"] @@ -1168,7 +1036,7 @@ myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** * Get nodes by attribute value; attribute value is a hyphen-separated list of values beginning; @@ -1193,7 +1061,7 @@ myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtml_tree_t *tree, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /** * Get nodes by tag id in node scope @@ -1209,7 +1077,7 @@ myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtml_tree_t *tree, myhtml_collection_t* myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, - myhtml_status_t *status); + mystatus_t *status); /** * Get nodes by tag name in node scope @@ -1226,7 +1094,7 @@ myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *co myhtml_collection_t* myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, - myhtml_status_t *status); + mystatus_t *status); /** * Get next sibling node @@ -1378,11 +1246,11 @@ myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node); * @param[in] text length * @param[in] character encoding * - * @return myhtml_string_t* if successful, otherwise a NULL value + * @return mycore_string_t* if successful, otherwise a NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, - myhtml_encoding_t encoding); + myencoding_t encoding); /** * Add text for a node with convert character encoding. @@ -1392,11 +1260,11 @@ myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, * @param[in] text length * @param[in] character encoding * - * @return myhtml_string_t* if successful, otherwise a NULL value + * @return mycore_string_t* if successful, otherwise a NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, - myhtml_encoding_t encoding); + myencoding_t encoding); /** * Get token node @@ -1479,13 +1347,13 @@ const char* myhtml_node_text(myhtml_tree_node_t *node, size_t *length); /** - * Get myhtml_string_t object by Tree node + * Get mycore_string_t object by Tree node * * @param[in] myhtml_tree_node_t* * - * @return myhtml_string_t* if exists, otherwise an NULL value + * @return mycore_string_t* if exists, otherwise an NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_node_string(myhtml_tree_node_t *node); /** @@ -1613,9 +1481,9 @@ myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length); * * @param[in] myhtml_tree_attr_t* * - * @return myhtml_string_t* if exists, otherwise an NULL value + * @return mycore_string_t* if exists, otherwise an NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_attribute_key_string(myhtml_tree_attr_t* attr); /** @@ -1623,9 +1491,9 @@ myhtml_attribute_key_string(myhtml_tree_attr_t* attr); * * @param[in] myhtml_tree_attr_t* * - * @return myhtml_string_t* if exists, otherwise an NULL value + * @return mycore_string_t* if exists, otherwise an NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_attribute_value_string(myhtml_tree_attr_t* attr); /** @@ -1649,7 +1517,7 @@ myhtml_attribute_by_key(myhtml_tree_node_t *node, * @param[in] attr key name length * @param[in] attr value name * @param[in] attr value name length - * @param[in] character encoding; Default: MyHTML_ENCODING_UTF_8 or MyHTML_ENCODING_DEFAULT or 0 + * @param[in] character encoding; Default: MyENCODING_UTF_8 or MyENCODING_DEFAULT or 0 * * @return created myhtml_tree_attr_t* if successful, otherwise a NULL value */ @@ -1657,7 +1525,7 @@ myhtml_tree_attr_t* myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, - myhtml_encoding_t encoding); + myencoding_t encoding); /** * Remove attribute reference. Not release the resources @@ -1797,13 +1665,13 @@ const char* myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *length); /** - * Get myhtml_string_t object by token node + * Get mycore_string_t object by token node * * @param[in] myhtml_token_node_t* * - * @return myhtml_string_t* if exists, otherwise an NULL value + * @return mycore_string_t* if exists, otherwise an NULL value */ -myhtml_string_t* +mycore_string_t* myhtml_token_node_string(myhtml_token_node_t *token_node); /** @@ -1829,10 +1697,11 @@ myhtml_token_node_is_close_self(myhtml_token_node_t *token_node); /** * Wait for process token all parsing stage. Need if you use thread mode * + * @param[in] myhtml_token_t* * @param[in] myhtml_token_node_t* */ void -myhtml_token_node_wait_for_done(myhtml_token_node_t* node); +myhtml_token_node_wait_for_done(myhtml_token_t* token, myhtml_token_node_t* node); /*********************************************************************************** * @@ -1881,7 +1750,7 @@ myhtml_tag_id_by_name(myhtml_tree_t* tree, * @return myhtml_collection_t* if successful, otherwise an NULL value */ myhtml_collection_t* -myhtml_collection_create(size_t size, myhtml_status_t *status); +myhtml_collection_create(size_t size, mystatus_t *status); /** * Clears collection @@ -1911,7 +1780,7 @@ myhtml_collection_destroy(myhtml_collection_t *collection); * * @return NULL if successful, otherwise an myhtml_collection_t* structure */ -myhtml_status_t +mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length); /*********************************************************************************** @@ -1928,16 +1797,16 @@ myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_ * */ void -myhtml_encoding_set(myhtml_tree_t* tree, myhtml_encoding_t encoding); +myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding); /** * Get character encoding for current stream * * @param[in] myhtml_tree_t* * - * @return myhtml_encoding_t + * @return myencoding_t */ -myhtml_encoding_t +myencoding_t myhtml_encoding_get(myhtml_tree_t* tree); /** @@ -1950,7 +1819,7 @@ myhtml_encoding_get(myhtml_tree_t* tree); * @return size character set */ size_t -myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data); +myencoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data); /** * Convert Unicode Codepoint to UTF-16LE @@ -1964,7 +1833,7 @@ myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data); * @return size character set */ size_t -myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data); +myencoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data); /** * Detect character encoding @@ -1980,7 +1849,7 @@ myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data); * @return true if encoding found, otherwise false */ bool -myhtml_encoding_detect(const char *text, size_t length, myhtml_encoding_t *encoding); +myencoding_detect(const char *text, size_t length, myencoding_t *encoding); /** * Detect Russian character encoding @@ -1994,7 +1863,7 @@ myhtml_encoding_detect(const char *text, size_t length, myhtml_encoding_t *encod * @return true if encoding found, otherwise false */ bool -myhtml_encoding_detect_russian(const char *text, size_t length, myhtml_encoding_t *encoding); +myencoding_detect_russian(const char *text, size_t length, myencoding_t *encoding); /** * Detect Unicode character encoding @@ -2008,7 +1877,7 @@ myhtml_encoding_detect_russian(const char *text, size_t length, myhtml_encoding_ * @return true if encoding found, otherwise false */ bool -myhtml_encoding_detect_unicode(const char *text, size_t length, myhtml_encoding_t *encoding); +myencoding_detect_unicode(const char *text, size_t length, myencoding_t *encoding); /** * Detect Unicode character encoding by BOM @@ -2022,7 +1891,7 @@ myhtml_encoding_detect_unicode(const char *text, size_t length, myhtml_encoding_ * @return true if encoding found, otherwise false */ bool -myhtml_encoding_detect_bom(const char *text, size_t length, myhtml_encoding_t *encoding); +myencoding_detect_bom(const char *text, size_t length, myencoding_t *encoding); /** * Detect Unicode character encoding by BOM. Cut BOM if will be found @@ -2038,13 +1907,13 @@ myhtml_encoding_detect_bom(const char *text, size_t length, myhtml_encoding_t *e * @return true if encoding found, otherwise false */ bool -myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encoding_t *encoding, +myencoding_detect_and_cut_bom(const char *text, size_t length, myencoding_t *encoding, const char **new_text, size_t *new_size); /** * Detect encoding by name - * Names like: windows-1258 return MyHTML_ENCODING_WINDOWS_1258 - * cp1251 or windows-1251 return MyHTML_ENCODING_WINDOWS_1251 + * Names like: windows-1258 return MyENCODING_WINDOWS_1258 + * cp1251 or windows-1251 return MyENCODING_WINDOWS_1251 * * See https://encoding.spec.whatwg.org/#names-and-labels * @@ -2055,18 +1924,18 @@ myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encod * @return true if encoding found, otherwise false */ bool -myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t *encoding); +myencoding_by_name(const char *name, size_t length, myencoding_t *encoding); /** - * Get Encoding name by myhtml_encoding_t (by id) + * Get Encoding name by myencoding_t (by id) * - * @param[in] myhtml_encoding_t, encoding id + * @param[in] myencoding_t, encoding id * @param[out] return name length * * @return encoding name, otherwise NULL value */ const char* -myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length); +myencoding_name_by_id(myencoding_t encoding, size_t *length); /** * Detect encoding in meta tag () before start parsing @@ -2076,10 +1945,27 @@ myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length); * @param[in] html data bytes * @param[in] html data length * - * @return detected encoding if encoding found, otherwise MyHTML_ENCODING_NOT_DETERMINED + * @return detected encoding if encoding found, otherwise MyENCODING_NOT_DETERMINED */ -myhtml_encoding_t -myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); +myencoding_t +myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); + +/** + * Extracting character encoding from string. Find "charset=" and see encoding. + * For example: "text/html; charset=windows-1251". Return MyENCODING_WINDOWS_1251 + * + * + * See https://html.spec.whatwg.org/multipage/infrastructure.html#algorithm-for-extracting-a-character-encoding-from-a-meta-element + * + * @param[in] data + * @param[in] data length + * @param[out] return encoding + * + * @return true if encoding found + */ +bool +myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, + myencoding_t *encoding); /*********************************************************************************** * @@ -2088,7 +1974,7 @@ myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t da ***********************************************************************************/ /** - * Init myhtml_string_t structure + * Init mycore_string_t structure * * @param[in] mchar_async_t*. It can be obtained from myhtml_tree_t object * (see myhtml_tree_get_mchar function) or create manualy @@ -2098,7 +1984,7 @@ myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t da * if created mchar_async_t object manually you know it, if not then take from the Tree * (see myhtml_tree_get_mchar_node_id) * - * @param[in] myhtml_string_t*. It can be obtained from myhtml_tree_node_t object + * @param[in] mycore_string_t*. It can be obtained from myhtml_tree_node_t object * (see myhtml_node_string function) or create manualy * * @param[in] data size. Set the size you want for char* @@ -2106,137 +1992,137 @@ myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t da * @return char* of the size if successful, otherwise a NULL value */ char* -myhtml_string_init(mchar_async_t *mchar, size_t node_id, - myhtml_string_t* str, size_t size); +mycore_string_init(mchar_async_t *mchar, size_t node_id, + mycore_string_t* str, size_t size); /** - * Increase the current size for myhtml_string_t object + * Increase the current size for mycore_string_t object * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function - * @param[in] data size. Set the new size you want for myhtml_string_t object + * @param[in] mycore_string_t*. See description for mycore_string_init function + * @param[in] data size. Set the new size you want for mycore_string_t object * * @return char* of the size if successful, otherwise a NULL value */ char* -myhtml_string_realloc(myhtml_string_t *str, size_t new_size); +mycore_string_realloc(mycore_string_t *str, size_t new_size); /** - * Clean myhtml_string_t object. In reality, data length set to 0 - * Equivalently: myhtml_string_length_set(str, 0); + * Clean mycore_string_t object. In reality, data length set to 0 + * Equivalently: mycore_string_length_set(str, 0); * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function */ void -myhtml_string_clean(myhtml_string_t* str); +mycore_string_clean(mycore_string_t* str); /** - * Clean myhtml_string_t object. Equivalently: memset(str, 0, sizeof(myhtml_string_t)) + * Clean mycore_string_t object. Equivalently: memset(str, 0, sizeof(mycore_string_t)) * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function */ void -myhtml_string_clean_all(myhtml_string_t* str); +mycore_string_clean_all(mycore_string_t* str); /** - * Release all resources for myhtml_string_t object + * Release all resources for mycore_string_t object * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * @param[in] call free function for current object or not * - * @return NULL if destroy_obj set true, otherwise a current myhtml_string_t object + * @return NULL if destroy_obj set true, otherwise a current mycore_string_t object */ -myhtml_string_t* -myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj); +mycore_string_t* +mycore_string_destroy(mycore_string_t* str, bool destroy_obj); /** - * Get data (char*) from a myhtml_string_t object + * Get data (char*) from a mycore_string_t object * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * * @return char* if exists, otherwise a NULL value */ char* -myhtml_string_data(myhtml_string_t *str); +mycore_string_data(mycore_string_t *str); /** - * Get data length from a myhtml_string_t object + * Get data length from a mycore_string_t object * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * * @return data length */ size_t -myhtml_string_length(myhtml_string_t *str); +mycore_string_length(mycore_string_t *str); /** - * Get data size from a myhtml_string_t object + * Get data size from a mycore_string_t object * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * * @return data size */ size_t -myhtml_string_size(myhtml_string_t *str); +mycore_string_size(mycore_string_t *str); /** - * Set data (char *) for a myhtml_string_t object. + * Set data (char *) for a mycore_string_t object. * * Attention!!! Attention!!! Attention!!! * * You can assign only that it has been allocated from functions: - * myhtml_string_data_alloc - * myhtml_string_data_realloc + * mycore_string_data_alloc + * mycore_string_data_realloc * or obtained manually created from mchar_async_t object * * Attention!!! Do not try set chat* from allocated by malloc or realloc!!! * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * @param[in] you data to want assign * * @return assigned data if successful, otherwise a NULL value */ char* -myhtml_string_data_set(myhtml_string_t *str, char *data); +mycore_string_data_set(mycore_string_t *str, char *data); /** - * Set data size for a myhtml_string_t object. + * Set data size for a mycore_string_t object. * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * @param[in] you size to want assign * * @return assigned size */ size_t -myhtml_string_size_set(myhtml_string_t *str, size_t size); +mycore_string_size_set(mycore_string_t *str, size_t size); /** - * Set data length for a myhtml_string_t object. + * Set data length for a mycore_string_t object. * - * @param[in] myhtml_string_t*. See description for myhtml_string_init function + * @param[in] mycore_string_t*. See description for mycore_string_init function * @param[in] you length to want assign * * @return assigned length */ size_t -myhtml_string_length_set(myhtml_string_t *str, size_t length); +mycore_string_length_set(mycore_string_t *str, size_t length); /** * Allocate data (char*) from a mchar_async_t object * - * @param[in] mchar_async_t*. See description for myhtml_string_init function - * @param[in] node id. See description for myhtml_string_init function + * @param[in] mchar_async_t*. See description for mycore_string_init function + * @param[in] node id. See description for mycore_string_init function * @param[in] you size to want assign * * @return data if successful, otherwise a NULL value */ char* -myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size); +mycore_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size); /** * Allocate data (char*) from a mchar_async_t object * - * @param[in] mchar_async_t*. See description for myhtml_string_init function - * @param[in] node id. See description for myhtml_string_init function + * @param[in] mchar_async_t*. See description for mycore_string_init function + * @param[in] node id. See description for mycore_string_init function * @param[in] old data * @param[in] how much data is copied from the old data to new data * @param[in] new size @@ -2244,28 +2130,28 @@ myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size); * @return data if successful, otherwise a NULL value */ char* -myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, +mycore_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size); /** * Release allocated data * - * @param[in] mchar_async_t*. See description for myhtml_string_init function - * @param[in] node id. See description for myhtml_string_init function + * @param[in] mchar_async_t*. See description for mycore_string_init function + * @param[in] node id. See description for mycore_string_init function * @param[in] data to release * * @return data if successful, otherwise a NULL value */ void -myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data); +mycore_string_data_free(mchar_async_t *mchar, size_t node_id, char *data); /*********************************************************************************** * * MyHTML_STRING_RAW * - * All work with myhtml_string_raw_t object occurs through - * myhtml_malloc (standart malloc), myhtml_realloc (standart realloc), - * myhtml_free (standart free). + * All work with mycore_string_raw_t object occurs through + * mycore_malloc (standart malloc), mycore_realloc (standart realloc), + * mycore_free (standart free). * * You are free to change them on without fear that something will happen * You can call free for str_raw.data, or change str_raw.length = 0 @@ -2273,32 +2159,32 @@ myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data); ***********************************************************************************/ /** - * Clean myhtml_string_raw_t object. In reality, data length set to 0 + * Clean mycore_string_raw_t object. In reality, data length set to 0 * - * @param[in] myhtml_string_raw_t* + * @param[in] mycore_string_raw_t* */ void -myhtml_string_raw_clean(myhtml_string_raw_t* str_raw); +mycore_string_raw_clean(mycore_string_raw_t* str_raw); /** - * Full clean myhtml_string_raw_t object. - * Equivalently: memset(str_raw, 0, sizeof(myhtml_string_raw_t)) + * Full clean mycore_string_raw_t object. + * Equivalently: memset(str_raw, 0, sizeof(mycore_string_raw_t)) * - * @param[in] myhtml_string_raw_t* + * @param[in] mycore_string_raw_t* */ void -myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw); +mycore_string_raw_clean_all(mycore_string_raw_t* str_raw); /** - * Free resources for myhtml_string_raw_t object + * Free resources for mycore_string_raw_t object * - * @param[in] myhtml_string_raw_t* + * @param[in] mycore_string_raw_t* * @param[in] call free function for current object or not * - * @return NULL if destroy_obj set true, otherwise a current myhtml_string_raw_t object + * @return NULL if destroy_obj set true, otherwise a current mycore_string_raw_t object */ -myhtml_string_raw_t* -myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj); +mycore_string_raw_t* +mycore_string_raw_destroy(mycore_string_raw_t* str_raw, bool destroy_obj); /*********************************************************************************** @@ -2316,99 +2202,99 @@ myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj); /** * Get Incoming Buffer by position * - * @param[in] current myhtml_incoming_buffer_t* + * @param[in] current mycore_incoming_buffer_t* * @param[in] begin position * - * @return myhtml_incoming_buffer_t if successful, otherwise a NULL value + * @return mycore_incoming_buffer_t if successful, otherwise a NULL value */ -myhtml_incoming_buffer_t* -myhtml_incoming_buffer_find_by_position(myhtml_incoming_buffer_t *inc_buf, size_t begin); +mycore_incoming_buffer_t* +mycore_incoming_buffer_find_by_position(mycore_incoming_buffer_t *inc_buf, size_t begin); /** * Get data of Incoming Buffer * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * * @return const char* if successful, otherwise a NULL value */ const char* -myhtml_incoming_buffer_data(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_data(mycore_incoming_buffer_t *inc_buf); /** * Get data length of Incoming Buffer * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * * @return size_t */ size_t -myhtml_incoming_buffer_length(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_length(mycore_incoming_buffer_t *inc_buf); /** * Get data size of Incoming Buffer * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * * @return size_t */ size_t -myhtml_incoming_buffer_size(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_size(mycore_incoming_buffer_t *inc_buf); /** * Get data offset of Incoming Buffer. Global position of begin Incoming Buffer. * See description for MyHTML_INCOMING title * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * * @return size_t */ size_t -myhtml_incoming_buffer_offset(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_offset(mycore_incoming_buffer_t *inc_buf); /** * Get Relative Position for Incoming Buffer. - * Incoming Buffer should be prepared by myhtml_incoming_buffer_find_by_position + * Incoming Buffer should be prepared by mycore_incoming_buffer_find_by_position * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * @param[in] global begin * * @return size_t */ size_t -myhtml_incoming_buffer_relative_begin(myhtml_incoming_buffer_t *inc_buf, size_t begin); +mycore_incoming_buffer_relative_begin(mycore_incoming_buffer_t *inc_buf, size_t begin); /** * This function returns number of available data by Incoming Buffer - * Incoming buffer may be incomplete. See myhtml_incoming_buffer_next + * Incoming buffer may be incomplete. See mycore_incoming_buffer_next * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * @param[in] global begin * * @return size_t */ size_t -myhtml_incoming_buffer_available_length(myhtml_incoming_buffer_t *inc_buf, +mycore_incoming_buffer_available_length(mycore_incoming_buffer_t *inc_buf, size_t relative_begin, size_t length); /** * Get next buffer * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * - * @return myhtml_incoming_buffer_t* + * @return mycore_incoming_buffer_t* */ -myhtml_incoming_buffer_t* -myhtml_incoming_buffer_next(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_t* +mycore_incoming_buffer_next(mycore_incoming_buffer_t *inc_buf); /** * Get prev buffer * - * @param[in] myhtml_incoming_buffer_t* + * @param[in] mycore_incoming_buffer_t* * - * @return myhtml_incoming_buffer_t* + * @return mycore_incoming_buffer_t* */ -myhtml_incoming_buffer_t* -myhtml_incoming_buffer_prev(myhtml_incoming_buffer_t *inc_buf); +mycore_incoming_buffer_t* +mycore_incoming_buffer_prev(mycore_incoming_buffer_t *inc_buf); /*********************************************************************************** * @@ -2564,7 +2450,7 @@ myhtml_callback_tree_node_remove_set(myhtml_tree_t* tree, myhtml_callback_tree_n * @return 0 if match, otherwise index of break position */ size_t -myhtml_strcasecmp(const char* str1, const char* str2); +mycore_strcasecmp(const char* str1, const char* str2); /** * Compare two strings ignoring case of the first n characters @@ -2575,7 +2461,7 @@ myhtml_strcasecmp(const char* str1, const char* str2); * @return 0 if match, otherwise index of break position */ size_t -myhtml_strncasecmp(const char* str1, const char* str2, size_t size); +mycore_strncasecmp(const char* str1, const char* str2, size_t size); /*********************************************************************************** * @@ -2587,39 +2473,39 @@ myhtml_strncasecmp(const char* str1, const char* str2, size_t size); * Tree fragment serialization * The same as myhtml_serialization_tree_buffer function */ -bool -myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str); +mystatus_t +myhtml_serialization(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str); /** * Only one tree node serialization * The same as myhtml_serialization_node_buffer function */ -bool -myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str); +mystatus_t +myhtml_serialization_node(myhtml_tree_node_t* node, mycore_string_raw_t* str); /** * Serialize tree to an output string * * @param[in] myhtml_tree_t* * @param[in] scope node - * @param[in] myhtml_string_raw_t* + * @param[in] mycore_string_raw_t* * * @return true if successful, otherwise false */ -bool -myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str); +mystatus_t +myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str); /** * Serialize node to an output string * * @param[in] myhtml_tree_t* * @param[in] node - * @param[in] myhtml_string_raw_t* + * @param[in] mycore_string_raw_t* * * @return true if successful, otherwise false */ -bool -myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str); +mystatus_t +myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_string_raw_t* str); /** * The serialize function for an entire tree @@ -2631,9 +2517,9 @@ myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* * * @return true if successful, otherwise false */ -bool +mystatus_t myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, - myhtml_callback_serialize_f callback, void* ptr); + mycore_callback_serialize_f callback, void* ptr); /** * The serialize function for a single node @@ -2645,9 +2531,9 @@ myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, * * @return true if successful, otherwise false */ -bool +mystatus_t myhtml_serialization_node_callback(myhtml_tree_node_t* node, - myhtml_callback_serialize_f callback, void* ptr); + mycore_callback_serialize_f callback, void* ptr); /*********************************************************************************** * diff --git a/source/myhtml/api_encoding.h b/source/myhtml/api_encoding.h deleted file mode 100644 index d32aa4a..0000000 --- a/source/myhtml/api_encoding.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_API_ENCODING_H -#define MyHTML_API_ENCODING_H -#pragma once - -#include "myhtml/api.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum myhtml_encoding_status { - MyHTML_ENCODING_STATUS_OK = 0x00, - MyHTML_ENCODING_STATUS_ERROR = 0x01, - MyHTML_ENCODING_STATUS_CONTINUE = 0x02, - MyHTML_ENCODING_STATUS_DONE = 0x04 -} -typedef myhtml_encoding_status_t; - -struct myhtml_encoding_result { - unsigned long first; - unsigned long second; - unsigned long third; - unsigned long result; - unsigned long result_aux; - unsigned long flag; -} -typedef myhtml_encoding_result_t; - -typedef myhtml_encoding_status_t -(*myhtml_encoding_custom_f)(unsigned const char byte, myhtml_encoding_result_t *res); - -/*********************************************************************************** - * - * MyHTML_ENCODING - * - ***********************************************************************************/ - -/** - * Clean myhtml_encoding_result_t. Use this function before and after text decode - * - * @param[in] myhtml_encoding_result_t* - */ -void -myhtml_encoding_result_clean(myhtml_encoding_result_t *res); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* api_encoding_h */ diff --git a/source/myhtml/callback.c b/source/myhtml/callback.c index b26377e..a5016b3 100644 --- a/source/myhtml/callback.c +++ b/source/myhtml/callback.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/callback.h b/source/myhtml/callback.h index fd6420a..ad2dc59 100644 --- a/source/myhtml/callback.h +++ b/source/myhtml/callback.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/charef.c b/source/myhtml/charef.c index 5947c11..1930f0c 100644 --- a/source/myhtml/charef.c +++ b/source/myhtml/charef.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/charef.h b/source/myhtml/charef.h index 53e3436..09688df 100644 --- a/source/myhtml/charef.h +++ b/source/myhtml/charef.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/charef_resource.h b/source/myhtml/charef_resource.h index 79c2086..59d93db 100644 --- a/source/myhtml/charef_resource.h +++ b/source/myhtml/charef_resource.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/data_process.c b/source/myhtml/data_process.c index a5081db..4b680c5 100644 --- a/source/myhtml/data_process.c +++ b/source/myhtml/data_process.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -19,13 +19,13 @@ */ #include "myhtml/data_process.h" -#include "myhtml/utils/resources.h" +#include "mycore/utils/resources.h" #define MyHTML_DATA_PROCESS_APPEND_WITH_PREPROCESSING() \ tmp_offset += myhtml_string_before_append_any_preprocessing(str, &data[tmp_offset], (offset - tmp_offset), \ proc_entry->tmp_str_pos_proc); \ if(offset != tmp_offset) { \ - if(proc_entry->encoding == MyHTML_ENCODING_UTF_8) \ + if(proc_entry->encoding == MyENCODING_UTF_8) \ proc_entry->tmp_str_pos_proc = myhtml_string_append_with_preprocessing(str, &data[tmp_offset], (offset - tmp_offset), \ proc_entry->emit_null_char); \ else { \ @@ -42,9 +42,9 @@ void myhtml_data_process_entry_clean(myhtml_data_process_entry_t* proc_entry) proc_entry->state = myhtml_data_process_state_data; } -void myhtml_data_process_string_append_char(myhtml_string_t* str, const char sm) +void myhtml_data_process_string_append_char(mycore_string_t* str, const char sm) { - MyHTML_STRING_REALLOC_IF_NEED(str, 2, 0); + MyCORE_STRING_REALLOC_IF_NEED(str, 2, 0); str->data[str->length] = sm; str->length++; @@ -52,7 +52,7 @@ void myhtml_data_process_string_append_char(myhtml_string_t* str, const char sm) str->data[str->length] = '\0'; } -size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { size_t tmp_offset = offset; @@ -63,7 +63,7 @@ size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, m tmp_offset += myhtml_string_before_append_any_preprocessing(str, &data[tmp_offset], (offset - tmp_offset), proc_entry->tmp_str_pos_proc); if(offset != tmp_offset) { - if(proc_entry->encoding == MyHTML_ENCODING_UTF_8) + if(proc_entry->encoding == MyENCODING_UTF_8) proc_entry->tmp_str_pos_proc = myhtml_string_append_with_preprocessing(str, &data[tmp_offset], (offset - tmp_offset), proc_entry->emit_null_char); @@ -72,7 +72,7 @@ size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, m myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, &data[tmp_offset], (offset - tmp_offset), proc_entry->encoding, proc_entry->emit_null_char); - myhtml_encoding_result_clean(&proc_entry->res); + myencoding_result_clean(&proc_entry->res); } } @@ -93,7 +93,7 @@ size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, m return offset; } -size_t myhtml_data_process_state_ampersand(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_ampersand(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { if(data[offset] == '#') { @@ -133,7 +133,7 @@ size_t myhtml_data_process_state_ampersand(myhtml_data_process_entry_t* proc_ent return offset; } -size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { size_t tmp_offset = offset; @@ -147,7 +147,7 @@ size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* pro else { /* if current charef is atrribute */ if(proc_entry->is_attributes && - (data[offset] == '=' || myhtml_string_alphanumeric_character[ (unsigned char)data[offset] ] != 0xff)) + (data[offset] == '=' || mycore_string_alphanumeric_character[ (unsigned char)data[offset] ] != 0xff)) { MyHTML_DATA_PROCESS_APPEND_WITH_PREPROCESSING() @@ -157,9 +157,9 @@ size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* pro if(current_entry->codepoints_len) { for (size_t i = 0; i < current_entry->codepoints_len; i++) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0); + MyCORE_STRING_REALLOC_IF_NEED(str, 5, 0); - proc_entry->tmp_str_pos += myhtml_encoding_codepoint_to_ascii_utf_8(current_entry->codepoints[i], &str->data[proc_entry->tmp_str_pos]); + proc_entry->tmp_str_pos += myencoding_codepoint_to_ascii_utf_8(current_entry->codepoints[i], &str->data[proc_entry->tmp_str_pos]); } str->length = proc_entry->tmp_str_pos; @@ -178,7 +178,7 @@ size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* pro return offset; } -size_t myhtml_data_process_state_ampersand_hash(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_ampersand_hash(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { if(data[offset] == 'x' || data[offset] == 'X') { myhtml_data_process_string_append_char(str, data[offset]); @@ -192,14 +192,14 @@ size_t myhtml_data_process_state_ampersand_hash(myhtml_data_process_entry_t* pro return offset; } -size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { const unsigned char *u_data = (const unsigned char*)data; size_t tmp_offset = offset; while(offset < size) { - if(myhtml_string_chars_num_map[ u_data[offset] ] == 0xff) + if(mycore_string_chars_num_map[ u_data[offset] ] == 0xff) { proc_entry->state = myhtml_data_process_state_data; @@ -217,7 +217,7 @@ size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t } if(proc_entry->tmp_num <= 0x10FFFF) { - proc_entry->tmp_num = myhtml_string_chars_num_map[ u_data[offset] ] + proc_entry->tmp_num * 10; + proc_entry->tmp_num = mycore_string_chars_num_map[ u_data[offset] ] + proc_entry->tmp_num * 10; } offset++; @@ -228,14 +228,14 @@ size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t return offset; } -size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size) +size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size) { unsigned const char *u_data = (unsigned const char*)data; size_t tmp_offset = offset; while(offset < size) { - if(myhtml_string_chars_hex_map[ u_data[offset] ] == 0xff) + if(mycore_string_chars_hex_map[ u_data[offset] ] == 0xff) { proc_entry->state = myhtml_data_process_state_data; @@ -254,7 +254,7 @@ size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry if(proc_entry->tmp_num <= 0x10FFFF) { proc_entry->tmp_num <<= 4; - proc_entry->tmp_num |= myhtml_string_chars_hex_map[ u_data[offset] ]; + proc_entry->tmp_num |= mycore_string_chars_hex_map[ u_data[offset] ]; } offset++; @@ -265,10 +265,10 @@ size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry return offset; } -void myhtml_data_process_state_end(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str) +void myhtml_data_process_state_end(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str) { /* 4 is max utf8 byte + \0 */ - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0); + MyCORE_STRING_REALLOC_IF_NEED(str, 5, 0); if(proc_entry->tmp_num <= 0x9F) proc_entry->tmp_num = replacement_character[proc_entry->tmp_num]; @@ -278,12 +278,12 @@ void myhtml_data_process_state_end(myhtml_data_process_entry_t* proc_entry, myht proc_entry->tmp_num = replacement_character[0]; str->length = proc_entry->tmp_str_pos + - myhtml_encoding_codepoint_to_ascii_utf_8(proc_entry->tmp_num, &str->data[proc_entry->tmp_str_pos]); + myencoding_codepoint_to_ascii_utf_8(proc_entry->tmp_num, &str->data[proc_entry->tmp_str_pos]); str->data[str->length] = '\0'; } -void myhtml_data_process(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t size) +void myhtml_data_process(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t size) { size_t offset = 0; @@ -292,16 +292,16 @@ void myhtml_data_process(myhtml_data_process_entry_t* proc_entry, myhtml_string_ } } -void myhtml_data_process_end(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str) +void myhtml_data_process_end(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str) { if(proc_entry->state == myhtml_data_process_state_ampersand_data && proc_entry->charef_res.last_entry) { const charef_entry_t *entry = proc_entry->charef_res.last_entry; for (size_t i = 0; i < entry->codepoints_len; i++) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0); + MyCORE_STRING_REALLOC_IF_NEED(str, 5, 0); - proc_entry->tmp_str_pos += myhtml_encoding_codepoint_to_ascii_utf_8(entry->codepoints[i], &str->data[proc_entry->tmp_str_pos]); + proc_entry->tmp_str_pos += myencoding_codepoint_to_ascii_utf_8(entry->codepoints[i], &str->data[proc_entry->tmp_str_pos]); } str->length = proc_entry->tmp_str_pos; diff --git a/source/myhtml/data_process.h b/source/myhtml/data_process.h index 558a5aa..a461d10 100644 --- a/source/myhtml/data_process.h +++ b/source/myhtml/data_process.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -35,8 +35,8 @@ struct myhtml_data_process_entry { myhtml_data_process_state_f state; /* for encodings */ - myhtml_encoding_t encoding; - myhtml_encoding_result_t res; + myencoding_t encoding; + myencoding_result_t res; /* temp */ size_t tmp_str_pos_proc; @@ -53,16 +53,16 @@ struct myhtml_data_process_entry { void myhtml_data_process_entry_clean(myhtml_data_process_entry_t* proc_entry); -void myhtml_data_process(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t size); -void myhtml_data_process_end(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str); +void myhtml_data_process(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t size); +void myhtml_data_process_end(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str); -size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size); -size_t myhtml_data_process_state_ampersand(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size); -size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size); -size_t myhtml_data_process_state_ampersand_hash(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size); -size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char *data, size_t offset, size_t size); -size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str, const char* data, size_t offset, size_t size); -void myhtml_data_process_state_end(myhtml_data_process_entry_t* proc_entry, myhtml_string_t* str); +size_t myhtml_data_process_state_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size); +size_t myhtml_data_process_state_ampersand(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size); +size_t myhtml_data_process_state_ampersand_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size); +size_t myhtml_data_process_state_ampersand_hash(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size); +size_t myhtml_data_process_state_ampersand_hash_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char *data, size_t offset, size_t size); +size_t myhtml_data_process_state_ampersand_hash_x_data(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str, const char* data, size_t offset, size_t size); +void myhtml_data_process_state_end(myhtml_data_process_entry_t* proc_entry, mycore_string_t* str); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/def.h b/source/myhtml/def.h index 389b198..8ec75e6 100644 --- a/source/myhtml/def.h +++ b/source/myhtml/def.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/encoding.c b/source/myhtml/encoding.c deleted file mode 100644 index d7a7807..0000000 --- a/source/myhtml/encoding.c +++ /dev/null @@ -1,1222 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/encoding.h" -#include "myhtml/encoding_resource.h" -#include "myhtml/utils/resources.h" - - - -myhtml_encoding_custom_f myhtml_encoding_get_function_by_id(myhtml_encoding_t idx) -{ - return myhtml_encoding_function_index[idx]; -} - -enum myhtml_encoding_status myhtml_encoding_decode_utf_8(unsigned const char data, myhtml_encoding_result_t *res) -{ - // res->first -- lower boundary - // res->second -- upper boundary - // res->result -- code point - // res->third -- bytes seen - // res->flag -- bytes needed - - if(res->flag == 0) - { - if(data <= 0x7F) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - else if(data >= 0xC2 && data <= 0xDF) { - res->flag = 1; - res->result = data - 0xC0; - } - else if(data >= 0xE0 && data <= 0xEF) { - if(data == 0xE0) { - res->first = 0xA0; - } - else if(data == 0xED){ - res->second = 0x9F; - } - - res->flag = 2; - res->result = data - 0xE0; - } - else if(data >= 0xF0 && data <= 0xF4) { - if(data == 0xF0) { - res->first = 0x90; - } - else if(data == 0xF4){ - res->second = 0x8F; - } - - res->flag = 2; - res->result = data - 0xF0; - } - else - return MyHTML_ENCODING_STATUS_ERROR; - - res->result = res->result << (6 * res->flag); - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - if(data < res->first && data > res->second) - { - res->result = 0x00; - res->flag = 0x00; - res->third = 0x00; - res->first = 0x80; - res->second = 0xBF; - - return MyHTML_ENCODING_STATUS_DONE|MyHTML_ENCODING_STATUS_ERROR; - } - - res->first = 0x80; - res->second = 0xBF; - - res->third++; - res->result += (unsigned long)(data - 0x80) << (6 * (res->flag - res->third)); - - if(res->third != res->flag) - return MyHTML_ENCODING_STATUS_CONTINUE; - - res->flag = 0x00; - res->third = 0x00; - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_ibm866(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_ibm866[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_2(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_2[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_3(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_3[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_4(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_4[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_5(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_5[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_6(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_6[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_7(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_7[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_8(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_8[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_8_i(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_8[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_10(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_10[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_13(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_13[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_14(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_14[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_15(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_15[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_16(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_iso_8859_16[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_koi8_r(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_koi8_r[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_koi8_u(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_koi8_u[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_macintosh(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_macintosh[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_874(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_874[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1250(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1250[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1251(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1251[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1252(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1252[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1253(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1253[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1254(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1254[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1255(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1255[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1256(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1256[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1257(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1257[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_windows_1258(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_windows_1258[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_x_mac_cyrillic(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - (res->result = data); - else - (res->result = myhtml_encoding_map_x_mac_cyrillic[(data - 0x80)]); - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_gbk(unsigned const char data, myhtml_encoding_result_t *res) -{ - return myhtml_encoding_decode_gb18030(data, res); -} - -unsigned long myhtml_encoding_index_gb18030_ranges_code_point(unsigned long pointer) -{ - // step 1 - if((pointer > 39419 && pointer < 189000) || pointer > 1237575) - return 0; - - // step 2 - if(pointer == 7457) - return 0xe7c7; - - // step 3 - unsigned long offset = sizeof(myhtml_encoding_map_gb18030_ranges) / (sizeof(unsigned long) * 2); - - unsigned long code_point_offset = 0; - while (offset) { - offset--; - - if(myhtml_encoding_map_gb18030_ranges[offset][0] == pointer || - myhtml_encoding_map_gb18030_ranges[offset][0] < pointer) - { - code_point_offset = myhtml_encoding_map_gb18030_ranges[offset][1]; - break; - } - } - - // step 4 - return (code_point_offset + pointer - offset); -} - -enum myhtml_encoding_status myhtml_encoding_decode_gb18030(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(res->third) { - if(data >= 0x30 && data <= 0x39) { - res->result = myhtml_encoding_index_gb18030_ranges_code_point((((res->first - 0x81) * 10 + res->second - 0x30) * - 126 + res->third - 0x81) * 10 + data - 0x30); - } - - res->first = 0; - res->second = 0; - res->third = 0; - - if(res->result) - return MyHTML_ENCODING_STATUS_OK; - - return MyHTML_ENCODING_STATUS_ERROR; - } - else if(res->second) { - if(data >= 0x81 && data <= 0xFE) { - res->third = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - res->first = 0; - res->second = 0; - - return MyHTML_ENCODING_STATUS_ERROR; - } - else if(res->first) { - if(data >= 0x30 && data <= 0x39) { - res->second = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - unsigned long lead = res->first, pointer = 0x00; - res->first = 0x00; - - unsigned char offset; - - if(data < 0x7F) - offset = 0x40; - else - offset = 0x41; - - if((data >= 0x40 && data <= 0x7E) || - (data >= 0x80 && data <= 0xFE)) - { - pointer = (lead - 0x81) * 190 + (data - offset); - } - else { - return MyHTML_ENCODING_STATUS_ERROR; - } - - res->result = myhtml_encoding_map_gb18030[pointer]; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data <= 0x7F) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data == 0x80) { - res->result = 0x20ac; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data >= 0x81 && data <= 0xFE) { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - return MyHTML_ENCODING_STATUS_ERROR; -} - -enum myhtml_encoding_status myhtml_encoding_decode_big5(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(res->first) - { - unsigned long lead = res->first; - unsigned long pointer = 0x00; - unsigned long offset; - - res->first = 0x00; - - if(data < 0x7F) - offset = 0x40; - else - offset = 0x62; - - if((data >= 0x40 && data <= 0x7E) || - (data >= 0xA1 && data <= 0xFE)) - { - pointer = (lead - 0x81) * 157 + (data - offset); - } - - switch (pointer) { - case 1133: - // U+00CA U+0304 - res->result = 0x00ca; - res->result_aux = 0x0304; - return MyHTML_ENCODING_STATUS_OK; - case 1135: - // U+00CA U+030C - res->result = 0x00ca; - res->result_aux = 0x030c; - return MyHTML_ENCODING_STATUS_OK; - case 1164: - // U+00EA U+0304 - res->result = 0x00ea; - res->result_aux = 0x0304; - return MyHTML_ENCODING_STATUS_OK; - case 1166: - // U+00EA U+030C - res->result = 0x00ea; - res->result_aux = 0x030c; - return MyHTML_ENCODING_STATUS_OK; - default: - break; - } - - if(pointer == 0) - return MyHTML_ENCODING_STATUS_ERROR; - - res->result = myhtml_encoding_map_big5[pointer]; - res->result_aux = 0; - - return MyHTML_ENCODING_STATUS_OK; - } - - if(data <= 0x7F) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data >= 0x81 && data <= 0xFE) { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - return MyHTML_ENCODING_STATUS_ERROR; -} - -enum myhtml_encoding_status myhtml_encoding_decode_euc_jp(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(res->first == 0x8E && (data >= 0xA1 && data <= 0xDF)) { - res->first = 0x00; - res->result = 0xFF61 + data - 0xA1; - - return MyHTML_ENCODING_STATUS_OK; - } - else if(res->first == 0x8F && (data >= 0xA1 && data <= 0xFE)) { - res->flag = 1; - res->first = data; - - return MyHTML_ENCODING_STATUS_CONTINUE; - } - else if(res->first) - { - unsigned long lead = res->first; - res->first = 0x00; - - if((lead >= 0xA1 && lead <= 0xFE) && - (data >= 0xA1 && data <= 0xFE)) - { - unsigned long idx = (lead - 0xA1) * 94 + data - 0xA1; - if(res->flag) { - res->result = myhtml_encoding_map_jis0212[idx]; - } - else { - res->result = myhtml_encoding_map_jis0208[idx]; - } - - res->flag = 0; - - if(res->result == 0) - return MyHTML_ENCODING_STATUS_ERROR; - - return MyHTML_ENCODING_STATUS_OK; - } - } - - if(data <= 0x7F) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - if((data >= 0x8E && data <= 0x8F) || - (data >= 0xA1 && data <= 0xFE)) - { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - return MyHTML_ENCODING_STATUS_ERROR; -} - -enum myhtml_encoding_status myhtml_encoding_decode_iso_2022_jp(unsigned const char data, myhtml_encoding_result_t *res) -{ - // res->first -- lead - // res->second -- state - // res->third -- output state - // res->flag -- output flag - - switch (res->second) { - case 0: // ASCII - { - if(data == 0x1B) { - res->second = 6; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - else if((data <= 0x7F) && - data != 0x0E && data != 0x0F && data != 0x1B) - { - res->flag = 0; - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - res->flag = 0; - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 1: // Roman - { - if(data == 0x1B) { - res->second = 6; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - else if(data == 0x5C) { - res->flag = 0; - res->result = 0x00A5; - return MyHTML_ENCODING_STATUS_OK; - } - else if(data == 0x7E) { - res->flag = 0; - res->result = 0x203E; - return MyHTML_ENCODING_STATUS_OK; - } - else if((data <= 0x7F) && - data != 0x0E && data != 0x0F && - data != 0x1B && data != 0x5C && - data != 0x7E) - { - res->flag = 0; - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - res->flag = 0; - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 3: // Katakana - { - if(data == 0x1B) { - res->second = 6; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - else if(data >= 0x21 && data <= 0x5F) { - res->flag = 0; - res->result = 0xFF61 + data - 0x21; - - return MyHTML_ENCODING_STATUS_OK; - } - - res->flag = 0; - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 4: // Lead byte - { - if(data == 0x1B) { - res->second = 6; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - else if(data >= 0x21 && data <= 0x7E) { - res->flag = 0; - res->first = data; - res->second = 5; - - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - res->flag = 0; - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 5: // Trail byte - { - if(data == 0x1B) { - res->second = 6; - return MyHTML_ENCODING_STATUS_CONTINUE|MyHTML_ENCODING_STATUS_ERROR; - } - else if(data >= 0x21 && data <= 0x7E) { - res->second = 4; - - unsigned long pointer = (res->first - 0x21) * 94 + data - 0x21; - res->result = myhtml_encoding_map_jis0208[pointer]; - - if(res->result == 0) - return MyHTML_ENCODING_STATUS_ERROR; - - return MyHTML_ENCODING_STATUS_OK; - } - - res->second = 4; - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 6: // Escape start - { - if(data == 0x24 || data == 0x28) { - res->first = data; - res->second = 7; - - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - res->flag = 0; - res->second = res->third; - - return MyHTML_ENCODING_STATUS_ERROR; - } - - case 7: // Escape - { - unsigned long lead = res->first; - res->first = 0x00; - - res->second = 0x00; - - if(lead == 0x28 && data == 0x42) { - res->second = 0; - } - else if(lead == 0x28 && data == 0x4A) { - res->second = 1; - } - else if(lead == 0x28 && data == 0x49) { - res->second = 2; - } - else if(lead == 0x24 && (data == 0x40 || data == 0x42)) { - res->second = 4; - } - - if(res->second) - { - res->third = res->second; - - unsigned long output_flag = res->flag; - res->flag = 1; - - if(output_flag) - return MyHTML_ENCODING_STATUS_ERROR; - - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - res->flag = 0; - res->second = res->third; - - return MyHTML_ENCODING_STATUS_ERROR; - } - - default: - break; - } - - return 0; -} - -enum myhtml_encoding_status myhtml_encoding_decode_shift_jis(unsigned const char data, myhtml_encoding_result_t *res) -{ - // res->first -- lead - - if(res->first) - { - unsigned long offset; - unsigned long lead_offset; - - if(data < 0x7F) - offset = 0x40; - else - offset = 0x41; - - if(res->first < 0xA0) - lead_offset = 0x81; - else - lead_offset = 0xC1; - - unsigned long pointer = 0x00; - if((data >= 0x40 && data <= 0x7E) || - (data >= 0x80 && data <= 0xFC)) - { - pointer = (res->first - lead_offset) * 188 + data - offset; - } - - if(pointer) - res->result = myhtml_encoding_map_jis0208[pointer]; - else - res->result = 0x00; - - if(res->result == 0x00 && (pointer >= 8836 && pointer <= 10528)) { - res->result = 0xE000 + pointer - 8836; - } - - if(res->result) - return MyHTML_ENCODING_STATUS_OK; - - return MyHTML_ENCODING_STATUS_ERROR; - } - - if((data <= 0x7F) || data == 0x80) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data >= 0xA1 && data <= 0xDF) { - res->result = 0xFF61 + data - 0xA1; - return MyHTML_ENCODING_STATUS_OK; - } - - if((data >= 0x81 && data <= 0x9F) || - (data >= 0xE0 && data <= 0xFC)) { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - return MyHTML_ENCODING_STATUS_ERROR; -} - -enum myhtml_encoding_status myhtml_encoding_decode_euc_kr(unsigned const char data, myhtml_encoding_result_t *res) -{ - // res->first -- lead - - if(res->first) - { - unsigned long lead = res->first, pointer = 0x00; - res->first = 0x00; - - if(data >= 0x41 && data <= 0xFE) { - pointer = (lead - 0x81) * 190 + (data - 0x41); - } - - if(pointer) - res->result = myhtml_encoding_map_euc_kr[pointer]; - - if(res->result) - return MyHTML_ENCODING_STATUS_OK; - - return MyHTML_ENCODING_STATUS_ERROR; - } - - if(data <= 0x7F) { - res->result = data; - return MyHTML_ENCODING_STATUS_OK; - } - - if(data >= 0x81 && data <= 0xFE) { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - return MyHTML_ENCODING_STATUS_ERROR; -} - -enum myhtml_encoding_status myhtml_encoding_decode_shared_utf_16(unsigned const char data, myhtml_encoding_result_t *res) -{ - // res->first -- lead - // res->second -- lead surrogate - // res->flag -- flag - - if(res->first == 0x00) { - res->first = data; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - unsigned long code_unit; - if(res->flag) - (code_unit = (res->first << 8) + data); - else - (code_unit = (unsigned long)(data << 8) + res->first); - - res->first = 0x00; - - if(res->second) { - unsigned long lead_surrogate = res->second; - res->second = 0x00; - - if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) { - res->result = 0x10000 + ((lead_surrogate - 0xD800) << 10) + (code_unit - 0xDC00); - return MyHTML_ENCODING_STATUS_OK; - } - - unsigned char byte1 = (unsigned char)(code_unit >> 8); - unsigned char byte2 = (unsigned char)(code_unit & 0x00FF); - - if(res->flag) { - res->result = byte1; - res->result_aux = byte2; - } - else { - res->result = byte2; - res->result_aux = byte1; - } - - return MyHTML_ENCODING_STATUS_DONE|MyHTML_ENCODING_STATUS_ERROR; - } - - if(code_unit >= 0xD800 && code_unit <= 0xDBFF) { - res->second = code_unit; - return MyHTML_ENCODING_STATUS_CONTINUE; - } - - if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) { - return MyHTML_ENCODING_STATUS_ERROR; - } - - res->result = code_unit; - - return MyHTML_ENCODING_STATUS_OK; -} - -enum myhtml_encoding_status myhtml_encoding_decode_utf_16be(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(res->flag == 0) - res->flag = 1; - - return myhtml_encoding_decode_shared_utf_16(data, res); -} - -enum myhtml_encoding_status myhtml_encoding_decode_utf_16le(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(res->flag) - res->flag = 0; - - return myhtml_encoding_decode_shared_utf_16(data, res); -} - -enum myhtml_encoding_status myhtml_encoding_decode_x_user_defined(unsigned const char data, myhtml_encoding_result_t *res) -{ - if(data <= 0x7F) - res->result = data; - else - res->result = 0xF780 + data - 0x80; - - return MyHTML_ENCODING_STATUS_OK; -} - -void myhtml_encoding_result_clean(myhtml_encoding_result_t *res) -{ - memset(res, 0, sizeof(myhtml_encoding_result_t)); -} - -size_t myhtml_encoding_codepoint_ascii_length(size_t codepoint) -{ - if (codepoint <= 0x0000007F) { - return 1; - } - else if (codepoint <= 0x000007FF) { - return 2; - } - else if (codepoint <= 0x0000FFFF) { - return 3; - } - else if (codepoint <= 0x001FFFFF) { - return 4; - } - /* not uses in unicode */ - else if (codepoint <= 0x03FFFFFF) { - return 5; - } - else if (codepoint <= 0x7FFFFFFF) { - return 6; - } - - return 0; -} - -size_t myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data) -{ - /* 0x80 -- 10xxxxxx */ - /* 0xC0 -- 110xxxxx */ - /* 0xE0 -- 1110xxxx */ - /* 0xF0 -- 11110xxx */ - /* 0xF8 -- 111110xx */ - /* 0xFC -- 1111110x */ - - if (codepoint <= 0x0000007F) { - /* 0xxxxxxx */ - data[0] = (char)codepoint; - return 1; - } - else if (codepoint <= 0x000007FF) { - /* 110xxxxx 10xxxxxx */ - data[0] = (char)(0xC0 | (codepoint >> 6 )); - data[1] = (char)(0x80 | (codepoint & 0x3F)); - - return 2; - } - else if (codepoint <= 0x0000FFFF) { - /* 1110xxxx 10xxxxxx 10xxxxxx */ - data[0] = (char)(0xE0 | ((codepoint >> 12))); - data[1] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); - data[2] = (char)(0x80 | ( codepoint & 0x3F)); - - return 3; - } - else if (codepoint <= 0x001FFFFF) { - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - data[0] = (char)(0xF0 | ( codepoint >> 18)); - data[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F)); - data[2] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); - data[3] = (char)(0x80 | ( codepoint & 0x3F)); - - return 4; - } - /* not uses in unicode */ - // else if (codepoint <= 0x03FFFFFF) { - // /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - // data[0] = 0xF8 | ( codepoint >> 24); - // data[1] = 0x80 | ((codepoint >> 18) & 0x3F); - // data[2] = 0x80 | ((codepoint >> 12) & 0x3F); - // data[3] = 0x80 | ((codepoint >> 6 ) & 0x3F); - // data[4] = 0x80 | ( codepoint & 0x3F); - // - // return 5; - // } - // else if (codepoint <= 0x7FFFFFFF) { - // /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - // data[0] = 0xFC | ( codepoint >> 30); - // data[1] = 0x80 | ((codepoint >> 24) & 0x3F); - // data[2] = 0x80 | ((codepoint >> 18) & 0x3F); - // data[3] = 0x80 | ((codepoint >> 12) & 0x3F); - // data[4] = 0x80 | ((codepoint >> 6 ) & 0x3F); - // data[5] = 0x80 | ( codepoint & 0x3F); - // - // return 6; - // } - - return 0; -} - -size_t myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(size_t codepoint, char *data) -{ - /* 0x80 -- 10xxxxxx */ - /* 0xC0 -- 110xxxxx */ - /* 0xE0 -- 1110xxxx */ - /* 0xF0 -- 11110xxx */ - /* 0xF8 -- 111110xx */ - /* 0xFC -- 1111110x */ - - if (codepoint <= 0x0000007F) { - /* 0xxxxxxx */ - data[0] = (char)myhtml_string_chars_lowercase_map[ codepoint ]; - return 1; - } - else if (codepoint <= 0x000007FF) { - /* 110xxxxx 10xxxxxx */ - data[0] = (char)(0xC0 | (codepoint >> 6 )); - data[1] = (char)(0x80 | (codepoint & 0x3F)); - - return 2; - } - else if (codepoint <= 0x0000FFFF) { - /* 1110xxxx 10xxxxxx 10xxxxxx */ - data[0] = (char)(0xE0 | ((codepoint >> 12))); - data[1] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); - data[2] = (char)(0x80 | ( codepoint & 0x3F)); - - return 3; - } - else if (codepoint <= 0x001FFFFF) { - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - data[0] = (char)(0xF0 | ( codepoint >> 18)); - data[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F)); - data[2] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); - data[3] = (char)(0x80 | ( codepoint & 0x3F)); - - return 4; - } - /* not uses in unicode */ - // else if (codepoint <= 0x03FFFFFF) { - // /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - // data[0] = 0xF8 | ( codepoint >> 24); - // data[1] = 0x80 | ((codepoint >> 18) & 0x3F); - // data[2] = 0x80 | ((codepoint >> 12) & 0x3F); - // data[3] = 0x80 | ((codepoint >> 6 ) & 0x3F); - // data[4] = 0x80 | ( codepoint & 0x3F); - // - // return 5; - // } - // else if (codepoint <= 0x7FFFFFFF) { - // /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - // data[0] = 0xFC | ( codepoint >> 30); - // data[1] = 0x80 | ((codepoint >> 24) & 0x3F); - // data[2] = 0x80 | ((codepoint >> 18) & 0x3F); - // data[3] = 0x80 | ((codepoint >> 12) & 0x3F); - // data[4] = 0x80 | ((codepoint >> 6 ) & 0x3F); - // data[5] = 0x80 | ( codepoint & 0x3F); - // - // return 6; - // } - - return 0; -} - -size_t myhtml_encoding_ascii_utf_8_to_codepoint(const unsigned char* data, size_t* codepoint) -{ - if (*data < 0x80){ - /* 0xxxxxxx */ - *codepoint = (size_t)*data; - return 1; - } - else if ((*data & 0xe0) == 0xc0) { - /* 110xxxxx 10xxxxxx */ - *codepoint = (data[0] ^ (0xC0 & data[0])) << 6; - *codepoint |= (data[1] ^ (0x80 & data[1])); - - return 2; - } - else if ((*data & 0xf0) == 0xe0) { - /* 1110xxxx 10xxxxxx 10xxxxxx */ - *codepoint = (data[0] ^ (0xE0 & data[0])) << 12; - *codepoint |= (data[1] ^ (0x80 & data[1])) << 6; - *codepoint |= (data[2] ^ (0x80 & data[2])); - - return 3; - } - else if ((*data & 0xf8) == 0xf0) { - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *codepoint = (data[0] ^ (0xF0 & data[0])) << 18; - *codepoint |= (data[1] ^ (0x80 & data[1])) << 12; - *codepoint |= (data[2] ^ (0x80 & data[2])) << 6; - *codepoint |= (data[3] ^ (0x80 & data[3])); - - return 4; - } - - return 0; -} - -size_t myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data) -{ - if((codepoint >> 16)) { - codepoint -= 0x10000; - - size_t high = 0xD800 | (codepoint >> 10); - size_t low = 0xDC00 | (codepoint & 0x3FF); - - data[0] = (char)(high >> 8); - data[1] = (char)high; - data[2] = (char)(low >> 8); - data[3] = (char)low; - - return 4; - } - - data[0] = (char)(codepoint >> 8); - data[1] = (char)codepoint; - - return 2; -} - -size_t myhtml_encoding_convert_to_ascii_utf_8(myhtml_string_raw_t* raw_str, const char* buff, size_t length, myhtml_encoding_t encoding) -{ - if(raw_str->data == NULL) { - raw_str->size = length + 1; - raw_str->length = 0; - raw_str->data = myhtml_malloc(sizeof(char) * raw_str->size); - - if(raw_str->data == NULL) - return 0; - } - - myhtml_encoding_result_t res = {0}; - - unsigned const char* u_buff = (unsigned const char*)buff; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); - - size_t i; - for (i = 0; i < length; i++) - { - if(func(u_buff[i], &res) == MyHTML_ENCODING_STATUS_OK) { - if((raw_str->length + 6) >= raw_str->size) { - size_t new_size = raw_str->length + 6 + (length / 2); - char *new_data = myhtml_realloc(raw_str->data, sizeof(char) * new_size); - - if(new_data == NULL) { - return 0; - } - - raw_str->data = new_data; - raw_str->size = new_size; - } - - raw_str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res.result, &raw_str->data[raw_str->length]); - } - } - - return i; -} - - diff --git a/source/myhtml/encoding.h b/source/myhtml/encoding.h deleted file mode 100644 index e575690..0000000 --- a/source/myhtml/encoding.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_ENCODING_H -#define MyHTML_ENCODING_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" -#include "myhtml/utils.h" -#include "myhtml/mystring.h" - -enum myhtml_encoding_status { - MyHTML_ENCODING_STATUS_OK = 0x00, - MyHTML_ENCODING_STATUS_ERROR = 0x01, - MyHTML_ENCODING_STATUS_CONTINUE = 0x02, - MyHTML_ENCODING_STATUS_DONE = 0x04 -} -typedef myhtml_encoding_status_t; - -struct myhtml_encoding_result { - unsigned long first; - unsigned long second; - unsigned long third; - unsigned long result; - unsigned long result_aux; - unsigned long flag; -}; - -struct myhtml_encoding_trigram { - const unsigned char trigram[3]; - size_t value; -}; - -struct myhtml_encoding_trigram_result { - size_t count; - size_t value; -}; - -struct myhtml_encoding_unicode_result { - size_t count_ascii; - size_t count_good; - size_t count_bad; -}; - -struct myhtml_encoding_detect_name_entry { - const char* name; - size_t name_length; - const char* label; - size_t label_length; - - myhtml_encoding_t encoding; - - size_t next; - size_t curr; -}; - -struct myhtml_encoding_detect_attr { - size_t key_begin; - size_t key_length; - size_t value_begin; - size_t value_length; - - myhtml_encoding_detect_attr_t *next; -}; - -struct myhtml_encoding_entry_name_index { - const char *name; - size_t length; -}; - -typedef myhtml_encoding_status_t (*myhtml_encoding_custom_f)(unsigned const char data, myhtml_encoding_result_t *res); - -myhtml_encoding_custom_f myhtml_encoding_get_function_by_id(myhtml_encoding_t idx); - -enum myhtml_encoding_status myhtml_encoding_decode_utf_8(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_ibm866(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_2(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_3(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_4(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_5(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_6(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_7(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_8(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_8_i(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_10(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_13(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_14(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_15(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_8859_16(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_koi8_r(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_koi8_u(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_macintosh(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_874(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1250(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1251(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1252(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1253(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1254(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1255(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1256(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1257(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_windows_1258(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_x_mac_cyrillic(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_gbk(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_gb18030(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_big5(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_euc_jp(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_iso_2022_jp(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_shift_jis(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_euc_kr(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_shared_utf_16(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_utf_16be(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_utf_16le(unsigned const char data, myhtml_encoding_result_t *res); -enum myhtml_encoding_status myhtml_encoding_decode_x_user_defined(unsigned const char data, myhtml_encoding_result_t *res); - -size_t myhtml_encoding_codepoint_ascii_length(size_t codepoint); -size_t myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data); -size_t myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(size_t codepoint, char *data); -size_t myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data); -size_t myhtml_encoding_ascii_utf_8_to_codepoint(const unsigned char* data, size_t* codepoint); - -void myhtml_encoding_result_clean(myhtml_encoding_result_t *res); - -bool myhtml_encoding_detect(const char *text, size_t length, myhtml_encoding_t *encoding); -bool myhtml_encoding_detect_russian(const char *text, size_t length, myhtml_encoding_t *encoding); -bool myhtml_encoding_detect_unicode(const char *text, size_t length, myhtml_encoding_t *encoding); -bool myhtml_encoding_detect_bom(const char *text, size_t length, myhtml_encoding_t *encoding); -bool myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encoding_t *encoding, const char **new_text, size_t *new_size); - -size_t myhtml_encoding_convert_to_ascii_utf_8(myhtml_string_raw_t* raw_str, const char* buff, size_t length, myhtml_encoding_t encoding); - -const myhtml_encoding_detect_name_entry_t * myhtml_encoding_name_entry_by_name(const char* name, size_t length); -bool myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t *encoding); -const char * myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length); - -myhtml_encoding_t myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* encoding_h */ diff --git a/source/myhtml/encoding_detect.c b/source/myhtml/encoding_detect.c deleted file mode 100644 index 4dc0842..0000000 --- a/source/myhtml/encoding_detect.c +++ /dev/null @@ -1,948 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/encoding.h" -#include "myhtml/encoding_detect_resource.h" -#include "myhtml/utils/resources.h" - -myhtml_encoding_trigram_result_t myhtml_encoding_detect_by_trigram(unsigned const char *u_text, size_t length, - const myhtml_encoding_trigram_t *list, size_t list_length, - size_t max_sum_for_break) -{ - myhtml_encoding_trigram_result_t res = {0, 0}; - - for (size_t i = 0; i < (length - 3); i++) { - if(u_text[i] > 127) - { - for (size_t j = 0; j < list_length; j++) - { - if(memcmp(list[j].trigram, &u_text[i], 3) == 0) { - res.value += list[j].value; - res.count++; - - if(res.value >= max_sum_for_break) - i = length; - - break; - } - } - } - } - - return res; -} - -bool myhtml_encoding_detect_russian_has_end(myhtml_encoding_trigram_result_t *res, size_t min_count, size_t min_value) -{ - if(res->value >= min_value || res->count >= min_count) - return true; - - return false; -} - -bool myhtml_encoding_detect_unicode_has_end(myhtml_encoding_unicode_result_t *res, size_t max_bad_percent) -{ - if(res->count_good == 0) { - if(res->count_bad) - return false; - - return true; - } - else if(res->count_bad == 0) - return true; - - size_t percent_bad = (res->count_bad * 100) / res->count_good; - if(percent_bad < max_bad_percent) - return true; - - return false; -} - -myhtml_encoding_unicode_result_t myhtml_encoding_detect_utf_8(unsigned const char *u_text, size_t length) -{ - size_t i = 0; - myhtml_encoding_unicode_result_t res = {0, 0, 0}; - - while(i < length) - { - if((u_text[i] & 0x80) == 0x00) { - i++; - res.count_ascii++; - } - else if((u_text[i] & 0xE0) == 0xC0) { - i += 2; - - if(i >= length) - break; - - if(((u_text[i - 1] & 0x80) && (u_text[i - 1] & 0x40) == 0)) - res.count_good++; - else - res.count_bad++; - } - else if((u_text[i] & 0xF0) == 0xE0) { - i += 3; - - if(i >= length) - break; - - if(((u_text[i - 1] & 0x80) && (u_text[i - 1] & 0x40) == 0) && - ((u_text[i - 2] & 0x80) && (u_text[i - 2] & 0x40) == 0)) { - res.count_good++; - } - else - res.count_bad++; - } - else if((u_text[i] & 0xF8) == 0xF0) { - i += 4; - - if(i >= length) - break; - - if(((u_text[i - 1] & 0x80) && (u_text[i - 1] & 0x40) == 0) && - ((u_text[i - 2] & 0x80) && (u_text[i - 2] & 0x40) == 0) && - ((u_text[i - 3] & 0x80) && (u_text[i - 3] & 0x40) == 0)) { - res.count_good++; - } - else - res.count_bad++; - } - else { - i++; - res.count_bad++; - } - } - - return res; -} - -myhtml_encoding_unicode_result_t myhtml_encoding_detect_utf_16(unsigned const char *u_text, size_t length) -{ - size_t i = 0; - myhtml_encoding_unicode_result_t res = {0, 0, 0}; - - while(i < length) - { - if(u_text[i] == 0x00) { - if((i % 2) == 0) { - i++; - - if(u_text[i] > 0x1F && u_text[i] < 0x7F) - res.count_bad++; - } - else { - if(u_text[(i - 1)] > 0x1F && u_text[(i - 1)] < 0x7F) - res.count_good++; - - i++; - } - } - else - i++; - } - - return res; -} - -bool myhtml_encoding_detect_bom(const char *text, size_t length, myhtml_encoding_t *encoding) -{ - unsigned const char *u_text = (unsigned const char*)text; - - if(length > 2) { - if(u_text[0] == 0xEF && - u_text[1] == 0xBB && - u_text[2] == 0xBF) - { - *encoding = MyHTML_ENCODING_UTF_8; - return true; - } - } - - if(length > 1) { - if(u_text[0] == 0xFE && u_text[1] == 0xFF) { - *encoding = MyHTML_ENCODING_UTF_16BE; - return true; - } - - if(u_text[0] == 0xFF && u_text[1] == 0xFE) { - *encoding = MyHTML_ENCODING_UTF_16LE; - return true; - } - } - -// //for UTF-32 -// if(length > 3) { -// if(u_text[0] == 0x00 && -// u_text[1] == 0x00 && -// u_text[2] == 0xFE && -// u_text[3] == 0xFF) -// { -// *encoding = MyHTML_ENCODING_UTF_32BE; -// return true; -// } -// -// if(u_text[0] == 0xFF && -// u_text[1] == 0xFE && -// u_text[2] == 0x00 && -// u_text[3] == 0x00) -// { -// *encoding = MyHTML_ENCODING_UTF_32LE; -// return true; -// } -// } - - return false; -} - -bool myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encoding_t *encoding, const char **new_text, size_t *new_size) -{ - if(myhtml_encoding_detect_bom(text, length, encoding)) - { - if(*encoding == MyHTML_ENCODING_UTF_8) { - *new_text = &text[3]; - *new_size = length - 3; - } - else { - *new_text = &text[2]; - *new_size = length - 2; - } - - return true; - } - - return false; -} - -bool myhtml_encoding_detect_unicode(const char *text, size_t length, myhtml_encoding_t *encoding) -{ - unsigned const char *u_text = (unsigned const char*)text; - *encoding = MyHTML_ENCODING_DEFAULT; - - myhtml_encoding_unicode_result_t res = myhtml_encoding_detect_utf_16(u_text, length); - - if(res.count_bad == 0 && res.count_good >= 3) { - *encoding = MyHTML_ENCODING_UTF_16LE; - return true; - } - else if(res.count_bad >= 3 && res.count_good == 0) { - *encoding = MyHTML_ENCODING_UTF_16BE; - return true; - } - - res = myhtml_encoding_detect_utf_8(u_text, length); - if(myhtml_encoding_detect_unicode_has_end(&res, 10)) { - *encoding = MyHTML_ENCODING_UTF_8; - return true; - } - - return false; -} - -bool myhtml_encoding_detect_russian(const char *text, size_t length, myhtml_encoding_t *encoding) -{ - unsigned const char *u_text = (unsigned const char*)text; - - size_t min_count = 50; - size_t min_value = 100000; - size_t max_value = 0; - - *encoding = MyHTML_ENCODING_DEFAULT; - - myhtml_encoding_trigram_result_t - res = myhtml_encoding_detect_by_trigram(u_text, length, myhtml_encoding_detect_trigrams_index_windows_1251, 1000, min_value); - if(myhtml_encoding_detect_russian_has_end(&res, min_count, min_value)) { - *encoding = MyHTML_ENCODING_WINDOWS_1251; - return true; - } - - max_value = res.value; - if(max_value) { - *encoding = MyHTML_ENCODING_WINDOWS_1251; - } - - res = myhtml_encoding_detect_by_trigram(u_text, length, myhtml_encoding_detect_trigrams_index_koi8_r, 1000, min_value); - if(myhtml_encoding_detect_russian_has_end(&res, min_count, min_value)) { - *encoding = MyHTML_ENCODING_KOI8_R; - return true; - } - - if(max_value < res.value) { - *encoding = MyHTML_ENCODING_KOI8_R; - max_value = res.value; - } - - res = myhtml_encoding_detect_by_trigram(u_text, length, myhtml_encoding_detect_trigrams_index_iso_8859_5, 1000, min_value); - if(myhtml_encoding_detect_russian_has_end(&res, min_count, min_value)) { - *encoding = MyHTML_ENCODING_ISO_8859_5; - return true; - } - - if(max_value < res.value) { - *encoding = MyHTML_ENCODING_ISO_8859_5; - max_value = res.value; - } - - res = myhtml_encoding_detect_by_trigram(u_text, length, myhtml_encoding_detect_trigrams_index_x_mac_cyrillic, 1000, min_value); - if(myhtml_encoding_detect_russian_has_end(&res, min_count, min_value)) { - *encoding = MyHTML_ENCODING_X_MAC_CYRILLIC; - return true; - } - - if(max_value < res.value) { - *encoding = MyHTML_ENCODING_X_MAC_CYRILLIC; - max_value = res.value; - } - - res = myhtml_encoding_detect_by_trigram(u_text, length, myhtml_encoding_detect_trigrams_index_ibm866, 1000, min_value); - if(myhtml_encoding_detect_russian_has_end(&res, min_count, min_value)) { - *encoding = MyHTML_ENCODING_IBM866; - return true; - } - - if(max_value < res.value) { - *encoding = MyHTML_ENCODING_IBM866; - } - - return false; -} - -bool myhtml_encoding_detect(const char *text, size_t length, myhtml_encoding_t *encoding) -{ - *encoding = MyHTML_ENCODING_DEFAULT; - - if(myhtml_encoding_detect_unicode(text, length, encoding)) - return true; - - if(myhtml_encoding_detect_russian(text, length, encoding)) - return true; - - return false; -} - -const myhtml_encoding_detect_name_entry_t * myhtml_encoding_name_entry_by_name(const char* name, size_t length) -{ - size_t idx = ((myhtml_string_chars_lowercase_map[ (const unsigned char)name[0] ] * - myhtml_string_chars_lowercase_map[ (const unsigned char)name[(length - 1)] ] * - length) - % MyHTML_ENCODING_DETECT_NAME_STATIC_SIZE) + 1; - - while (myhtml_encoding_detect_name_entry_static_list_index[idx].label) - { - if(myhtml_encoding_detect_name_entry_static_list_index[idx].label_length == length) { - if(myhtml_strncasecmp(myhtml_encoding_detect_name_entry_static_list_index[idx].label, name, length) == 0) - return &myhtml_encoding_detect_name_entry_static_list_index[idx]; - - if(myhtml_encoding_detect_name_entry_static_list_index[idx].next) - idx = myhtml_encoding_detect_name_entry_static_list_index[idx].next; - else - return NULL; - } - else if(myhtml_encoding_detect_name_entry_static_list_index[idx].label_length > length) { - return NULL; - } - else { - idx = myhtml_encoding_detect_name_entry_static_list_index[idx].next; - } - } - - return NULL; -} - -bool myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t *encoding) -{ - const myhtml_encoding_detect_name_entry_t *entry = myhtml_encoding_name_entry_by_name(name, length); - - if(entry) { - if(encoding) - *encoding = entry->encoding; - - return true; - } - - return false; -} - -const char * myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length) -{ - if(encoding >= MyHTML_ENCODING_LAST_ENTRY) { - if(length) { - *length = 0; - } - - return NULL; - } - - const myhtml_encoding_entry_name_index_t *entry = &myhtml_encoding_entry_name_index_static_list_index[encoding]; - - if(length) { - *length = entry->length; - } - - return entry->name; -} - -/* - When an algorithm requires a user agent to prescan a byte stream to determine its encoding, - given some defined end condition, then it must run the following steps. - These steps either abort unsuccessfully or return a character encoding. - If at any point during these steps (including during instances of the get an attribute algorithm invoked by this one) - the user agent either runs out of bytes (meaning the position pointer created in the first step below goes beyond the end of the byte stream obtained so far) - or reaches its end condition, then abort the prescan a byte stream to determine its encoding algorithm unsuccessfully. -*/ - -bool myhtml_encoding_algorithm_extracting_character_encoding_from_meta_element(const char *data, size_t data_size, myhtml_encoding_t *encoding) -{ - *encoding = MyHTML_ENCODING_NOT_DETERMINED; - - /* 1 */ - size_t length = 0; - size_t charset_length = strlen("charset"); - - bool is_get_pos = false; - const unsigned char *udata = (const unsigned char *)data; - - /* 2 */ - while((length + charset_length) < data_size) { - if(myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"charset", &udata[length])) - { - length += charset_length; - - /* 2 */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20) - { - break; - } - - length++; - } - - /* 4 */ - if(udata[length] == 0x3D) { /* EQUALS SIGN (=) */ - is_get_pos = true; - - length++; - break; - } - } - - length++; - } - - if(is_get_pos == false || length >= data_size) - return false; - - /* 5 */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20) - { - break; - } - - length++; - } - - if(length >= data_size) - return false; - - /* 6 */ - /* " */ - if(udata[length] == 0x22) - { - length++; - size_t begin = length; - - while(length < data_size) { - if(udata[length] == 0x22) - return myhtml_encoding_by_name(&data[begin], (length - begin), encoding); - - length++; - } - - return false; - } - - /* ' */ - if(udata[length] == 0x27) - { - length++; - size_t begin = length; - - while(length < data_size) { - if(udata[length] == 0x27) - return myhtml_encoding_by_name(&data[begin], (length - begin), encoding); - - length++; - } - - return false; - } - - /* other */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20) - { - size_t begin = length; - - while(length < data_size) { - /* SEMICOLON character (;) */ - if(udata[length] == 0x3B) { - return myhtml_encoding_by_name(&data[begin], (length - begin), encoding); - } - - length++; - } - - return myhtml_encoding_by_name(&data[begin], (length - begin), encoding); - } - - length++; - } - - return false; -} - -bool myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_spaces(const unsigned char *udata, size_t *data_length, size_t data_size, myhtml_encoding_detect_attr_t *attr) -{ - size_t length = *data_length; - - /* set position */ - attr->key_length = length - attr->key_begin; - - /* 6 */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20 && udata[length] != 0x2F) - { - break; - } - - length++; - } - - if(length >= data_size) { - *data_length = length; - return false; - } - - /* 7 */ - if(udata[length] != 0x3D) { - *data_length = length; - return false; - } - - /* 8 */ - *data_length = (length + 1); - return true; -} - -size_t myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(const unsigned char *udata, size_t length, size_t data_size, myhtml_encoding_detect_attr_t *attr, bool *it_last) -{ - /* 9 */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20) - { - break; - } - - length++; - } - - if(length >= data_size) { - *it_last = true; - return length; - } - - /* 10 */ - switch (udata[length]) { - case 0x22: /* (ASCII ") */ - length++; - attr->value_begin = length; - - while(length < data_size) { - if(udata[length] == 0x22) - { - attr->value_length = length - attr->value_begin; - return (length + 1); - } - - length++; - } - - break; - - case 0x27: /* (ASCII ') */ - length++; - attr->value_begin = length; - - while(length < data_size) { - if(udata[length] == 0x27) - { - attr->value_length = length - attr->value_begin; - return (length + 1); - } - - length++; - } - - break; - - case 0x3E: /* (ASCII >) */ - *it_last = true; - return (length + 1); - - default: - attr->value_begin = length; - - while(length < data_size) { - if(udata[length] == 0x09 || udata[length] == 0x0A || udata[length] == 0x0C || - udata[length] == 0x0D || udata[length] == 0x20 || udata[length] == 0x3E) - { - attr->value_length = length - attr->value_begin; - return (length + 1); - } - - length++; - } - - break; - } - - attr->value_length = length - attr->value_begin; - return length; -} - -size_t myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(const unsigned char *udata, size_t length, size_t data_size, myhtml_encoding_detect_attr_t *attr, bool *it_last) -{ - memset(attr, 0, sizeof(myhtml_encoding_detect_attr_t)); - - /* - If the byte at position is one of 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0C (ASCII FF), 0x0D (ASCII CR), - 0x20 (ASCII space), or 0x2F (ASCII /) then advance position to the next byte and redo this step. - */ - /* 1 */ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20 && udata[length] != 0x2F) - { - break; - } - - length++; - } - - if(length >= data_size) { - *it_last = true; - return length; - } - - /* 2 */ - if(udata[length] == 0x3E) { /* (ASCII >) */ - *it_last = true; - return (length + 1); - } - - attr->key_begin = length; - - /* 3, 4 */ - while(length < data_size) { - switch (udata[length]) { - case 0x3D: /* (ASCII =) */ - if(attr->key_begin != (length - 1)) { - attr->key_length = length - attr->key_begin; - - length++; - return myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last); - } - - /* fall through */ - - case 0x09: /* (ASCII TAB) */ - case 0x0A: /* (ASCII LF) */ - case 0x0C: /* (ASCII FF) */ - case 0x0D: /* (ASCII CR) */ - case 0x20: /* (ASCII space) */ - length++; - - if(myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_spaces(udata, &length, data_size, attr) == false) { - *it_last = true; - return length; - } - - return myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last); - - case 0x2F: /* (ASCII /) */ - case 0x3E: /* (ASCII >) */ - *it_last = true; - attr->key_length = length - attr->key_begin; - - return (length + 1); - - default: - break; - } - - length++; - } - - if(myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_spaces(udata, &length, data_size, attr) == false) { - *it_last = true; - return length; - } - - return myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last); -} - -bool myhtml_encoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myhtml_encoding_t *encoding) -{ - myhtml_encoding_detect_attr_t attr; - - bool got_pragma = false; - bool it_last = false; - - unsigned int need_pragma = 0; /* 0 = NULL, 1 = false, 2 = true */ - - /* - http-equiv = 1 - content = 2 - charset = 4 - */ - /* If the attribute's name is already in attribute list, then return to the step labeled attributes. */ - size_t is_exists = 0; - - while(*length < data_size) { - *length = myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(udata, *length, data_size, &attr, &it_last); - - /* 9 */ - if(attr.key_length == strlen("http-equiv") && - myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"http-equiv", &udata[ attr.key_begin ])) - { - if((is_exists & 1) == 0) { - is_exists |= 1; - - if(attr.value_length == strlen("content-type") && - myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"content-type", &udata[ attr.value_begin ])) - { - got_pragma = true; - } - } - } - else if(attr.key_length == strlen("content") && - myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"content", &udata[ attr.key_begin ])) - { - if((is_exists & 2) == 0) { - is_exists |= 2; - - if(myhtml_encoding_algorithm_extracting_character_encoding_from_meta_element((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding)) { - need_pragma = 2; - } - } - } - else if(attr.key_length == strlen("charset") && - myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"charset", &udata[ attr.key_begin ])) - { - if((is_exists & 4) == 0) { - is_exists |= 4; - - myhtml_encoding_by_name((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding); - need_pragma = 1; - } - } - - if(it_last) - break; - } - - /* 11, 12, 13 */ - if(need_pragma == 0 || (need_pragma == 2 && got_pragma == false)) { - *encoding = MyHTML_ENCODING_NOT_DETERMINED; - return false; - } - - /* 14 */ - if(*encoding == MyHTML_ENCODING_UTF_16BE || *encoding == MyHTML_ENCODING_UTF_16LE) { - *encoding = MyHTML_ENCODING_UTF_8; - } - - /* 15 */ - if(*encoding == MyHTML_ENCODING_X_USER_DEFINED) { - *encoding = MyHTML_ENCODING_WINDOWS_1252; - } - - /* 16 */ - return true; -} - -size_t myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(const unsigned char *udata, size_t length, size_t data_size) -{ - while(length < data_size) { - if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C && - udata[length] != 0x0D && udata[length] != 0x20) - { - break; - } - - length++; - } - - if(length >= data_size) - return length; - - if(udata[length] == 0x3E) { - return (length + 1); - } - - myhtml_encoding_detect_attr_t attr; - bool it_last = false; - - while(length < data_size) { - length = myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(udata, length, data_size, &attr, &it_last); - - if(it_last) { - return length; - } - } - - return length; -} - -size_t myhtml_encoding_prescan_stream_to_determine_encoding_skip_other(const unsigned char *udata, size_t length, size_t data_size) -{ - if(udata[length] == 0x2F) { /* / */ - length++; - - if(length >= data_size) - return length; - - if(myhtml_tokenizer_chars_map[ udata[length] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) { - return myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(udata, length, data_size); - } - - while(length < data_size) { - if(udata[length] != 0x3E) { - return (length + 1); - } - - length++; - } - - return length; - } - else if(udata[length] == 0x21) { /* ! */ - length++; - - if((length + 2) < data_size && udata[length] == 0x2D && udata[(length+1)] == 0x2D) { - while(length < data_size) { - if(udata[length] != 0x3E) { - if(udata[(length - 1)] == 0x2D && udata[(length - 2)] == 0x2D) - return (length + 1); - - length++; - } - } - - return length; - } - - while(length < data_size) { - if(udata[length] != 0x3E) { - return (length + 1); - } - - length++; - } - - return length; - } - else if(udata[length] == 0x3F) { /* ? */ - length++; - - while(length < data_size) { - if(udata[length] != 0x3E) { - return (length + 1); - } - - length++; - } - - return length; - } - - - return myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(udata, length, data_size); -} - -myhtml_encoding_t myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size) -{ - const unsigned char* udata = (const unsigned char*)data; - myhtml_encoding_t encoding = MyHTML_ENCODING_NOT_DETERMINED; - - size_t i = 0; - while(i < data_size) { - /* 0x3C = '<' */ - if(data[i] == 0x3C) - { - if((i + 5) >= data_size) - return encoding; - - i++; - - switch (data[i]) { - /* - A sequence of bytes starting with: - 0x3C, 0x4D or 0x6D, 0x45 or 0x65, 0x54 or 0x74, 0x41 or 0x61, - and one of 0x09, 0x0A, 0x0C, 0x0D, 0x20, 0x2F - (case-insensitive ASCII 'size = html_size; - inc_buf->length = 0; - inc_buf->data = html; - - if(current) { - inc_buf->offset = current->offset + current->size; - current->next = inc_buf; - } - else { - inc_buf->offset = 0; - } - - inc_buf->prev = current; - inc_buf->next = NULL; - - return inc_buf; -} - -void myhtml_incoming_buffer_clean(myhtml_incoming_buffer_t *current) -{ - memset(current, 0, sizeof(myhtml_incoming_buffer_t)); -} - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_split(myhtml_incoming_buffer_t *current, mcobject_t *mcobject, size_t global_pos) -{ - size_t relative_pos = global_pos - current->offset; - myhtml_incoming_buffer_t *inc_buf = mcobject_malloc(mcobject, NULL); - - inc_buf->size = current->size - relative_pos; - inc_buf->length = inc_buf->size; - inc_buf->data = ¤t->data[relative_pos]; - inc_buf->offset = current->offset + relative_pos; - inc_buf->next = NULL; - inc_buf->prev = current; - - current->next = inc_buf; - current->size = relative_pos; - current->length = relative_pos; - - return inc_buf; -} - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_find_by_position(myhtml_incoming_buffer_t *inc_buf, size_t begin) -{ - if(inc_buf->offset < begin) { - while(inc_buf && (inc_buf->offset + inc_buf->size) < begin) - inc_buf = inc_buf->next; - } - else { - while(inc_buf && inc_buf->offset > begin) - inc_buf = inc_buf->prev; - } - - return inc_buf; -} - -const char * myhtml_incoming_buffer_data(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->data; -} - -size_t myhtml_incoming_buffer_length(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->length; -} - -size_t myhtml_incoming_buffer_size(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->size; -} - -size_t myhtml_incoming_buffer_offset(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->offset; -} - -size_t myhtml_incoming_buffer_relative_begin(myhtml_incoming_buffer_t *inc_buf, size_t begin) -{ - return (begin - inc_buf->offset); -} - -size_t myhtml_incoming_buffer_available_length(myhtml_incoming_buffer_t *inc_buf, size_t relative_begin, size_t length) -{ - if((relative_begin + length) > inc_buf->size) - return (inc_buf->size - relative_begin); - - return length; -} - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_next(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->next; -} - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_prev(myhtml_incoming_buffer_t *inc_buf) -{ - return inc_buf->prev; -} - -// // // -// convert only one 002345 (\002345) to code point -// -size_t myhtml_incoming_buffer_convert_one_escaped_to_code_point(myhtml_incoming_buffer_t **inc_buf, size_t *relative_pos) -{ - const unsigned char *u_data; - myhtml_incoming_buffer_t *current = *inc_buf; - - if(*relative_pos >= current->size) { - *relative_pos = 0; - current = current->next; - } - - u_data = (const unsigned char*)current->data; - - unsigned int consume = 0; - size_t code_point = 0; - - while(current) - { - if(myhtml_string_chars_num_map[ u_data[*relative_pos] ] != 0xff && consume < 6) { - code_point <<= 4; - code_point |= myhtml_string_chars_hex_map[ u_data[*relative_pos] ]; - - ++consume; - } - else - break; - - *relative_pos += 1; - - if(*relative_pos >= current->size) - { - if(current->next == NULL) - break; - - *relative_pos = 0; - - u_data = (const unsigned char*)current->data; - current = current->next; - } - } - - *inc_buf = current; - - return code_point; -} - -size_t myhtml_incoming_buffer_escaped_case_cmp(myhtml_incoming_buffer_t **inc_buf, const char *to, size_t to_size, size_t *relative_pos) -{ - myhtml_incoming_buffer_t *current = *inc_buf; - - if(*relative_pos >= current->size) { - if(current->next == 0) - return to_size; - - *relative_pos = 0; - current = current->next; - } - - const unsigned char *u_to = (const unsigned char*)to; - const unsigned char *u_data = (const unsigned char*)current->data; - - size_t i = 0; - - while(i < to_size) - { - if(u_data[*relative_pos] == 0x5C) { - *relative_pos += 1; - - size_t code_point = myhtml_incoming_buffer_convert_one_escaped_to_code_point(¤t, relative_pos); - - if(code_point > 255 || myhtml_string_chars_lowercase_map[code_point] != myhtml_string_chars_lowercase_map[ u_to[i] ]) { - break; - } - - u_data = (const unsigned char*)current->data; - } - else if(myhtml_string_chars_lowercase_map[ u_data[*relative_pos] ] != myhtml_string_chars_lowercase_map[ u_to[i] ]) { - break; - } - else { - ++(*relative_pos); - } - - i++; - - if(*relative_pos >= current->size) - { - if(current->next == NULL) - break; - - current = current->next; - u_data = (const unsigned char*)current->data; - *relative_pos = 0; - } - } - - *inc_buf = current; - return (to_size - i); -} - - - - diff --git a/source/myhtml/incoming.h b/source/myhtml/incoming.h deleted file mode 100644 index 6e35389..0000000 --- a/source/myhtml/incoming.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (C) 2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_INCOMING_H -#define MyHTML_INCOMING_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" -#include "myhtml/tree.h" -#include "myhtml/utils/mcobject.h" - -struct myhtml_incoming_buffer { - const char* data; - size_t length; // use of data - size_t size; // size of data - size_t offset; // begin global offset - - struct myhtml_incoming_buffer* prev; - struct myhtml_incoming_buffer* next; -}; - - -// incoming buffer -myhtml_incoming_buffer_t * myhtml_incoming_buffer_add(myhtml_incoming_buffer_t *current, mcobject_t *mcobject, const char *html, size_t html_size); -void myhtml_incoming_buffer_clean(myhtml_incoming_buffer_t *current); -myhtml_incoming_buffer_t * myhtml_incoming_buffer_split(myhtml_incoming_buffer_t *current, mcobject_t *mcobject, size_t global_pos); - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_find_by_position(myhtml_incoming_buffer_t *inc_buf, size_t begin); - -const char * myhtml_incoming_buffer_data(myhtml_incoming_buffer_t *inc_buf); -size_t myhtml_incoming_buffer_length(myhtml_incoming_buffer_t *inc_buf); -size_t myhtml_incoming_buffer_size(myhtml_incoming_buffer_t *inc_buf); -size_t myhtml_incoming_buffer_offset(myhtml_incoming_buffer_t *inc_buf); -size_t myhtml_incoming_buffer_relative_begin(myhtml_incoming_buffer_t *inc_buf, size_t begin); -size_t myhtml_incoming_buffer_available_length(myhtml_incoming_buffer_t *inc_buf, size_t relative_begin, size_t length); - -myhtml_incoming_buffer_t * myhtml_incoming_buffer_next(myhtml_incoming_buffer_t *inc_buf); -myhtml_incoming_buffer_t * myhtml_incoming_buffer_prev(myhtml_incoming_buffer_t *inc_buf); - -size_t myhtml_incoming_buffer_convert_one_escaped_to_code_point(myhtml_incoming_buffer_t **inc_buf, size_t *relative_pos); -size_t myhtml_incoming_buffer_escaped_case_cmp(myhtml_incoming_buffer_t **inc_buf, const char *to, size_t to_size, size_t *relative_pos); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MyHTML_INCOMING_H */ diff --git a/source/myhtml/myhtml.c b/source/myhtml/myhtml.c index 767603e..479f33e 100644 --- a/source/myhtml/myhtml.c +++ b/source/myhtml/myhtml.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,7 +22,7 @@ void myhtml_init_marker(myhtml_t* myhtml) { - myhtml->marker = (myhtml_tree_node_t*)myhtml_malloc(sizeof(myhtml_tree_node_t)); + myhtml->marker = (myhtml_tree_node_t*)mycore_malloc(sizeof(myhtml_tree_node_t)); if(myhtml->marker) myhtml_tree_node_clean(myhtml->marker); @@ -31,92 +31,152 @@ void myhtml_init_marker(myhtml_t* myhtml) void myhtml_destroy_marker(myhtml_t* myhtml) { if(myhtml->marker) - myhtml_free(myhtml->marker); + mycore_free(myhtml->marker); } -myhtml_t * myhtml_create(void) +#ifndef MyCORE_BUILD_WITHOUT_THREADS +mystatus_t myhtml_stream_create(myhtml_t* myhtml, mystatus_t* status, size_t count, size_t id_increase) { - return (myhtml_t*)myhtml_malloc(sizeof(myhtml_t)); + if(count == 0) { + myhtml->thread_stream = NULL; + + *status = MyHTML_STATUS_OK; + return *status; + } + + myhtml->thread_stream = mythread_create(); + if(myhtml->thread_stream == NULL) + *status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION; + + *status = mythread_init(myhtml->thread_stream, MyTHREAD_TYPE_STREAM, count, id_increase); + + if(*status) + myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); + + return *status; } -myhtml_status_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size) +mystatus_t myhtml_batch_create(myhtml_t* myhtml, mystatus_t* status, size_t count, size_t id_increase) { - myhtml_status_t status; + if(count == 0) { + myhtml->thread_batch = NULL; + + *status = MyHTML_STATUS_OK; + return *status; + } - myhtml_init_marker(myhtml); + myhtml->thread_batch = mythread_create(); + if(myhtml->thread_stream == NULL) { + myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); + *status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION; + } - status = myhtml_tokenizer_state_init(myhtml); - if(status) { - myhtml->insertion_func = NULL; - myhtml->thread = NULL; - + *status = mythread_init(myhtml->thread_batch, MyTHREAD_TYPE_BATCH, count, id_increase); + + if(*status) + myhtml->thread_batch = mythread_destroy(myhtml->thread_batch , NULL, NULL, true); + + return *status; +} + +mystatus_t myhtml_create_stream_and_batch(myhtml_t* myhtml, size_t stream_count, size_t batch_count) +{ + mystatus_t status; + + /* stream */ + if(myhtml_stream_create(myhtml, &status, stream_count, 0)) { return status; } - status = myhtml_rules_init(myhtml); - if(status) { - myhtml->thread = NULL; - + /* batch */ + if(myhtml_batch_create(myhtml, &status, batch_count, stream_count)) { + myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); return status; } + return status; +} +#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ + +myhtml_t * myhtml_create(void) +{ + return (myhtml_t*)mycore_calloc(1, sizeof(myhtml_t)); +} + +mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size) +{ + mystatus_t status; + myhtml->opt = opt; - myhtml->thread = mythread_create(); + myhtml_init_marker(myhtml); - if(myhtml->thread == NULL) - return MyHTML_STATUS_THREAD_ERROR_MEMORY_ALLOCATION; + status = myhtml_tokenizer_state_init(myhtml); + if(status) + return status; -#ifdef MyHTML_BUILD_WITHOUT_THREADS + status = myhtml_rules_init(myhtml); + +#ifdef MyCORE_BUILD_WITHOUT_THREADS - status = mythread_init(myhtml->thread, NULL, thread_count); + myhtml->thread_stream = NULL; + myhtml->thread_batch = NULL; + myhtml->thread_total = 0; +#else /* if undef MyCORE_BUILD_WITHOUT_THREADS */ if(status) return status; -#else /* MyHTML_BUILD_WITHOUT_THREADS */ switch (opt) { case MyHTML_OPTIONS_PARSE_MODE_SINGLE: - status = mythread_init(myhtml->thread, "lastmac", 0); - if(status) - return status; - - myhtml->thread->context = mythread_queue_list_create(&status); - if(status) + if((status = myhtml_create_stream_and_batch(myhtml, 0, 0))) return status; break; case MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE: - status = mythread_init(myhtml->thread, "lastmac", 1); - if(status) + if((status = myhtml_create_stream_and_batch(myhtml, 1, 0))) return status; - myhtml->thread->context = mythread_queue_list_create(&status); - if(status) - return status; + myhtml->thread_stream->context = mythread_queue_list_create(&status); + status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_worker_stream, MyTHREAD_OPT_STOP); - myhread_create_stream(myhtml->thread, mythread_function_queue_stream, myhtml_parser_worker_stream, MyTHREAD_OPT_STOP, &status); break; default: // default MyHTML_OPTIONS_PARSE_MODE_SEPARATELY - if(thread_count == 0) - thread_count = 1; + if(thread_count < 2) + thread_count = 2; - status = mythread_init(myhtml->thread, "lastmac", (thread_count + 1)); - if(status) + if((status = myhtml_create_stream_and_batch(myhtml, 1, (thread_count - 1)))) return status; - myhtml->thread->context = mythread_queue_list_create(&status); + myhtml->thread_stream->context = mythread_queue_list_create(&status); + myhtml->thread_batch->context = myhtml->thread_stream->context; + + status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_stream, MyTHREAD_OPT_STOP); if(status) return status; - myhread_create_stream(myhtml->thread, mythread_function_queue_stream, myhtml_parser_stream, MyTHREAD_OPT_STOP, &status); - myhread_create_batch(myhtml->thread, mythread_function_queue_batch, myhtml_parser_worker, MyTHREAD_OPT_STOP, &status, thread_count); + for(size_t i = 0; i < myhtml->thread_batch->entries_size; i++) { + status = myhread_entry_create(myhtml->thread_batch, mythread_function_queue_batch, myhtml_parser_worker, MyTHREAD_OPT_STOP); + + if(status) + return status; + } + break; } -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ + myhtml->thread_total = thread_count; + + myhtml->thread_list[0] = myhtml->thread_stream; + myhtml->thread_list[1] = myhtml->thread_batch; + myhtml->thread_list[2] = NULL; + +#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ + + if(status) + return status; myhtml_clean(myhtml); @@ -125,7 +185,7 @@ myhtml_status_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t th void myhtml_clean(myhtml_t* myhtml) { - mythread_clean(myhtml->thread); + /* some code */ } myhtml_t* myhtml_destroy(myhtml_t* myhtml) @@ -135,36 +195,41 @@ myhtml_t* myhtml_destroy(myhtml_t* myhtml) myhtml_destroy_marker(myhtml); - if(myhtml->thread) { -#ifndef MyHTML_BUILD_WITHOUT_THREADS - mythread_queue_list_t* queue_list = myhtml->thread->context; -#endif +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(myhtml->thread_stream) { + mythread_queue_list_t* queue_list = myhtml->thread_stream->context; + + if(queue_list) + mythread_queue_list_wait_for_done(myhtml->thread_stream, queue_list); - myhtml->thread = mythread_destroy(myhtml->thread, mythread_queue_wait_all_for_done, true); + myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, mythread_callback_quit, NULL, true); -#ifndef MyHTML_BUILD_WITHOUT_THREADS - mythread_queue_list_destroy(queue_list); -#endif + if(myhtml->thread_batch) + myhtml->thread_batch = mythread_destroy(myhtml->thread_batch, mythread_callback_quit, NULL, true); + + if(queue_list) + mythread_queue_list_destroy(queue_list); } +#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ myhtml_tokenizer_state_destroy(myhtml); if(myhtml->insertion_func) - myhtml_free(myhtml->insertion_func); + mycore_free(myhtml->insertion_func); - myhtml_free(myhtml); + mycore_free(myhtml); return NULL; } -myhtml_status_t myhtml_parse(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size) +mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } myhtml_encoding_set(tree, encoding); - myhtml_status_t status = myhtml_tokenizer_begin(tree, html, html_size); + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); if(status) return status; @@ -172,7 +237,7 @@ myhtml_status_t myhtml_parse(myhtml_tree_t* tree, myhtml_encoding_t encoding, co return myhtml_tokenizer_end(tree); } -myhtml_status_t myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) +mystatus_t myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); @@ -188,7 +253,7 @@ myhtml_status_t myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t enc return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT; myhtml_encoding_set(tree, encoding); - myhtml_status_t status = myhtml_tokenizer_begin(tree, html, html_size); + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); if(status) return status; @@ -196,7 +261,7 @@ myhtml_status_t myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t enc return myhtml_tokenizer_end(tree); } -myhtml_status_t myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size) +mystatus_t myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); @@ -206,7 +271,7 @@ myhtml_status_t myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encod myhtml_encoding_set(tree, encoding); - myhtml_status_t status = myhtml_tokenizer_begin(tree, html, html_size); + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); if(status) return status; @@ -214,7 +279,7 @@ myhtml_status_t myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encod return myhtml_tokenizer_end(tree); } -myhtml_status_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) +mystatus_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); @@ -233,7 +298,7 @@ myhtml_status_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encodin myhtml_encoding_set(tree, encoding); - myhtml_status_t status = myhtml_tokenizer_begin(tree, html, html_size); + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); if(status) return status; @@ -241,7 +306,7 @@ myhtml_status_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encodin return myhtml_tokenizer_end(tree); } -myhtml_status_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size) +mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); @@ -250,7 +315,7 @@ myhtml_status_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t return myhtml_tokenizer_chunk(tree, html, html_size); } -myhtml_status_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) +mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) { if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); @@ -268,7 +333,7 @@ myhtml_status_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* htm return myhtml_tokenizer_chunk(tree, html, html_size); } -myhtml_status_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size) +mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size) { if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; @@ -276,7 +341,7 @@ myhtml_status_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, return myhtml_parse_chunk(tree, html, html_size); } -myhtml_status_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) +mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) { if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; @@ -284,21 +349,21 @@ myhtml_status_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const ch return myhtml_parse_chunk_fragment(tree, html, html_size, tag_id, ns); } -myhtml_status_t myhtml_parse_chunk_end(myhtml_tree_t* tree) +mystatus_t myhtml_parse_chunk_end(myhtml_tree_t* tree) { return myhtml_tokenizer_end(tree); } -void myhtml_encoding_set(myhtml_tree_t* tree, myhtml_encoding_t encoding) +void myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding) { - if(encoding >= MyHTML_ENCODING_LAST_ENTRY) + if(encoding >= MyENCODING_LAST_ENTRY) return; tree->encoding_usereq = encoding; tree->encoding = encoding; } -myhtml_encoding_t myhtml_encoding_get(myhtml_tree_t* tree) +myencoding_t myhtml_encoding_get(myhtml_tree_t* tree) { return tree->encoding; } @@ -307,7 +372,7 @@ myhtml_encoding_t myhtml_encoding_get(myhtml_tree_t* tree) * Nodes */ -myhtml_status_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_node_t *node, myhtml_collection_t *collection, myhtml_tag_id_t tag_id) +mystatus_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_node_t *node, myhtml_collection_t *collection, myhtml_tag_id_t tag_id) { while(node) { if(node->tag_id == tag_id) { @@ -316,7 +381,7 @@ myhtml_status_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_n if(collection->length >= collection->size) { - myhtml_status_t mystatus = myhtml_collection_check_size(collection, 1024, 0); + mystatus_t mystatus = myhtml_collection_check_size(collection, 1024, 0); if(mystatus != MyHTML_STATUS_OK) return mystatus; @@ -332,12 +397,12 @@ myhtml_status_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_n return MyHTML_STATUS_OK; } -myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, myhtml_status_t *status) +myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, mystatus_t *status) { if(node == NULL) return NULL; - myhtml_status_t mystatus = MyHTML_STATUS_OK; + mystatus_t mystatus = MyHTML_STATUS_OK; if(collection == NULL) { collection = myhtml_collection_create(1024, &mystatus); @@ -361,13 +426,13 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, m return collection; } -myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, myhtml_status_t *status) +myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, mystatus_t *status) { const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, html, length); return myhtml_get_nodes_by_tag_id_in_scope(tree, collection, node, tag_ctx->id, status); } -myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, myhtml_status_t *status) +myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, mystatus_t *status) { if(collection == NULL) { collection = myhtml_collection_create(1024, NULL); @@ -417,7 +482,7 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_col return collection; } -myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, myhtml_status_t *status) +myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, mystatus_t *status) { const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, html, length); @@ -546,12 +611,12 @@ myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t return node; } -myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding) +mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding) { if(node == NULL) return NULL; - if(encoding >= MyHTML_ENCODING_LAST_ENTRY) + if(encoding >= MyENCODING_LAST_ENTRY) return NULL; myhtml_tree_t* tree = node->tree; @@ -564,22 +629,22 @@ myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex } if(node->token->str.data == NULL) { - myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); + mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); } else { if(node->token->str.size < length) { mchar_async_free(tree->mchar, node->token->str.node_idx, node->token->str.data); - myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, length); + mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, length); } else node->token->str.length = 0; } - if(encoding != MyHTML_ENCODING_UTF_8) { - myhtml_string_append_with_convert_encoding(&node->token->str, text, length, encoding); + if(encoding != MyENCODING_UTF_8) { + myencoding_string_append(&node->token->str, text, length, encoding); } else { - myhtml_string_append(&node->token->str, text, length); + mycore_string_append(&node->token->str, text, length); } node->token->raw_begin = 0; @@ -588,12 +653,12 @@ myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex return &node->token->str; } -myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding) +mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding) { if(node == NULL) return NULL; - if(encoding >= MyHTML_ENCODING_LAST_ENTRY) + if(encoding >= MyENCODING_LAST_ENTRY) return NULL; myhtml_tree_t* tree = node->tree; @@ -606,12 +671,12 @@ myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con } if(node->token->str.data == NULL) { - myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); + mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); } else { if(node->token->str.size < length) { mchar_async_free(tree->mchar, node->token->str.node_idx, node->token->str.data); - myhtml_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, length); + mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, length); } else node->token->str.length = 0; @@ -621,7 +686,7 @@ myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con myhtml_data_process_entry_clean(&proc_entry); proc_entry.encoding = encoding; - myhtml_encoding_result_clean(&proc_entry.res); + myencoding_result_clean(&proc_entry.res); myhtml_data_process(&proc_entry, &node->token->str, text, length); myhtml_data_process_end(&proc_entry, &node->token->str); @@ -724,7 +789,7 @@ const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length) return NULL; } -myhtml_string_t * myhtml_node_string(myhtml_tree_node_t *node) +mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node) { if(node && node->token) return &node->token->str; @@ -763,7 +828,7 @@ myhtml_tree_t * myhtml_node_tree(myhtml_tree_node_t *node) return node->tree; } -myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection, const char* key, size_t key_len) +mystatus_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection, const char* key, size_t key_len) { while(node) { @@ -771,14 +836,14 @@ myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* myhtml_tree_attr_t* attr = node->token->attr_first; while(attr) { - myhtml_string_t* str_key = &attr->key; + mycore_string_t* str_key = &attr->key; - if(str_key->length == key_len && myhtml_strncasecmp(str_key->data, key, key_len) == 0) { + if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0) { collection->list[ collection->length ] = node; collection->length++; if(collection->length >= collection->size) { - myhtml_status_t status = myhtml_collection_check_size(collection, 1024, 0); + mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); if(status) return status; @@ -790,7 +855,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* } if(node->child) { - myhtml_status_t status = myhtml_get_nodes_by_attribute_key_recursion(node->child, collection, key, key_len); + mystatus_t status = myhtml_get_nodes_by_attribute_key_recursion(node->child, collection, key, key_len); if(status) return status; @@ -802,7 +867,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* return MyHTML_STATUS_OK; } -myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, const char* key, size_t key_len, myhtml_status_t* status) +myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, const char* key, size_t key_len, mystatus_t* status) { if(collection == NULL) { collection = myhtml_collection_create(1024, status); @@ -814,7 +879,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myh if(scope_node == NULL) scope_node = tree->node_html; - myhtml_status_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(scope_node, collection, key, key_len); + mystatus_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(scope_node, collection, key, key_len); if(rec_status && status) *status = rec_status; @@ -823,28 +888,28 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myh } /* find by attribute value; case-sensitivity */ -bool myhtml_get_nodes_by_attribute_value_recursion_eq(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_eq(mycore_string_t* str, const char* value, size_t value_len) { - return str->length == value_len && myhtml_strncmp(str->data, value, value_len) == 0; + return str->length == value_len && mycore_strncmp(str->data, value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; const char *data = str->data; - if(myhtml_strncmp(data, value, value_len) == 0) { - if((str->length > value_len && myhtml_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) + if(mycore_strncmp(data, value, value_len) == 0) { + if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) return true; } for(size_t i = 1; (str->length - i) >= value_len; i++) { - if(myhtml_utils_whithspace(data[(i - 1)], ==, ||)) { - if(myhtml_strncmp(&data[i], value, value_len) == 0) { - if((i > value_len && myhtml_utils_whithspace(data[(i + value_len)], ==, ||)) || (str->length - i) == value_len) + if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) { + if(mycore_strncmp(&data[i], value, value_len) == 0) { + if((i > value_len && mycore_utils_whithspace(data[(i + value_len)], ==, ||)) || (str->length - i) == value_len) return true; } } @@ -853,23 +918,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(myhtml_s return false; } -bool myhtml_get_nodes_by_attribute_value_recursion_begin(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_begin(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; - return myhtml_strncmp(str->data, value, value_len) == 0; + return mycore_strncmp(str->data, value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_end(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_end(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; - return myhtml_strncmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; + return mycore_strncmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_contain(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_contain(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; @@ -878,7 +943,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain(myhtml_string_t* str, for(size_t i = 0; (str->length - i) >= value_len; i++) { - if(myhtml_strncmp(&data[i], value, value_len) == 0) { + if(mycore_strncmp(&data[i], value, value_len) == 0) { return true; } } @@ -886,16 +951,16 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain(myhtml_string_t* str, return false; } -bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(mycore_string_t* str, const char* value, size_t value_len) { const char *data = str->data; if(str->length < value_len) return false; - else if(str->length == value_len && myhtml_strncmp(data, value, value_len) == 0) { + else if(str->length == value_len && mycore_strncmp(data, value, value_len) == 0) { return true; } - else if(myhtml_strncmp(data, value, value_len) == 0 && data[value_len] == '-') { + else if(mycore_strncmp(data, value, value_len) == 0 && data[value_len] == '-') { return true; } @@ -903,28 +968,28 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(myhtml_strin } /* find by attribute value; case-insensitive */ -bool myhtml_get_nodes_by_attribute_value_recursion_eq_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_eq_i(mycore_string_t* str, const char* value, size_t value_len) { - return str->length == value_len && myhtml_strncasecmp(str->data, value, value_len) == 0; + return str->length == value_len && mycore_strncasecmp(str->data, value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; const char *data = str->data; - if(myhtml_strncasecmp(data, value, value_len) == 0) { - if((str->length > value_len && myhtml_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) + if(mycore_strncasecmp(data, value, value_len) == 0) { + if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) return true; } for(size_t i = 1; (str->length - i) >= value_len; i++) { - if(myhtml_utils_whithspace(data[(i - 1)], ==, ||)) { - if(myhtml_strncasecmp(&data[i], value, value_len) == 0) { - if((i > value_len && myhtml_utils_whithspace(data[(i + value_len)], ==, ||)) || (str->length - i) == value_len) + if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) { + if(mycore_strncasecmp(&data[i], value, value_len) == 0) { + if((i > value_len && mycore_utils_whithspace(data[(i + value_len)], ==, ||)) || (str->length - i) == value_len) return true; } } @@ -933,23 +998,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(myhtml return false; } -bool myhtml_get_nodes_by_attribute_value_recursion_begin_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_begin_i(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; - return myhtml_strncasecmp(str->data, value, value_len) == 0; + return mycore_strncasecmp(str->data, value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_end_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_end_i(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; - return myhtml_strncasecmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; + return mycore_strncasecmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; } -bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(mycore_string_t* str, const char* value, size_t value_len) { if(str->length < value_len) return false; @@ -958,7 +1023,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(myhtml_string_t* st for(size_t i = 0; (str->length - i) >= value_len; i++) { - if(myhtml_strncasecmp(&data[i], value, value_len) == 0) { + if(mycore_strncasecmp(&data[i], value, value_len) == 0) { return true; } } @@ -966,16 +1031,16 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(myhtml_string_t* st return false; } -bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(myhtml_string_t* str, const char* value, size_t value_len) +bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(mycore_string_t* str, const char* value, size_t value_len) { const char *data = str->data; if(str->length < value_len) return false; - else if(str->length == value_len && myhtml_strncasecmp(data, value, value_len) == 0) { + else if(str->length == value_len && mycore_strncasecmp(data, value, value_len) == 0) { return true; } - else if(myhtml_strncasecmp(data, value, value_len) == 0 && data[value_len] == '-') { + else if(mycore_strncasecmp(data, value, value_len) == 0 && data[value_len] == '-') { return true; } @@ -983,7 +1048,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(myhtml_str } /* find by attribute value; basic functions */ -myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection, +mystatus_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection, myhtml_attribute_value_find_f func_eq, const char* value, size_t value_len) { @@ -993,14 +1058,14 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t myhtml_tree_attr_t* attr = node->token->attr_first; while(attr) { - myhtml_string_t* str = &attr->value; + mycore_string_t* str = &attr->value; if(func_eq(str, value, value_len)) { collection->list[ collection->length ] = node; collection->length++; if(collection->length >= collection->size) { - myhtml_status_t status = myhtml_collection_check_size(collection, 1024, 0); + mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); if(status) return status; @@ -1012,7 +1077,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t } if(node->child) { - myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion(node->child, collection, func_eq, value, value_len); + mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion(node->child, collection, func_eq, value, value_len); if(status) return status; @@ -1024,7 +1089,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_node_t* node, myhtml_collection_t* collection, +mystatus_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_node_t* node, myhtml_collection_t* collection, myhtml_attribute_value_find_f func_eq, const char* key, size_t key_len, const char* value, size_t value_len) @@ -1035,17 +1100,17 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree myhtml_tree_attr_t* attr = node->token->attr_first; while(attr) { - myhtml_string_t* str_key = &attr->key; - myhtml_string_t* str = &attr->value; + mycore_string_t* str_key = &attr->key; + mycore_string_t* str = &attr->value; - if(str_key->length == key_len && myhtml_strncasecmp(str_key->data, key, key_len) == 0) + if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0) { if(func_eq(str, value, value_len)) { collection->list[ collection->length ] = node; collection->length++; if(collection->length >= collection->size) { - myhtml_status_t status = myhtml_collection_check_size(collection, 1024, 0); + mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); if(status) return status; @@ -1058,7 +1123,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree } if(node->child) { - myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node->child, collection, func_eq, + mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node->child, collection, func_eq, key, key_len, value, value_len); if(status) @@ -1075,7 +1140,7 @@ myhtml_collection_t * _myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, myhtml_attribute_value_find_f func_eq, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(collection == NULL) { collection = myhtml_collection_create(1024, status); @@ -1087,7 +1152,7 @@ myhtml_collection_t * _myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, if(node == NULL) node = tree->node_html; - myhtml_status_t rec_status; + mystatus_t rec_status; if(key && key_len) rec_status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node, collection, func_eq, key, key_len, value, value_len); @@ -1104,7 +1169,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, m bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1121,7 +1186,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_whitespace_separated(m bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1138,7 +1203,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *t bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1155,7 +1220,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tre bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1172,7 +1237,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1189,7 +1254,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtm bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status) + mystatus_t* status) { if(case_insensitive) { return _myhtml_get_nodes_by_attribute_value(tree, collection, node, @@ -1257,7 +1322,7 @@ const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length) return NULL; } -myhtml_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr) +mycore_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr) { if(attr) return &attr->key; @@ -1265,7 +1330,7 @@ myhtml_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr) return NULL; } -myhtml_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr) +mycore_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr) { if(attr) return &attr->value; @@ -1281,7 +1346,7 @@ myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const cha return myhtml_token_attr_by_name(node->token, key, key_len); } -myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding) +myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myencoding_t encoding) { if(node == NULL) return NULL; @@ -1353,9 +1418,9 @@ myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr) /* * Collections */ -myhtml_collection_t * myhtml_collection_create(size_t size, myhtml_status_t *status) +myhtml_collection_t * myhtml_collection_create(size_t size, mystatus_t *status) { - myhtml_collection_t *collection = (myhtml_collection_t*)myhtml_malloc(sizeof(myhtml_collection_t)); + myhtml_collection_t *collection = (myhtml_collection_t*)mycore_malloc(sizeof(myhtml_collection_t)); if(collection == NULL) { if(status) @@ -1366,10 +1431,10 @@ myhtml_collection_t * myhtml_collection_create(size_t size, myhtml_status_t *sta collection->size = size; collection->length = 0; - collection->list = (myhtml_tree_node_t **)myhtml_malloc(sizeof(myhtml_tree_node_t*) * size); + collection->list = (myhtml_tree_node_t **)mycore_malloc(sizeof(myhtml_tree_node_t*) * size); if(collection->list == NULL) { - myhtml_free(collection); + mycore_free(collection); if(status) *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; @@ -1383,12 +1448,12 @@ myhtml_collection_t * myhtml_collection_create(size_t size, myhtml_status_t *sta return collection; } -myhtml_status_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length) +mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length) { if((collection->length + need) >= collection->size) { size_t tmp_size = collection->length + need + upto_length + 1; - myhtml_tree_node_t **tmp = (myhtml_tree_node_t **)myhtml_realloc(collection->list, sizeof(myhtml_tree_node_t*) * tmp_size); + myhtml_tree_node_t **tmp = (myhtml_tree_node_t **)mycore_realloc(collection->list, sizeof(myhtml_tree_node_t*) * tmp_size); if(tmp) { collection->size = tmp_size; @@ -1413,15 +1478,15 @@ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection) return NULL; if(collection->list) - myhtml_free(collection->list); + mycore_free(collection->list); - myhtml_free(collection); + mycore_free(collection); return NULL; } /* queue */ -myhtml_status_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token) +mystatus_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token) { // TODO: need refactoring this code // too many conditions @@ -1437,50 +1502,48 @@ myhtml_status_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token } } -#ifndef MyHTML_BUILD_WITHOUT_THREADS +#ifndef MyCORE_BUILD_WITHOUT_THREADS if(tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) { if(qnode && token) { - qnode->token = token; + qnode->args = token; myhtml_parser_worker(0, qnode); myhtml_parser_stream(0, qnode); } - tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread, tree->queue, 4, NULL); + tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL); } else { if(qnode) - qnode->token = token; + qnode->args = token; - tree->current_qnode = mythread_queue_node_malloc_round(tree->myhtml->thread, tree->queue_entry, NULL); + tree->current_qnode = mythread_queue_node_malloc_round(tree->myhtml->thread_stream, tree->queue_entry, NULL); } #else if(qnode && token) { - qnode->token = token; + qnode->args = token; myhtml_parser_worker(0, qnode); myhtml_parser_stream(0, qnode); } - tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread, tree->queue, 4, NULL); + tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL); -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ +#endif /* MyCORE_BUILD_WITHOUT_THREADS */ - if(tree->current_qnode == NULL) { + if(tree->current_qnode == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - } - tree->current_qnode->tree = tree; + tree->current_qnode->context = tree; tree->current_qnode->prev = qnode; if(qnode && token) myhtml_tokenizer_calc_current_namespace(tree, token); tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id); - if(tree->current_token_node == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; diff --git a/source/myhtml/myhtml.h b/source/myhtml/myhtml.h index 9a049e2..1bc94e1 100644 --- a/source/myhtml/myhtml.h +++ b/source/myhtml/myhtml.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -28,19 +28,19 @@ extern "C" { #include "myhtml/myosi.h" -#include "myhtml/utils/mctree.h" -#include "myhtml/utils/mcobject_async.h" +#include "mycore/utils/mctree.h" +#include "mycore/utils/mcobject_async.h" +#include "mycore/mythread.h" +#include "mycore/incoming.h" +#include "myencoding/encoding.h" #include "myhtml/tree.h" #include "myhtml/tag.h" #include "myhtml/def.h" #include "myhtml/parser.h" #include "myhtml/tokenizer.h" -#include "myhtml/thread.h" #include "myhtml/rules.h" #include "myhtml/token.h" #include "myhtml/charef.h" -#include "myhtml/encoding.h" -#include "myhtml/incoming.h" #include "myhtml/callback.h" #define mh_queue_current() tree->queue @@ -65,8 +65,10 @@ extern "C" { (onechar < 'A' || onechar > 'Z')) struct myhtml { - mythread_t *thread; - //mchar_async_t *mchar; // for all + mythread_t* thread_stream; + mythread_t* thread_batch; + mythread_t* thread_list[3]; + size_t thread_total; myhtml_tokenizer_state_f* parse_state_func; myhtml_insertion_f* insertion_func; @@ -83,68 +85,68 @@ struct myhtml_collection { myhtml_t * myhtml_create(void); -myhtml_status_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size); +mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size); void myhtml_clean(myhtml_t* myhtml); myhtml_t* myhtml_destroy(myhtml_t* myhtml); -myhtml_status_t myhtml_parse(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size); -myhtml_status_t myhtml_parse_fragment(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); +mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size); +mystatus_t myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); -myhtml_status_t myhtml_parse_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size); -myhtml_status_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myhtml_encoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); +mystatus_t myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size); +mystatus_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); -myhtml_status_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size); -myhtml_status_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); -myhtml_status_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size); -myhtml_status_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); -myhtml_status_t myhtml_parse_chunk_end(myhtml_tree_t* tree); +mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size); +mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); +mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size); +mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns); +mystatus_t myhtml_parse_chunk_end(myhtml_tree_t* tree); // encoding -void myhtml_encoding_set(myhtml_tree_t* tree, myhtml_encoding_t encoding); -myhtml_encoding_t myhtml_encoding_get(myhtml_tree_t* tree); +void myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding); +myencoding_t myhtml_encoding_get(myhtml_tree_t* tree); -myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, myhtml_status_t *status); -myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, myhtml_status_t *status); -myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, const char* key, size_t key_len, myhtml_status_t* status); +myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, mystatus_t *status); +myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, mystatus_t *status); +myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, const char* key, size_t key_len, mystatus_t* status); /* like a [some=value] or #id */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /* like a [some~=value] or .class */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_whitespace_separated(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /* like a [some^=value] */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /* like a [some$=value] */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /* like a [some*=value] */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); /* like a [some|=value] */ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node, bool case_insensitive, const char* key, size_t key_len, const char* value, size_t value_len, - myhtml_status_t* status); + mystatus_t* status); -myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, myhtml_status_t *status); -myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, myhtml_status_t *status); +myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, mystatus_t *status); +myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, mystatus_t *status); myhtml_tree_node_t * myhtml_node_first(myhtml_tree_t* tree); myhtml_tree_node_t * myhtml_node_next(myhtml_tree_node_t *node); @@ -174,7 +176,7 @@ bool myhtml_node_is_close_self(myhtml_tree_node_t *node); myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node); myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node); const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length); -myhtml_string_t * myhtml_node_string(myhtml_tree_node_t *node); +mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node); myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node); myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node); void myhtml_node_set_data(myhtml_tree_node_t *node, void* data); @@ -188,10 +190,10 @@ void myhtml_attribute_namespace_set(myhtml_tree_attr_t *attr, myhtml_namespace_t const char * myhtml_attribute_key(myhtml_tree_attr_t *attr, size_t *length); const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length); -myhtml_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr); -myhtml_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr); +mycore_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr); +mycore_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr); myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len); -myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding); +myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myencoding_t encoding); myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tree_attr_t *attr); myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len); void myhtml_attribute_delete(myhtml_tree_t *tree, myhtml_tree_node_t *node, myhtml_tree_attr_t *attr); @@ -200,35 +202,20 @@ myhtml_position_t myhtml_attribute_key_raw_position(myhtml_tree_attr_t *attr); myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr); /* collection */ -myhtml_collection_t * myhtml_collection_create(size_t size, myhtml_status_t *status); +myhtml_collection_t * myhtml_collection_create(size_t size, mystatus_t *status); void myhtml_collection_clean(myhtml_collection_t *collection); myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection); -myhtml_status_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length); +mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length); // strings -myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding); -myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding); +mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding); +mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding); bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size); bool myhtml_is_html_node(myhtml_tree_node_t *node, myhtml_tag_id_t tag_id); // queue -myhtml_status_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token); - -/** - * Platform-specific hdef performance clock queries. - * Implemented in perf.c - */ - -/** Get clock resolution */ -uint64_t myhtml_hperf_res(myhtml_status_t *status); - -/** Get current value in clock ticks */ -uint64_t myhtml_hperf_clock(myhtml_status_t *status); - -/** Print an hperf measure */ -myhtml_status_t myhtml_hperf_print(const char *name, uint64_t x, uint64_t y, FILE *fh); -myhtml_status_t myhtml_hperf_print_by_val(const char *name, uint64_t x, FILE *fh); +mystatus_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token); /* version */ myhtml_version_t myhtml_version(void); diff --git a/source/myhtml/mynamespace.c b/source/myhtml/mynamespace.c index 596b518..4821c75 100644 --- a/source/myhtml/mynamespace.c +++ b/source/myhtml/mynamespace.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,19 +20,19 @@ #include "myhtml/mynamespace.h" #include "myhtml/mynamespace_resource.h" -#include "myhtml/utils/resources.h" +#include "mycore/utils/resources.h" const myhtml_namespace_detect_name_entry_t * myhtml_namespace_name_entry_by_name(const char* name, size_t length) { - size_t idx = ((myhtml_string_chars_lowercase_map[ (const unsigned char)name[0] ] * - myhtml_string_chars_lowercase_map[ (const unsigned char)name[(length - 1)] ] * + size_t idx = ((mycore_string_chars_lowercase_map[ (const unsigned char)name[0] ] * + mycore_string_chars_lowercase_map[ (const unsigned char)name[(length - 1)] ] * length) % MyHTML_NAMESPACE_DETECT_NAME_STATIC_SIZE) + 1; while (myhtml_namespace_detect_name_entry_static_list_index[idx].name) { if(myhtml_namespace_detect_name_entry_static_list_index[idx].name_length == length) { - if(myhtml_strncasecmp(myhtml_namespace_detect_name_entry_static_list_index[idx].name, name, length) == 0) + if(mycore_strncasecmp(myhtml_namespace_detect_name_entry_static_list_index[idx].name, name, length) == 0) return &myhtml_namespace_detect_name_entry_static_list_index[idx]; if(myhtml_namespace_detect_name_entry_static_list_index[idx].next) diff --git a/source/myhtml/mynamespace.h b/source/myhtml/mynamespace.h index 0d8d0e9..755cd09 100644 --- a/source/myhtml/mynamespace.h +++ b/source/myhtml/mynamespace.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -31,7 +31,7 @@ extern "C" { #include #include "myhtml/myosi.h" -#include "myhtml/utils.h" +#include "mycore/utils.h" struct myhtml_namespace_detect_name_entry { const char* name; diff --git a/source/myhtml/mynamespace_resource.h b/source/myhtml/mynamespace_resource.h index e584cb7..be20f0b 100644 --- a/source/myhtml/mynamespace_resource.h +++ b/source/myhtml/mynamespace_resource.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/myosi.c b/source/myhtml/myosi.c deleted file mode 100644 index cd1e683..0000000 --- a/source/myhtml/myosi.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/myosi.h" - -void * myhtml_mem_malloc(size_t size) -{ - return malloc(size); -} - -// see who's call realloc -void * myhtml_mem_realloc(void* dst, size_t size) -{ - return realloc(dst, size); -} - -void * myhtml_mem_calloc(size_t num, size_t size) -{ - return calloc(num, size); -} - -void myhtml_mem_free(void* dst) -{ - free(dst); -} - -void myhtml_print(FILE* out, const char* format, ...) -{ - va_list argptr; - va_start(argptr, format); - vfprintf(out, format, argptr); - va_end(argptr); -} diff --git a/source/myhtml/myosi.h b/source/myhtml/myosi.h index 3901fae..cdc14db 100644 --- a/source/myhtml/myosi.h +++ b/source/myhtml/myosi.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,181 +22,23 @@ #define MyHTML_MYOSI_H #pragma once -#include -#include -#include -#include -#include -#include +#include "mycore/myosi.h" -#define MyHTML_VERSION_MAJOR 3 +#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_PATCH 0 -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__WINPTHREADS_VERSION) -#define IS_OS_WINDOWS -#include -#endif - -#if defined(_MSC_VER) -# define MyHTML_DEPRECATED(func) __declspec(deprecated) func -#elif defined(__GNUC__) || defined(__INTEL_COMPILER) -# define MyHTML_DEPRECATED(func) func __attribute__((deprecated)) -#else -# define MyHTML_DEPRECATED(func) func -#endif +#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) #ifdef __cplusplus extern "C" { #endif -/* Mem */ -#ifdef MyHTML_EXTERN_MALLOC - extern void * MyHTML_EXTERN_MALLOC(size_t size); - #define myhtml_malloc MyHTML_EXTERN_MALLOC -#else - #define myhtml_malloc myhtml_mem_malloc -#endif - -#ifdef MyHTML_EXTERN_REALLOC - extern void * MyHTML_EXTERN_REALLOC(void* dst, size_t size); - #define myhtml_realloc MyHTML_EXTERN_REALLOC -#else - #define myhtml_realloc myhtml_mem_realloc -#endif - -#ifdef MyHTML_EXTERN_CALLOC - extern void * MyHTML_EXTERN_CALLOC(size_t num, size_t size); - #define myhtml_calloc MyHTML_EXTERN_CALLOC -#else - #define myhtml_calloc myhtml_mem_calloc -#endif - -#ifdef MyHTML_EXTERN_FREE - extern void MyHTML_EXTERN_FREE(void* dst); - #define myhtml_free MyHTML_EXTERN_FREE -#else - #define myhtml_free myhtml_mem_free -#endif - -/* Debug */ -#ifdef DEBUG_MODE - #define MyHTML_DEBUG(format, ...) \ - myhtml_print(stderr, "DEBUG: "format"\n", ##__VA_ARGS__) -#else - #define MyHTML_DEBUG(format, ...) -#endif - -#ifdef DEBUG_MODE - #define MyHTML_DEBUG_ERROR(format, ...) \ - myhtml_print(stderr, "DEBUG ERROR: "format"\n", ##__VA_ARGS__) -#else - #define MyHTML_DEBUG_ERROR(format, ...) -#endif - -#define myhtml_base_add(myhtml, point, lenn, sizen, strcn, size) \ - myhtml->Lenn++; \ - if(myhtml->lenn == myhtml->sizen) { \ - myhtml->sizen += size; \ - myhtml->point = (strcn*)myhtml_realloc(myhtml->point, \ - sizeof(strcn) * myhtml->sizen); \ - } - #define MyHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK) -// encoding -// https://encoding.spec.whatwg.org/#the-encoding -// https://encoding.spec.whatwg.org/#legacy-single-byte-encodings -// https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings -// https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings -// https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings -// https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings -// https://encoding.spec.whatwg.org/#legacy-miscellaneous-encodings - -enum myhtml_encoding_list { - MyHTML_ENCODING_DEFAULT = 0x00, -// MyHTML_ENCODING_AUTO = 0x01, // future - MyHTML_ENCODING_NOT_DETERMINED = 0x02, - MyHTML_ENCODING_UTF_8 = 0x00, // default encoding - MyHTML_ENCODING_UTF_16LE = 0x04, - MyHTML_ENCODING_UTF_16BE = 0x05, - MyHTML_ENCODING_X_USER_DEFINED = 0x06, - MyHTML_ENCODING_BIG5 = 0x07, - MyHTML_ENCODING_EUC_JP = 0x08, - MyHTML_ENCODING_EUC_KR = 0x09, - MyHTML_ENCODING_GB18030 = 0x0a, - MyHTML_ENCODING_GBK = 0x0b, - MyHTML_ENCODING_IBM866 = 0x0c, - MyHTML_ENCODING_ISO_2022_JP = 0x0d, - MyHTML_ENCODING_ISO_8859_10 = 0x0e, - MyHTML_ENCODING_ISO_8859_13 = 0x0f, - MyHTML_ENCODING_ISO_8859_14 = 0x10, - MyHTML_ENCODING_ISO_8859_15 = 0x11, - MyHTML_ENCODING_ISO_8859_16 = 0x12, - MyHTML_ENCODING_ISO_8859_2 = 0x13, - MyHTML_ENCODING_ISO_8859_3 = 0x14, - MyHTML_ENCODING_ISO_8859_4 = 0x15, - MyHTML_ENCODING_ISO_8859_5 = 0x16, - MyHTML_ENCODING_ISO_8859_6 = 0x17, - MyHTML_ENCODING_ISO_8859_7 = 0x18, - MyHTML_ENCODING_ISO_8859_8 = 0x19, - MyHTML_ENCODING_ISO_8859_8_I = 0x1a, - MyHTML_ENCODING_KOI8_R = 0x1b, - MyHTML_ENCODING_KOI8_U = 0x1c, - MyHTML_ENCODING_MACINTOSH = 0x1d, - MyHTML_ENCODING_SHIFT_JIS = 0x1e, - MyHTML_ENCODING_WINDOWS_1250 = 0x1f, - MyHTML_ENCODING_WINDOWS_1251 = 0x20, - MyHTML_ENCODING_WINDOWS_1252 = 0x21, - MyHTML_ENCODING_WINDOWS_1253 = 0x22, - MyHTML_ENCODING_WINDOWS_1254 = 0x23, - MyHTML_ENCODING_WINDOWS_1255 = 0x24, - MyHTML_ENCODING_WINDOWS_1256 = 0x25, - MyHTML_ENCODING_WINDOWS_1257 = 0x26, - MyHTML_ENCODING_WINDOWS_1258 = 0x27, - MyHTML_ENCODING_WINDOWS_874 = 0x28, - MyHTML_ENCODING_X_MAC_CYRILLIC = 0x29, - MyHTML_ENCODING_LAST_ENTRY = 0x2a -} -typedef myhtml_encoding_t; - -typedef struct myhtml_encoding_result myhtml_encoding_result_t; -typedef struct myhtml_encoding_trigram myhtml_encoding_trigram_t; -typedef struct myhtml_encoding_trigram_result myhtml_encoding_trigram_result_t; -typedef struct myhtml_encoding_unicode_result myhtml_encoding_unicode_result_t; -typedef struct myhtml_encoding_detect_name_entry myhtml_encoding_detect_name_entry_t; -typedef struct myhtml_encoding_detect_attr myhtml_encoding_detect_attr_t; -typedef struct myhtml_encoding_entry_name_index myhtml_encoding_entry_name_index_t; - // char references typedef struct myhtml_data_process_entry myhtml_data_process_entry_t; -// strings -typedef struct myhtml_string_raw myhtml_string_raw_t; -typedef struct myhtml_string myhtml_string_t; - -// thread -enum mythread_thread_opt { - MyTHREAD_OPT_UNDEF = 0x00, - MyTHREAD_OPT_WAIT = 0x01, - MyTHREAD_OPT_QUIT = 0x02, - MyTHREAD_OPT_STOP = 0x04, - MyTHREAD_OPT_DONE = 0x08 -} -typedef mythread_thread_opt_t; - -typedef struct mythread_queue_list_entry mythread_queue_list_entry_t; -typedef struct mythread_queue_thread_param mythread_queue_thread_param_t; -typedef struct mythread_queue_list mythread_queue_list_t; -typedef struct mythread_queue_node mythread_queue_node_t; -typedef struct mythread_queue mythread_queue_t; - -typedef size_t mythread_id_t; -typedef struct mythread_workers_list mythread_workers_list_t; -typedef struct mythread_context mythread_context_t; -typedef struct mythread_list mythread_list_t; -typedef struct mythread mythread_t; - // tree enum myhtml_tree_flags { MyHTML_TREE_FLAGS_CLEAN = 0x000, @@ -386,68 +228,33 @@ enum myhtml_insertion_mode { // base /* Very important!!! - - for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000 - for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000 - for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000 - for myrender 030000..03ffff; MyRENDER_STATUS_OK == 0x000000 - for mydom 040000..04ffff; MyDOM_STATUS_OK == 0x000000 - for mynetwork 050000..05ffff; MyNETWORK_STATUS_OK == 0x000000 - for myecma 060000..06ffff; MyECMA_STATUS_OK == 0x000000 - not occupied 070000.. + See mycore/myosi.h:mystatus_t */ enum myhtml_status { MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_ERROR = 0x0001, MyHTML_STATUS_ERROR_MEMORY_ALLOCATION = 0x0002, - MyHTML_STATUS_THREAD_ERROR_MEMORY_ALLOCATION = 0x0009, - MyHTML_STATUS_THREAD_ERROR_LIST_INIT = 0x000a, - MyHTML_STATUS_THREAD_ERROR_ATTR_MALLOC = 0x000b, - MyHTML_STATUS_THREAD_ERROR_ATTR_INIT = 0x000c, - MyHTML_STATUS_THREAD_ERROR_ATTR_SET = 0x000d, - MyHTML_STATUS_THREAD_ERROR_ATTR_DESTROY = 0x000e, - MyHTML_STATUS_THREAD_ERROR_NO_SLOTS = 0x000f, - MyHTML_STATUS_THREAD_ERROR_BATCH_INIT = 0x0010, - MyHTML_STATUS_THREAD_ERROR_WORKER_MALLOC = 0x0011, - MyHTML_STATUS_THREAD_ERROR_WORKER_SEM_CREATE = 0x0012, - MyHTML_STATUS_THREAD_ERROR_WORKER_THREAD_CREATE = 0x0013, - MyHTML_STATUS_THREAD_ERROR_MASTER_THREAD_CREATE = 0x0014, - MyHTML_STATUS_THREAD_ERROR_SEM_PREFIX_MALLOC = 0x0032, - MyHTML_STATUS_THREAD_ERROR_SEM_CREATE = 0x0033, - MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC = 0x003c, - MyHTML_STATUS_THREAD_ERROR_QUEUE_NODES_MALLOC = 0x003d, - MyHTML_STATUS_THREAD_ERROR_QUEUE_NODE_MALLOC = 0x003e, - MyHTML_STATUS_THREAD_ERROR_MUTEX_MALLOC = 0x0046, - MyHTML_STATUS_THREAD_ERROR_MUTEX_INIT = 0x0047, - MyHTML_STATUS_THREAD_ERROR_MUTEX_LOCK = 0x0048, - MyHTML_STATUS_THREAD_ERROR_MUTEX_UNLOCK = 0x0049, - MyHTML_STATUS_RULES_ERROR_MEMORY_ALLOCATION = 0x0064, - MyHTML_STATUS_PERF_ERROR_COMPILED_WITHOUT_PERF = 0x00c8, - MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK = 0x00c9, - MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION = 0x012c, - MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT = 0x012d, - MyHTML_STATUS_TAGS_ERROR_MEMORY_ALLOCATION = 0x0190, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE = 0x0191, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_MALLOC = 0x0192, - MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE_NODE = 0x0193, - MyHTML_STATUS_TAGS_ERROR_CACHE_MEMORY_ALLOCATION = 0x0194, - MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION = 0x0195, - MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION = 0x01f4, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE = 0x01f5, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_INIT = 0x01f6, - MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE = 0x01f7, - MyHTML_STATUS_TREE_ERROR_INCOMING_BUFFER_CREATE = 0x01f8, - MyHTML_STATUS_ATTR_ERROR_ALLOCATION = 0x0258, - MyHTML_STATUS_ATTR_ERROR_CREATE = 0x0259, - MyHTML_STATUS_STREAM_BUFFER_ERROR_CREATE = 0x0300, - MyHTML_STATUS_STREAM_BUFFER_ERROR_INIT = 0x0301, - MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_CREATE = 0x0302, - MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_INIT = 0x0303, - MyHTML_STATUS_STREAM_BUFFER_ERROR_ADD_ENTRY = 0x0304, - MyHTML_STATUS_MCOBJECT_ERROR_CACHE_CREATE = 0x0340, - MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_CREATE = 0x0341, - MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_INIT = 0x0342, - MyHTML_STATUS_MCOBJECT_ERROR_CACHE_REALLOC = 0x0343 + MyHTML_STATUS_RULES_ERROR_MEMORY_ALLOCATION = 0x9064, + MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION = 0x912c, + MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT = 0x912d, + MyHTML_STATUS_TAGS_ERROR_MEMORY_ALLOCATION = 0x9190, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE = 0x9191, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_MALLOC = 0x9192, + MyHTML_STATUS_TAGS_ERROR_MCOBJECT_CREATE_NODE = 0x9193, + MyHTML_STATUS_TAGS_ERROR_CACHE_MEMORY_ALLOCATION = 0x9194, + MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION = 0x9195, + MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION = 0x91f4, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE = 0x91f5, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_INIT = 0x91f6, + MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE = 0x91f7, + MyHTML_STATUS_TREE_ERROR_INCOMING_BUFFER_CREATE = 0x91f8, + MyHTML_STATUS_ATTR_ERROR_ALLOCATION = 0x9258, + MyHTML_STATUS_ATTR_ERROR_CREATE = 0x9259, + MyHTML_STATUS_STREAM_BUFFER_ERROR_CREATE = 0x9300, + MyHTML_STATUS_STREAM_BUFFER_ERROR_INIT = 0x9301, + MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_CREATE = 0x9302, + MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_INIT = 0x9303, + MyHTML_STATUS_STREAM_BUFFER_ERROR_ADD_ENTRY = 0x9304 } typedef myhtml_status_t; @@ -486,7 +293,6 @@ struct myhtml_version { } typedef myhtml_version_t; -typedef struct myhtml_incoming_buffer myhtml_incoming_buffer_t; typedef myhtml_token_attr_t myhtml_tree_attr_t; typedef struct myhtml_collection myhtml_collection_t; typedef struct myhtml myhtml_t; @@ -498,24 +304,14 @@ typedef size_t (*myhtml_tokenizer_state_f)(myhtml_tree_t* tree, myhtml_token_nod typedef bool (*myhtml_insertion_f)(myhtml_tree_t* tree, myhtml_token_node_t* token); // char references state -typedef size_t (*myhtml_data_process_state_f)(myhtml_data_process_entry_t* charef, myhtml_string_t* str, const char* data, size_t offset, size_t size); +typedef size_t (*myhtml_data_process_state_f)(myhtml_data_process_entry_t* charef, mycore_string_t* str, const char* data, size_t offset, size_t size); // callback functions typedef void* (*myhtml_callback_token_f)(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx); typedef void (*myhtml_callback_tree_node_f)(myhtml_tree_t* tree, myhtml_tree_node_t* node, void* ctx); // find attribute value functions -typedef bool (*myhtml_attribute_value_find_f)(myhtml_string_t* str_key, const char* value, size_t value_len); - -// callback to be implemented by the user -typedef void (*myhtml_callback_serialize_f)(const char* buffer, size_t size, void* ctx); - -void * myhtml_mem_malloc(size_t size); -void * myhtml_mem_realloc(void* dst, size_t size); -void * myhtml_mem_calloc(size_t num, size_t size); -void myhtml_mem_free(void* dst); - -void myhtml_print(FILE* out, const char* format, ...); +typedef bool (*myhtml_attribute_value_find_f)(mycore_string_t* str_key, const char* value, size_t value_len); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/mystring.c b/source/myhtml/mystring.c index a353e60..71888e4 100644 --- a/source/myhtml/mystring.c +++ b/source/myhtml/mystring.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -19,205 +19,11 @@ */ #include "myhtml/mystring.h" -#include "myhtml/utils/resources.h" +#include "mycore/utils/resources.h" -///////////////////////////////////////////////////////// -//// Init, Clean, Destroy -//// -///////////////////////////////////////////////////////// -char * myhtml_string_init(mchar_async_t *mchar, size_t node_idx, myhtml_string_t* str, size_t size) -{ - str->data = mchar_async_malloc(mchar, node_idx, size); - str->size = size; - str->node_idx = node_idx; - str->mchar = mchar; - - myhtml_string_clean(str); - - return str->data; -} - -void myhtml_string_clean(myhtml_string_t* str) -{ - str->length = 0; -} - -void myhtml_string_clean_all(myhtml_string_t* str) -{ - memset(str, 0, sizeof(myhtml_string_t)); -} - -myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj) -{ - if(str == NULL) - return NULL; - - if(str->data && str->mchar) - mchar_async_free(str->mchar, str->node_idx, str->data); - - if(destroy_obj && str) { - myhtml_free(str); - return NULL; - } - - return str; -} - -void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw) -{ - str_raw->length = 0; -} - -void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw) -{ - memset(str_raw, 0, sizeof(myhtml_string_raw_t)); -} - -myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj) -{ - if(str_raw == NULL) - return NULL; - - if(str_raw->data) { - myhtml_free(str_raw->data); - str_raw->data = NULL; - } - - if(destroy_obj && str_raw) { - myhtml_free(str_raw); - return NULL; - } - - return str_raw; -} - -char * myhtml_string_realloc(myhtml_string_t *str, size_t new_size) -{ - if(str == NULL) - return NULL; - - char *tmp = mchar_async_realloc(str->mchar, str->node_idx, str->data, str->length, new_size); - - if(tmp) { - str->size = new_size; - str->data = tmp; - } - else - return NULL; - - return tmp; -} - -///////////////////////////////////////////////////////// -//// Basic API -//// -///////////////////////////////////////////////////////// -char * myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size) -{ - return mchar_async_malloc(mchar, node_id, size); -} - -char * myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size) -{ - return mchar_async_realloc(mchar, node_id, data, len_to_copy, size); -} - -void myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data) +size_t myhtml_string_append_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, bool emit_null_chars) { - mchar_async_free(mchar, node_id, data); -} - -char * myhtml_string_data(myhtml_string_t *str) -{ - if(str == NULL) - return NULL; - - return str->data; -} - -size_t myhtml_string_length(myhtml_string_t *str) -{ - if(str == NULL) - return 0; - - return str->length; -} - -size_t myhtml_string_size(myhtml_string_t *str) -{ - if(str == NULL) - return 0; - - return str->size; -} - -char * myhtml_string_data_set(myhtml_string_t *str, char *data) -{ - if(str == NULL) - return NULL; - - str->data = data; - return str->data; -} - -size_t myhtml_string_size_set(myhtml_string_t *str, size_t size) -{ - if(str == NULL) - return 0; - - str->size = size; - return str->size; -} - -size_t myhtml_string_length_set(myhtml_string_t *str, size_t length) -{ - if(str == NULL) - return 0; - - str->length = length; - return str->length; -} - -///////////////////////////////////////////////////////// -//// Append API -//// -///////////////////////////////////////////////////////// -void myhtml_string_append(myhtml_string_t* str, const char* buff, size_t length) -{ - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); - - memcpy(&str->data[str->length], buff, (sizeof(char) * length)); - - str->length += length; - str->data[str->length] = '\0'; -} - -void myhtml_string_append_one(myhtml_string_t* str, const char data) -{ - MyHTML_STRING_REALLOC_IF_NEED(str, 2, 1); - MyHTML_STRING_APPEND_BYTE_WITHOUT_REALLOC(data, str); - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT_REALLOC('\0', str); -} - -void myhtml_string_append_lowercase(myhtml_string_t* str, const char* data, size_t length) -{ - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); - - unsigned char *ref = (unsigned char*)&str->data[str->length]; - const unsigned char *buf = (const unsigned char*)data; - - size_t i; - for(i = 0; i < length; i++) { - ref[i] = myhtml_string_chars_lowercase_map[ buf[i] ]; - } - - ref[i] = '\0'; - str->length += length; -} - -size_t myhtml_string_append_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, bool emit_null_chars) -{ - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); + MyCORE_STRING_REALLOC_IF_NEED(str, (length + 1), 0); unsigned char *data = (unsigned char*)str->data; const unsigned char *u_buff = (const unsigned char*)buff; @@ -236,12 +42,16 @@ size_t myhtml_string_append_with_preprocessing(myhtml_string_t* str, const char* } else { str->length++; + + MyCORE_STRING_REALLOC_IF_NEED(str, 0, 2); + str->data[str->length] = '\0'; + return str->length; } } else if(u_buff[i] == 0x00 && emit_null_chars == false) { - myhtml_string_realloc(str, (str->size + 5)); + mycore_string_realloc(str, (str->size + 5)); data = (unsigned char*)str->data; // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) @@ -260,9 +70,9 @@ size_t myhtml_string_append_with_preprocessing(myhtml_string_t* str, const char* return 0; } -size_t myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, bool emit_null_chars) +size_t myhtml_string_append_lowercase_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, bool emit_null_chars) { - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); + MyCORE_STRING_REALLOC_IF_NEED(str, (length + 1), 0); unsigned char *data = (unsigned char*)str->data; const unsigned char *u_buff = (const unsigned char*)buff; @@ -278,12 +88,16 @@ size_t myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, c } else { str->length++; + + MyCORE_STRING_REALLOC_IF_NEED(str, 0, 2); + str->data[str->length] = '\0'; + return str->length; } } else if(u_buff[i] == 0x00 && emit_null_chars == false) { - myhtml_string_realloc(str, (str->size + 5)); + mycore_string_realloc(str, (str->size + 5)); data = (unsigned char*)str->data; // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) @@ -292,7 +106,7 @@ size_t myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, c data[str->length] = 0xBD; } else { - data[str->length] = myhtml_string_chars_lowercase_map[ u_buff[i] ]; + data[str->length] = mycore_string_chars_lowercase_map[ u_buff[i] ]; } str->length++; @@ -303,7 +117,7 @@ size_t myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, c return 0; } -size_t myhtml_string_before_append_any_preprocessing(myhtml_string_t* str, const char* buff, size_t length, size_t last_position) +size_t myhtml_string_before_append_any_preprocessing(mycore_string_t* str, const char* buff, size_t length, size_t last_position) { if(last_position == 0 || length == 0) return 0; @@ -318,27 +132,27 @@ size_t myhtml_string_before_append_any_preprocessing(myhtml_string_t* str, const //// Append With Convert Encoding and Preprocessing API //// ///////////////////////////////////////////////////////// -size_t myhtml_string_append_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars) +size_t myhtml_string_append_with_convert_encoding_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars) { - myhtml_encoding_result_t res; - myhtml_encoding_result_clean(&res); + myencoding_result_t res; + myencoding_result_clean(&res); return myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &res, buff, length, encoding, emit_null_chars); } -size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars) +size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(mycore_string_t* str, myencoding_result_t* res, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars) { - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); + MyCORE_STRING_REALLOC_IF_NEED(str, (length + 1), 0); unsigned const char* u_buff = (unsigned const char*)buff; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); + const myencoding_custom_f func = myencoding_get_function_by_id(encoding); for (size_t i = 0; i < length; i++) { - if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1); + if(func(u_buff[i], res) == MyENCODING_STATUS_OK) { + MyCORE_STRING_REALLOC_IF_NEED(str, 5, 1); - size_t len = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]); + size_t len = myencoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]); if(len == 1) { if(str->data[str->length] == '\r') { @@ -350,12 +164,16 @@ size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtm } else { str->length++; + + MyCORE_STRING_REALLOC_IF_NEED(str, 0, 2); + str->data[str->length] = '\0'; + return str->length; } } else if(str->data[str->length] == 0x00 && emit_null_chars == false) { - myhtml_string_realloc(str, (str->size + 5)); + mycore_string_realloc(str, (str->size + 5)); // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) str->data[str->length] = (char)0xEF; str->length++; @@ -368,32 +186,32 @@ size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtm } } - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); + MyCORE_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); return 0; } -size_t myhtml_string_append_lowercase_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars) +size_t myhtml_string_append_lowercase_with_convert_encoding_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars) { - myhtml_encoding_result_t res; - myhtml_encoding_result_clean(&res); + myencoding_result_t res; + myencoding_result_clean(&res); return myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &res, buff, length, encoding, emit_null_chars); } -size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars) +size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(mycore_string_t* str, myencoding_result_t* res, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars) { - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); + MyCORE_STRING_REALLOC_IF_NEED(str, (length + 1), 0); unsigned const char* u_buff = (unsigned const char*)buff; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); + const myencoding_custom_f func = myencoding_get_function_by_id(encoding); for (size_t i = 0; i < length; i++) { - if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1); + if(func(u_buff[i], res) == MyENCODING_STATUS_OK) { + MyCORE_STRING_REALLOC_IF_NEED(str, 5, 1); - size_t len = myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(res->result, &str->data[str->length]); + size_t len = myencoding_codepoint_to_lowercase_ascii_utf_8(res->result, &str->data[str->length]); if(len == 1) { if(str->data[str->length] == '\r') { @@ -405,12 +223,16 @@ size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preproces } else { str->length++; + + MyCORE_STRING_REALLOC_IF_NEED(str, 0, 2); + str->data[str->length] = '\0'; + return str->length; } } else if(str->data[str->length] == 0x00 && emit_null_chars == false) { - myhtml_string_realloc(str, (str->size + 5)); + mycore_string_realloc(str, (str->size + 5)); // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) str->data[str->length] = (char)0xEF; str->length++; @@ -423,182 +245,7 @@ size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preproces } } - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); + MyCORE_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); return 0; } - -///////////////////////////////////////////////////////// -//// Append With Convert Encoding without Preprocessing API -//// -///////////////////////////////////////////////////////// -void myhtml_string_append_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding) -{ - myhtml_encoding_result_t res; - myhtml_encoding_result_clean(&res); - - myhtml_string_append_chunk_with_convert_encoding(str, &res, buff, length, encoding); -} - -void myhtml_string_append_chunk_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding) -{ - unsigned const char* u_buff = (unsigned const char*)buff; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); - - for (size_t i = 0; i < length; i++) - { - if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0); - str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]); - } - } - - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); -} - -void myhtml_string_append_one_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char data, myhtml_encoding_t encoding) -{ - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); - - if(func((unsigned const char)data, res) == MyHTML_ENCODING_STATUS_OK) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 0); - str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]); - - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT_REALLOC('\0', str); - } -} - - -void myhtml_string_append_lowercase_ascii_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding) -{ - myhtml_encoding_result_t res; - myhtml_encoding_result_clean(&res); - - myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(str, &res, buff, length, encoding); -} - -void myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding) -{ - unsigned const char* u_buff = (unsigned const char*)buff; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(encoding); - - for (size_t i = 0; i < length; i++) - { - if(func(u_buff[i], res) == MyHTML_ENCODING_STATUS_OK) { - MyHTML_STRING_REALLOC_IF_NEED(str, 5, 1); - - size_t insert_len = myhtml_encoding_codepoint_to_ascii_utf_8(res->result, &str->data[str->length]); - - if(insert_len == 1) { - str->data[str->length] = (const char)myhtml_string_chars_lowercase_map[ u_buff[i] ]; - } - - str->length += insert_len; - } - } - - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT('\0', str, 1); -} - -void myhtml_string_copy(myhtml_string_t* dest, myhtml_string_t* target) -{ - myhtml_string_append(dest, target->data, target->length); -} - -size_t myhtml_string_raw_copy(char* str1, const char* str2, size_t size) -{ - str1[size] = '\0'; - - while(size) { - size--; - str1[size] = str2[size]; - } - - return size; -} - -size_t myhtml_string_raw_set_replacement_character(myhtml_string_t* target, size_t position) -{ - // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - target->data[(position)] = (char)0xEF; - target->data[(position + 1)] = (char)0xBF; - target->data[(position + 2)] = (char)0xBD; - - return 3; -} - -void myhtml_string_append_with_replacement_null_characters_only(myhtml_string_t* str, const char* buff, size_t length) -{ - MyHTML_STRING_REALLOC_IF_NEED(str, (length + 1), 0); - - unsigned char *data = (unsigned char*)str->data; - const unsigned char *u_buff = (const unsigned char*)buff; - - for (size_t i = 0; i < length; i++) - { - if(u_buff[i] == 0x00) { - myhtml_string_realloc(str, (str->size + 5)); - data = (unsigned char*)str->data; - - // Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - data[str->length] = 0xEF; str->length++; - data[str->length] = 0xBF; str->length++; - data[str->length] = 0xBD; - } - else - data[str->length] = u_buff[i]; - - str->length++; - } - - str->data[str->length] = '\0'; -} - -void myhtml_string_stay_only_whitespace(myhtml_string_t* target) -{ - char *data = target->data; - size_t pos = 0; - - for(size_t i = 0; i < target->length; i++) - { - if(myhtml_mystring_whitespace(data[i], ==, ||)) { - data[pos] = data[i]; - pos++; - } - } - - target->length = pos; -} - -size_t myhtml_string_crop_whitespace_from_begin(myhtml_string_t* target) -{ - char *data = target->data; - size_t i; - - for(i = 0; i < target->length; i++) { - if(myhtml_mystring_whitespace(data[i], !=, &&)) - break; - } - - if(i) - target->data = mchar_async_crop_first_chars_without_cache(target->data, i); - - target->length -= i; - - return i; -} - -size_t myhtml_string_whitespace_from_begin(myhtml_string_t* target) -{ - char *data = target->data; - size_t i; - - for(i = 0; i < target->length; i++) { - if(myhtml_mystring_whitespace(data[i], !=, &&)) - break; - } - - return i; -} - - diff --git a/source/myhtml/mystring.h b/source/myhtml/mystring.h index 0a0c02c..2906d59 100644 --- a/source/myhtml/mystring.h +++ b/source/myhtml/mystring.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -25,129 +25,31 @@ #ifdef __cplusplus extern "C" { #endif - + #include "myhtml/myosi.h" -#include "myhtml/encoding.h" -#include "myhtml/utils/mchar_async.h" + +#include "myencoding/encoding.h" +#include "myencoding/mystring.h" + +#include "mycore/mystring.h" +#include "mycore/utils/mchar_async.h" #define myhtml_string_get(str, attr) str->attr #define myhtml_string_set(str, attr) myhtml_string_get(str, attr) #define myhtml_string_len(str) myhtml_string_get(str, length) -#define MyHTML_STRING_PREPROCESSOR_CHECK_BUFFER_SIZE(str, unsigned_data) \ - if(str->length >= str->size) { \ - myhtml_string_realloc(str, (str->length + 5)); \ - \ - unsigned_data = (unsigned char*)str->data; \ - } - -#define MyHTML_STRING_REALLOC_IF_NEED(str, need_length, upto) \ - if((str->length + need_length) >= str->size) { \ - myhtml_string_realloc(str, (str->length + need_length + upto)); \ - } \ - -#define MyHTML_STRING_APPEND_BYTE_WITHOUT_REALLOC(onechar, str) \ - str->data[str->length] = onechar; \ - str->length++ - -#define MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT(onechar, str, upto) \ - if(str->length >= str->size) { \ - myhtml_string_realloc(str, (str->length + upto)); \ - } \ - str->data[str->length] = onechar - -#define MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT_REALLOC(onechar, str) \ - str->data[str->length] = onechar - -#define MyHTML_STRING_APPEND_BYTE(onechar, str, upto) \ - MyHTML_STRING_APPEND_BYTE_WITHOUT_INCREMENT(onechar, str, upto); \ - str->length++ - -#define myhtml_mystring_whitespace(onechar, action, logic) \ - (onechar action ' ' logic \ - onechar action '\t' logic \ - onechar action '\n' logic \ - onechar action '\f' logic \ - onechar action '\r') - -struct myhtml_string { - char* data; - size_t size; - size_t length; - - mchar_async_t *mchar; - size_t node_idx; -}; - -struct myhtml_string_raw { - char* data; - size_t size; - size_t length; -}; - -typedef size_t myhtml_string_index_t; - -char * myhtml_string_init(mchar_async_t *mchar, size_t node_idx, myhtml_string_t* str, size_t size); -char * myhtml_string_realloc(myhtml_string_t *str, size_t new_size); - -void myhtml_string_clean(myhtml_string_t* str); -void myhtml_string_clean_all(myhtml_string_t* str); -myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj); - -void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw); -void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw); -myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj); - -/* basic api */ -char * myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size); -char * myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size); -void myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data); - -char * myhtml_string_data(myhtml_string_t *str); -size_t myhtml_string_length(myhtml_string_t *str); -size_t myhtml_string_size(myhtml_string_t *str); -char * myhtml_string_data_set(myhtml_string_t *str, char *data); -size_t myhtml_string_size_set(myhtml_string_t *str, size_t size); -size_t myhtml_string_length_set(myhtml_string_t *str, size_t length); - -/* append */ -void myhtml_string_append(myhtml_string_t* str, const char* data, size_t length); -void myhtml_string_append_one(myhtml_string_t* str, const char data); -void myhtml_string_append_lowercase(myhtml_string_t* str, const char* data, size_t length); -void myhtml_string_append_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding); - -/* append with convert encoding */ -void myhtml_string_append_chunk_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding); -void myhtml_string_append_one_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char data, myhtml_encoding_t encoding); - -/* append with convert encoding lowercase */ -void myhtml_string_append_lowercase_ascii_with_convert_encoding(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding); -void myhtml_string_append_chunk_lowercase_ascii_with_convert_encoding(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding); - -/* Oh, long long long name */ - /* append with convert encoding with preprocessing */ -size_t myhtml_string_append_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars); -size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars); +size_t myhtml_string_append_with_convert_encoding_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars); +size_t myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(mycore_string_t* str, myencoding_result_t* res, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars); /* append with convert encoding lowercase with preprocessing */ -size_t myhtml_string_append_lowercase_with_convert_encoding_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars); -size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(myhtml_string_t* str, myhtml_encoding_result_t* res, const char* buff, size_t length, myhtml_encoding_t encoding, bool emit_null_chars); - -void myhtml_string_append_with_replacement_null_characters_only(myhtml_string_t* str, const char* buff, size_t length); +size_t myhtml_string_append_lowercase_with_convert_encoding_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars); +size_t myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(mycore_string_t* str, myencoding_result_t* res, const char* buff, size_t length, myencoding_t encoding, bool emit_null_chars); /* append with preprocessing */ -size_t myhtml_string_before_append_any_preprocessing(myhtml_string_t* str, const char* buff, size_t length, size_t last_position); -size_t myhtml_string_append_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, bool emit_null_chars); -size_t myhtml_string_append_lowercase_with_preprocessing(myhtml_string_t* str, const char* buff, size_t length, bool emit_null_chars); -size_t myhtml_string_raw_set_replacement_character(myhtml_string_t* target, size_t position); - -/* other functions */ -void myhtml_string_copy(myhtml_string_t* dest, myhtml_string_t* target); -size_t myhtml_string_raw_copy(char* str1, const char* str2, size_t size); -void myhtml_string_stay_only_whitespace(myhtml_string_t* target); -size_t myhtml_string_crop_whitespace_from_begin(myhtml_string_t* target); -size_t myhtml_string_whitespace_from_begin(myhtml_string_t* target); +size_t myhtml_string_before_append_any_preprocessing(mycore_string_t* str, const char* buff, size_t length, size_t last_position); +size_t myhtml_string_append_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, bool emit_null_chars); +size_t myhtml_string_append_lowercase_with_preprocessing(mycore_string_t* str, const char* buff, size_t length, bool emit_null_chars); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/parser.c b/source/myhtml/parser.c index 95cdc1b..95a7764 100644 --- a/source/myhtml/parser.c +++ b/source/myhtml/parser.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -24,19 +24,19 @@ void myhtml_parser_stream(mythread_id_t thread_id, void* ctx) { mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx; - if((qnode->tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) { - while(myhtml_rules_tree_dispatcher(qnode->tree, qnode->token)){} + if((((myhtml_tree_t*)(qnode->context))->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) { + while(myhtml_rules_tree_dispatcher(qnode->context, qnode->args)){} } } -size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once if((relative_begin + length) <= buffer->size) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char); else myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -57,7 +57,7 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position); if(relative_end > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char); else save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -72,7 +72,7 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position); if(length > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char); else myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -87,14 +87,14 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ return str->length; } -size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once if((relative_begin + length) <= buffer->size) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char); else myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -115,7 +115,7 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position); if(relative_end > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) save_position = myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char); else save_position = myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -131,12 +131,12 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position); if(length > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char); else myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, - &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), - proc_entry->encoding, proc_entry->emit_null_char); + &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), + proc_entry->encoding, proc_entry->emit_null_char); } break; @@ -146,9 +146,9 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* return str->length; } -size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once @@ -186,8 +186,8 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) { mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx; - myhtml_tree_t* tree = qnode->tree; - myhtml_token_node_t* token = qnode->token; + myhtml_tree_t* tree = qnode->context; + myhtml_token_node_t* token = qnode->args; /* * Tree can not be built without tokens @@ -201,8 +201,8 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) * return true * we need check both, 1 and 2 */ - if((qnode->tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) && - (qnode->tree->parse_flags & 2)) + if((tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) && + (tree->parse_flags & 2)) { if(tree->callback_before_token) tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx); @@ -215,15 +215,21 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) return; } + size_t mchar_node_id; +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(tree->myhtml->thread_batch) + mchar_node_id = tree->async_args[(thread_id + tree->myhtml->thread_batch->id_increase)].mchar_node_id; + else +#endif + mchar_node_id = tree->async_args[thread_id].mchar_node_id; + if(tree->callback_before_token) tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx); - size_t mchar_node_id = qnode->tree->async_args[thread_id].mchar_node_id; - if(token->tag_id == MyHTML_TAG__TEXT || token->tag_id == MyHTML_TAG__COMMENT) { - myhtml_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1)); token->attr_first = NULL; token->attr_last = NULL; @@ -246,7 +252,7 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) } else if(token->attr_first) { - myhtml_string_clean_all(&token->str); + mycore_string_clean_all(&token->str); myhtml_token_attr_t* attr = token->attr_first; myhtml_data_process_entry_t proc_entry; @@ -257,22 +263,22 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) myhtml_data_process_entry_clean(&proc_entry); proc_entry.encoding = tree->encoding; - myhtml_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1)); myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length); } else - myhtml_string_clean_all(&attr->key); + mycore_string_clean_all(&attr->key); if(attr->raw_value_length) { myhtml_data_process_entry_clean(&proc_entry); proc_entry.encoding = tree->encoding; proc_entry.is_attributes = true; - myhtml_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1)); myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length); } else - myhtml_string_clean_all(&attr->value); + mycore_string_clean_all(&attr->value); attr = attr->next; } @@ -281,7 +287,7 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) token->attr_first = NULL; token->attr_last = NULL; - myhtml_string_clean_all(&token->str); + mycore_string_clean_all(&token->str); } token->type |= MyHTML_TOKEN_TYPE_DONE; diff --git a/source/myhtml/parser.h b/source/myhtml/parser.h index a4d1ab9..d04e043 100644 --- a/source/myhtml/parser.h +++ b/source/myhtml/parser.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -37,9 +37,9 @@ void myhtml_parser_stream(mythread_id_t thread_id, void* ctx); void myhtml_parser_worker(mythread_id_t thread_id, void* ctx); void myhtml_parser_worker_stream(mythread_id_t thread_id, void* ctx); -size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); -size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); -size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); +size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); +size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); +size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/perf.c b/source/myhtml/perf.c deleted file mode 100644 index f95faab..0000000 --- a/source/myhtml/perf.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Authors: insoreiges@gmail.com (Evgeny Yakovlev), lex.borisov@gmail.com (Alexander Borisov) -*/ - -/** - * Platform-specific hdef performance clock value. - */ - -#include "myhtml/myhtml.h" -#include - -#if !defined(IS_OS_WINDOWS) -#include -#endif - -#if !defined(MyHTML_WITH_PERF) - -uint64_t myhtml_hperf_res(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_PERF_ERROR_COMPILED_WITHOUT_PERF; - - return 0; -} - -uint64_t myhtml_hperf_clock(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_PERF_ERROR_COMPILED_WITHOUT_PERF; - - return 0; -} - -#else - -#if defined(__APPLE__) -#include -#include -#elif defined(IS_OS_WINDOWS) -#endif - -#if defined(MyHTML_FORCE_RDTSC) /* Force using rdtsc, useful for comparison */ - -/** - * Get CPU rdtsc frequency. - * - * TODO: I think using rdtsc for measuring user-space counters is not correct: - * - rdtsc does not have a constant rate. instead ot is scaled to physical core's internal clock which changes due to power saving modes on modern CPUs - * - rdtsc is software-emulated in virtual machines which will introduce an inconsistency in reported ticks - * - user space process can be preempted between consecutive rdtsc measures but the physical clock will still tick while it is executing a different thread. - * also think what would happen if preempted process will be re-scheduled on a different physical core which has a different tsc value. - * - computing rdtsc frequency produces unreliable results (due to all of the above) - * - * Consider using platform-specific monotonic hperf timers (ftrace/dtrace) or even clock(). - */ -uint64_t myhtml_hperf_res(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - -#if defined(__APPLE__) && defined(CTL_HW) && defined(HW_CPU_FREQ) - unsigned long long freq = 0; - - /* OSX kernel: sysctl(CTL_HW | HW_CPU_FREQ) */ - size_t len = sizeof(freq); - int mib[2] = {CTL_HW, HW_CPU_FREQ}; - - int error = sysctl(mib, 2, &freq, &len, NULL, 0); - if (error) { - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; - } - - return freq; - -#elif defined(__linux__) - unsigned long long freq = 0; - - /* Use procfs on linux */ - FILE* fp = NULL; - fp = fopen("/proc/cpuinfo", "r"); - if (fp == NULL) { - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; - } - - /* Find 'CPU MHz :' */ - char buf[1024] = {0}; - double fval = 0.0; - while (fgets(buf, sizeof(buf), fp) != NULL) { - if (sscanf(buf, "cpu MHz : %lf\n", &fval) == 1) { - freq = (unsigned long long)(fval * 1000000ull); - break; - } - } - - fclose(fp); - return freq; - -#else -# warning Cant figure out cpu frequency on this platfrom - - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; -#endif /* defined __APPLE__ || __linux__ ... */ -} - -uint64_t myhtml_hperf_clock(myhtml_status_t *status) -{ - uint64_t x; - - __asm__ volatile ( - "cpuid\n\t" /* cpuid serializes any out-of-order prefetches before executing rdtsc (clobbers ebx, ecx, edx) */ - "rdtsc\n\t" - "shl $32, %%rdx\n\t" - "or %%rdx, %%rax" - : "=a" (x) - : - : "rdx", "ebx", "ecx"); - - return x; -} - -#elif defined(_POSIX_TIMERS) && defined(_POSIX_CPUTIME) \ - && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 199309L) /* Do we have clock_gettime? */ - -#define NSEC_PER_SECOND 1000000000ull -#define TIMESPEC_TO_USEC(tspec) (((uint64_t)(tspec).tv_sec * NSEC_PER_SECOND) + (tspec).tv_nsec) - -uint64_t myhtml_hperf_res(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - struct timespec tspec; - int error = clock_getres(CLOCK_PROCESS_CPUTIME_ID, &tspec); - if (error) { - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; - } - - unsigned long long ticks_per_sec = (unsigned long long)((double)NSEC_PER_SECOND / TIMESPEC_TO_USEC(tspec)); - return ticks_per_sec; -} - -uint64_t myhtml_hperf_clock(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - struct timespec tspec; - int error = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tspec); - if (error) { - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; - } - - return TIMESPEC_TO_USEC(tspec); -} - -#elif defined(__APPLE__) && defined(__MACH__) - -/* - * TODO: on OSX we can use clock_get_time: http://stackoverflow.com/questions/5167269/clock-gettime-alternative-in-mac-os-x - * Or this: http://web.archive.org/web/20100517095152/http://www.wand.net.nz/~smr26/wordpress/2009/01/19/monotonic-time-in-mac-os-x/comment-page-1/ - */ - -// TODO: this is incorrect plug for mac os x -// look at links before this comment - -#include - -uint64_t myhtml_hperf_res(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - unsigned long long freq = 0; - - size_t len = sizeof(freq); - int mib[2] = {CTL_HW, HW_CPU_FREQ}; - - int error = sysctl(mib, 2, &freq, &len, NULL, 0); - if (error) { - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; - } - - return freq; -} - -uint64_t myhtml_hperf_clock(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - return mach_absolute_time(); -} - -#else - -# warning No hperf implementation for this platform - -uint64_t myhtml_hperf_res(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; -} - -uint64_t myhtml_hperf_clock(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_PERF_ERROR_FIND_CPU_CLOCK; - - return 0; -} - -#endif /* defined(MyHTML_FORCE_RDTSC) ... */ -#endif /* MyHTML_WITH_PERF */ - -#define _MyHTML_CHECK_STATUS_AND_PRINT_ERROR \ - if(status == MyHTML_STATUS_PERF_ERROR_COMPILED_WITHOUT_PERF) { \ - fprintf(fh, "MyHTML: Library compiled without perf source. Please, build library with -DMyHTML_WITH_PERF flag\n"); \ - } \ - else if(status) { \ - fprintf(fh, "MyHTML: Something wrong! Perhaps, your platform does not support the measurement of performance\n"); \ - } \ - else - -myhtml_status_t myhtml_hperf_print(const char *name, uint64_t x, uint64_t y, FILE *fh) { - myhtml_status_t status; - - unsigned long long freq = myhtml_hperf_res(&status); - - if(freq) { - _MyHTML_CHECK_STATUS_AND_PRINT_ERROR { - fprintf(fh, "%s: %0.5f\n", name, (((float)(y - x) / (float)freq))); - } - } - - return status; -} - -myhtml_status_t myhtml_hperf_print_by_val(const char *name, uint64_t x, FILE *fh) { - myhtml_status_t status; - - unsigned long long freq = myhtml_hperf_res(&status); - - if(freq) { - _MyHTML_CHECK_STATUS_AND_PRINT_ERROR { - fprintf(fh, "%s: %0.5f\n", name, ((float)x / (float)freq)); - } - } - - return status; -} - - diff --git a/source/myhtml/rules.c b/source/myhtml/rules.c index 3921952..17e08ce 100644 --- a/source/myhtml/rules.c +++ b/source/myhtml/rules.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,16 +20,16 @@ #include "myhtml/rules.h" -void myhtml_insertion_fix_emit_for_text_begin_ws(myhtml_token_node_t* token) +void myhtml_insertion_fix_emit_for_text_begin_ws(myhtml_token_t* token, myhtml_token_node_t* node) { - myhtml_token_node_wait_for_done(token); - myhtml_string_crop_whitespace_from_begin(&token->str); + myhtml_token_node_wait_for_done(token, node); + mycore_string_crop_whitespace_from_begin(&node->str); } myhtml_token_node_t * myhtml_insertion_fix_split_for_text_begin_ws(myhtml_tree_t* tree, myhtml_token_node_t* token) { - myhtml_token_node_wait_for_done(token); - size_t len = myhtml_string_whitespace_from_begin(&token->str); + myhtml_token_node_wait_for_done(tree->token, token); + size_t len = mycore_string_whitespace_from_begin(&token->str); if(len == 0) return NULL; @@ -40,9 +40,9 @@ myhtml_token_node_t * myhtml_insertion_fix_split_for_text_begin_ws(myhtml_tree_t if(new_token == NULL) return NULL; - myhtml_string_init(tree->mchar, tree->mchar_node_id, &new_token->str, (len + 2)); + mycore_string_init(tree->mchar, tree->mchar_node_id, &new_token->str, (len + 2)); - myhtml_string_append(&new_token->str, token->str.data, len); + mycore_string_append(&new_token->str, token->str.data, len); new_token->type |= MyHTML_TOKEN_TYPE_DONE; @@ -55,9 +55,9 @@ myhtml_token_node_t * myhtml_insertion_fix_split_for_text_begin_ws(myhtml_tree_t void myhtml_insertion_fix_for_null_char_drop_all(myhtml_tree_t* tree, myhtml_token_node_t* token) { - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); - myhtml_string_t *str = &token->str; + mycore_string_t *str = &token->str; size_t len = str->length; size_t offset = 0; @@ -93,7 +93,7 @@ bool myhtml_insertion_mode_initial(myhtml_tree_t* tree, myhtml_token_node_t* tok return false; } - myhtml_insertion_fix_emit_for_text_begin_ws(token); + myhtml_insertion_fix_emit_for_text_begin_ws(tree->token, token); // default, other token tree->compat_mode = MyHTML_TREE_COMPAT_MODE_QUIRKS; @@ -109,7 +109,7 @@ bool myhtml_insertion_mode_initial(myhtml_tree_t* tree, myhtml_token_node_t* tok case MyHTML_TAG__DOCTYPE: { - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); myhtml_token_release_and_check_doctype_attributes(tree->token, token, &tree->doctype); @@ -183,7 +183,7 @@ bool myhtml_insertion_mode_before_html(myhtml_tree_t* tree, myhtml_token_node_t* break; } - myhtml_insertion_fix_emit_for_text_begin_ws(token); + myhtml_insertion_fix_emit_for_text_begin_ws(tree->token, token); // default, other token myhtml_tree_node_insert_root(tree, NULL, MyHTML_NAMESPACE_HTML); @@ -246,7 +246,7 @@ bool myhtml_insertion_mode_before_head(myhtml_tree_t* tree, myhtml_token_node_t* break; } - myhtml_insertion_fix_emit_for_text_begin_ws(token); + myhtml_insertion_fix_emit_for_text_begin_ws(tree->token, token); // default, other token tree->node_head = myhtml_tree_node_insert(tree, MyHTML_TAG_HEAD, MyHTML_NAMESPACE_HTML); @@ -1207,8 +1207,8 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok myhtml_tree_node_t* top_node = tree->open_elements->list[0]; if(top_node->token) { - myhtml_token_node_wait_for_done(token); - myhtml_token_node_wait_for_done(top_node->token); + myhtml_token_node_wait_for_done(tree->token, token); + myhtml_token_node_wait_for_done(tree->token, top_node->token); myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_rules_attr_id); } else { @@ -1259,8 +1259,8 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok myhtml_tree_node_t* top_node = tree->open_elements->list[1]; if(top_node->token) { - myhtml_token_node_wait_for_done(token); - myhtml_token_node_wait_for_done(top_node->token); + myhtml_token_node_wait_for_done(tree->token, token); + myhtml_token_node_wait_for_done(tree->token, top_node->token); myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_rules_attr_id); } else { @@ -1701,7 +1701,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok myhtml_tree_node_insert_html_element(tree, token); myhtml_tree_open_elements_pop(tree); - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); if(myhtml_token_attr_match_case(tree->token, token, "type", 4, "hidden", 6) == NULL) { tree->flags ^= (tree->flags & MyHTML_TREE_FLAGS_FRAMESET_OK); } @@ -1902,7 +1902,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok { myhtml_tree_active_formatting_reconstruction(tree); - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); myhtml_token_adjust_mathml_attributes(token); myhtml_token_adjust_foreign_attributes(token); @@ -1920,7 +1920,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok { myhtml_tree_active_formatting_reconstruction(tree); - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); myhtml_token_adjust_svg_attributes(token); myhtml_token_adjust_foreign_attributes(token); @@ -2182,7 +2182,8 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to case MyHTML_TAG_INPUT: { - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); + if(myhtml_token_attr_match_case(tree->token, token, "type", 4, "hidden", 6) == NULL) { tree->foster_parenting = true; myhtml_insertion_mode_in_body(tree, token); @@ -3301,7 +3302,7 @@ bool myhtml_insertion_mode_after_body(myhtml_tree_t* tree, myhtml_token_node_t* case MyHTML_TAG__COMMENT: { if(tree->open_elements->length == 0) { - MyHTML_DEBUG_ERROR("after body state; open_elements length < 1"); + MyCORE_DEBUG_ERROR("after body state; open_elements length < 1"); break; } @@ -3396,8 +3397,8 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t* // parse error /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */ - myhtml_token_node_wait_for_done(token); - myhtml_string_stay_only_whitespace(&token->str); + myhtml_token_node_wait_for_done(tree->token, token); + mycore_string_stay_only_whitespace(&token->str); if(token->str.length) myhtml_tree_node_insert_text(tree, token); @@ -3488,8 +3489,8 @@ bool myhtml_insertion_mode_after_frameset(myhtml_tree_t* tree, myhtml_token_node // parse error /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */ - myhtml_token_node_wait_for_done(token); - myhtml_string_stay_only_whitespace(&token->str); + myhtml_token_node_wait_for_done(tree->token, token); + mycore_string_stay_only_whitespace(&token->str); if(token->str.length) myhtml_tree_node_insert_text(tree, token); @@ -3684,7 +3685,7 @@ bool myhtml_insertion_mode_in_foreign_content_start_other(myhtml_tree_t* tree, m { myhtml_tree_node_t* adjusted_node = myhtml_tree_adjusted_current_node(tree); - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); if(adjusted_node->ns == MyHTML_NAMESPACE_MATHML) { myhtml_token_adjust_mathml_attributes(token); @@ -3738,7 +3739,7 @@ bool myhtml_insertion_mode_in_foreign_content(myhtml_tree_t* tree, myhtml_token_ // parse error /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:NULL_CHAR LEVEL:ERROR */ - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); myhtml_token_set_replacement_character_for_null_token(tree, token); } @@ -3812,12 +3813,14 @@ bool myhtml_insertion_mode_in_foreign_content(myhtml_tree_t* tree, myhtml_token_ if(token->tag_id == MyHTML_TAG_FONT) { - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); if(myhtml_token_attr_by_name(token, "color", 5) == NULL && myhtml_token_attr_by_name(token, "face" , 4) == NULL && myhtml_token_attr_by_name(token, "size" , 4) == NULL) + { return myhtml_insertion_mode_in_foreign_content_start_other(tree, token); + } } if(tree->fragment == NULL) { @@ -3854,7 +3857,7 @@ bool myhtml_rules_check_for_first_newline(myhtml_tree_t* tree, myhtml_token_node if(tree->flags &MyHTML_TREE_FLAGS_PARSE_FLAG_EMIT_NEWLINE) { if(token->tag_id == MyHTML_TAG__TEXT) { - myhtml_token_node_wait_for_done(token); + myhtml_token_node_wait_for_done(tree->token, token); if(token->str.length > 0) { if(token->str.data[0] == '\n') { @@ -3924,9 +3927,9 @@ bool myhtml_rules_tree_dispatcher(myhtml_tree_t* tree, myhtml_token_node_t* toke return reprocess; } -myhtml_status_t myhtml_rules_init(myhtml_t* myhtml) +mystatus_t myhtml_rules_init(myhtml_t* myhtml) { - myhtml->insertion_func = (myhtml_insertion_f*)myhtml_malloc(sizeof(myhtml_insertion_f) * MyHTML_INSERTION_MODE_LAST_ENTRY); + myhtml->insertion_func = (myhtml_insertion_f*)mycore_malloc(sizeof(myhtml_insertion_f) * MyHTML_INSERTION_MODE_LAST_ENTRY); if(myhtml->insertion_func == NULL) return MyHTML_STATUS_RULES_ERROR_MEMORY_ALLOCATION; diff --git a/source/myhtml/rules.h b/source/myhtml/rules.h index 1724e22..7610115 100644 --- a/source/myhtml/rules.h +++ b/source/myhtml/rules.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -30,8 +30,7 @@ extern "C" { #include "myhtml/myhtml.h" #include "myhtml/tree.h" - -myhtml_status_t myhtml_rules_init(myhtml_t* myhtml); +mystatus_t myhtml_rules_init(myhtml_t* myhtml); void myhtml_rules_stop_parsing(myhtml_tree_t* tree); bool myhtml_rules_tree_dispatcher(myhtml_tree_t* tree, myhtml_token_node_t* token); @@ -39,7 +38,6 @@ bool myhtml_insertion_mode_in_body_other_end_tag(myhtml_tree_t* tree, myhtml_tok bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* token); bool myhtml_insertion_mode_in_template(myhtml_tree_t* tree, myhtml_token_node_t* token); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/source/myhtml/serialization.c b/source/myhtml/serialization.c index 75003f3..c142c91 100755 --- a/source/myhtml/serialization.c +++ b/source/myhtml/serialization.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2016 Alexander Borisov + Copyright (C) 2016-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -19,22 +19,21 @@ Author: https://github.com/EmielBruijntjes (Emiel Bruijntjes) */ -#include #include "myhtml/serialization.h" /** * Forward declaration of all the functions that are used inside this module */ -static void myhtml_serialization_append(const char* str, size_t size, myhtml_callback_serialize_f callback, void *ptr); -static void myhtml_serialization_append_attr(const char* str, size_t length, myhtml_callback_serialize_f callback, void *ptr); -static void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, myhtml_callback_serialize_f callback, void *ptr); -static void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr); -static void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr); +static mystatus_t myhtml_serialization_append(const char* str, size_t size, mycore_callback_serialize_f callback, void *ptr); +static mystatus_t myhtml_serialization_append_attr(const char* str, size_t length, mycore_callback_serialize_f callback, void *ptr); +static mystatus_t myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, mycore_callback_serialize_f callback, void *ptr); +static mystatus_t myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr); +static mystatus_t myhtml_serialization_node_append_close(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr); /** * See the function myhtml_serialization_tree_buffer */ -bool myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str) +mystatus_t myhtml_serialization(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str) { return myhtml_serialization_tree_buffer(scope_node, str); } @@ -42,7 +41,7 @@ bool myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* s /** * See the function myhtml_serialization_node_buffer */ -bool myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str) +mystatus_t myhtml_serialization_node(myhtml_tree_node_t* node, mycore_string_raw_t* str) { return myhtml_serialization_node_buffer(node, str); } @@ -55,32 +54,41 @@ bool myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* st * @param ptr user-supplied pointer * @return bool */ -bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void *ptr) +mystatus_t myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, mycore_callback_serialize_f callback, void *ptr) { myhtml_tree_node_t* node = scope_node; while(node) { - if(!myhtml_serialization_node_callback(node, callback, ptr)) return false; + if(myhtml_serialization_node_callback(node, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; if(node->child) node = node->child; else { while(node != scope_node && node->next == NULL) { - myhtml_serialization_node_append_close(node, callback, ptr); + if(myhtml_serialization_node_append_close(node, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + node = node->parent; } if(node == scope_node) { - if(node != node->tree->document) myhtml_serialization_node_append_close(node, callback, ptr); + if(node != node->tree->document) { + if(myhtml_serialization_node_append_close(node, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + break; } - myhtml_serialization_node_append_close(node, callback, ptr); + if(myhtml_serialization_node_append_close(node, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + node = node->next; } } - return true; + return MyCORE_STATUS_OK; } /** @@ -91,46 +99,73 @@ bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, myhtml_c * @param ptr user-supplied pointer * @return bool */ -bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr) +mystatus_t myhtml_serialization_node_callback(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr) { switch (node->tag_id) { case MyHTML_TAG__TEXT: { - myhtml_serialization_node_append_text_node(node, callback, ptr); + if(myhtml_serialization_node_append_text_node(node, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; } case MyHTML_TAG__COMMENT: { - callback("", 3, ptr); + if(callback("", 3, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; } case MyHTML_TAG__DOCTYPE: { - callback("token) { myhtml_tree_attr_t* attr = node->token->attr_first; if(attr->key.data && attr->key.length) { - callback(" ", 1, ptr); - callback(attr->key.data, attr->key.length, ptr); + if(callback(" ", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + + if(callback(attr->key.data, attr->key.length, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; } } - callback(">", 1, ptr); + + if(callback(">", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; } default: { size_t length; const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length); - callback("<", 1, ptr); - callback(tag, length, ptr); - if(node->token) myhtml_serialization_attributes(node->tree, node->token->attr_first, callback, ptr); - callback(">", 1, ptr); + if(callback("<", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + + if(callback(tag, length, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + + if(node->token) { + if(myhtml_serialization_attributes(node->tree, node->token->attr_first, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback(">", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; } } - return true; + return MyCORE_STATUS_OK; } /** @@ -140,28 +175,33 @@ bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, myhtml_callbac * @param callback * @param ptr */ -void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, myhtml_callback_serialize_f callback, void* ptr) +mystatus_t myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, mycore_callback_serialize_f callback, void* ptr) { while(attr) { - callback(" ", 1, ptr); + if(callback(" ", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; switch (attr->ns) { case MyHTML_NAMESPACE_XML: - callback("xml:", 4, ptr); + if(callback("xml:", 4, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; case MyHTML_NAMESPACE_XMLNS: { /* If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns The attribute's serialized name is the string "xmlns:" followed by the attribute's local name. */ - if(attr->key.data && attr->key.length == 5 && myhtml_strcmp(attr->key.data, "xmlns")) { - callback("xmlns:", 6, ptr); + if(attr->key.data && attr->key.length == 5 && mycore_strcmp(attr->key.data, "xmlns")) { + if(callback("xmlns:", 6, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; } break; } case MyHTML_NAMESPACE_XLINK: { - callback("xlink:", 6, ptr); + if(callback("xlink:", 6, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; break; } @@ -172,14 +212,25 @@ void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* at size_t length; const char *data = myhtml_attribute_key(attr, &length); - if(data) callback(data, length, ptr); - callback("=\"", 2, ptr); + if(data) { + if(callback(data, length, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + if(callback("=\"", 2, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; data = myhtml_attribute_value(attr, &length); - if(data) myhtml_serialization_append_attr(data, length, callback, ptr); - callback("\"", 1, ptr); + if(data) { + if(myhtml_serialization_append_attr(data, length, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + if(callback("\"", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + attr = attr->next; } + + return MyCORE_STATUS_OK; } /** @@ -189,7 +240,7 @@ void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* at * @param callback * @param ptr */ -void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr) +mystatus_t myhtml_serialization_node_append_close(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void* ptr) { if(node->tag_id != MyHTML_TAG__TEXT && node->tag_id != MyHTML_TAG__COMMENT && @@ -198,10 +249,17 @@ void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_cal size_t length; const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length); - callback("", 1, ptr); + if(callback("", 1, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; } + + return MyCORE_STATUS_OK; } /** @@ -211,11 +269,16 @@ void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_cal * @param callback * @param ptr */ -void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr) +mystatus_t myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void* ptr) { - if(node->token == NULL || node->token->str.data == NULL) return; + if(node->token == NULL || node->token->str.data == NULL) return MyCORE_STATUS_OK; - if(node->parent == NULL) return myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr); + if(node->parent == NULL) { + if(myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + + return MyCORE_STATUS_OK; + } switch (node->parent->tag_id) { case MyHTML_TAG_STYLE: @@ -225,12 +288,17 @@ void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml case MyHTML_TAG_NOEMBED: case MyHTML_TAG_NOFRAMES: case MyHTML_TAG_PLAINTEXT: - callback(node->token->str.data, node->token->str.length, ptr); + if(callback(node->token->str.data, node->token->str.length, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; break; default: - myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr); + if(myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + break; } + + return MyCORE_STATUS_OK; } /** @@ -240,7 +308,7 @@ void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml * @param callback * @param ptr */ -void myhtml_serialization_append(const char *data, size_t size, myhtml_callback_serialize_f callback, void* ptr) +mystatus_t myhtml_serialization_append(const char *data, size_t size, mycore_callback_serialize_f callback, void* ptr) { // number of chars not yet displayed size_t notwritten = 0; @@ -249,24 +317,48 @@ void myhtml_serialization_append(const char *data, size_t size, myhtml_callback_ for (size_t i = 0; i < size; ++i) { switch ((unsigned char)data[i]) { case '&': - if (notwritten) callback(data + i - notwritten, notwritten, ptr); - callback("&", 5, ptr); + if(notwritten) { + if(callback(data + i - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback("&", 5, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; break; case '<': - if (notwritten) callback(data + i - notwritten, notwritten, ptr); - callback("<", 4, ptr); + if(notwritten) { + if(callback(data + i - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback("<", 4, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; break; case '>': - if (notwritten) callback(data + i - notwritten, notwritten, ptr); - callback(">", 4, ptr); + if(notwritten) { + if(callback(data + i - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback(">", 4, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; break; case 0xA0: if(i > 0 && (unsigned char)(data[(i - 1)]) == 0xC2) { - if (notwritten) callback(data + i - notwritten, (notwritten - 1), ptr); - callback(" ", 6, ptr); + if(notwritten) { + if(callback(data + i - notwritten, (notwritten - 1), ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback(" ", 6, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; } else { @@ -280,7 +372,11 @@ void myhtml_serialization_append(const char *data, size_t size, myhtml_callback_ } } - if (notwritten) callback(data + size - notwritten, notwritten, ptr); + if(notwritten) + if(callback(data + size - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + + return MyCORE_STATUS_OK; } /** @@ -290,7 +386,7 @@ void myhtml_serialization_append(const char *data, size_t size, myhtml_callback_ * @param callback * @param ptr */ -void myhtml_serialization_append_attr(const char* data, size_t size, myhtml_callback_serialize_f callback, void* ptr) +mystatus_t myhtml_serialization_append_attr(const char* data, size_t size, mycore_callback_serialize_f callback, void* ptr) { // number of chars not yet displayed size_t notwritten = 0; @@ -299,19 +395,37 @@ void myhtml_serialization_append_attr(const char* data, size_t size, myhtml_call for (size_t i = 0; i < size; ++i) { switch ((unsigned char)data[i]) { case '&': - if (notwritten) callback(data + i - notwritten, notwritten, ptr); - callback("&", 5, ptr); + if(notwritten) { + if(callback(data + i - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback("&", 5, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; break; case '"': - if (notwritten) callback(data + i - notwritten, notwritten, ptr); - callback(""", 6, ptr); + if(notwritten) { + if(callback(data + i - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback(""", 6, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; break; case 0xA0: if(i > 0 && (unsigned char)(data[(i - 1)]) == 0xC2) { - if (notwritten) callback(data + i - notwritten, (notwritten - 1), ptr); - callback(" ", 6, ptr); + if(notwritten) { + if(callback(data + i - notwritten, (notwritten - 1), ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + if(callback(" ", 6, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + notwritten = 0; } else { @@ -325,57 +439,53 @@ void myhtml_serialization_append_attr(const char* data, size_t size, myhtml_call } } - if (notwritten) callback(data + size - notwritten, notwritten, ptr); + if(notwritten) { + if(callback(data + size - notwritten, notwritten, ptr)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } + + return MyCORE_STATUS_OK; } -/** - * Because we want to leap out of the algorithm if we're halfway through - * serializing, we use a longjmp() call to jump back to the public APP - * @var jmp_buf - */ -static jmp_buf leap; - /** * Reallocate the buffer * @param str the buffer to reallocate * @param size new size */ -void myhtml_serialization_reallocate(myhtml_string_raw_t *str, size_t size) +mystatus_t myhtml_serialization_reallocate(mycore_string_raw_t *str, size_t size) { // construct a buffer - char *data = (char*)myhtml_realloc(str->data, size * sizeof(char)); + char *data = (char*)mycore_realloc(str->data, size * sizeof(char)); // was it ok? - if (data == NULL) { - - // allocation failed, reset the string object - myhtml_free(str->data); - memset(str, 0, sizeof(myhtml_string_raw_t)); - - // leap back to the source of the serialization algorithm - longjmp(leap, 1); + if(data == NULL) { + memset(str, 0, sizeof(mycore_string_raw_t)); + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; } else { - // reallocation succeeded str->data = data; str->size = size; } + + return MyCORE_STATUS_OK; } /** - * Implementation of the myhtml_callback_serialize_f function for internal + * Implementation of the mycore_callback_serialize_f function for internal * use that concatenats everything to a string * @param data * @param size */ -void myhtml_serialization_concatenate(const char* data, size_t length, void *ptr) +mystatus_t myhtml_serialization_concatenate(const char* data, size_t length, void *ptr) { - // get the string back - myhtml_string_raw_t* str = (myhtml_string_raw_t *)ptr; + mycore_string_raw_t *str = (mycore_string_raw_t*)ptr; // do we still have enough size in the output buffer? - if ((length + str->length) >= str->size) myhtml_serialization_reallocate(str, length + str->length + 4096); + if ((length + str->length) >= str->size) { + if(myhtml_serialization_reallocate(str, length + str->length + 4096)) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; + } // copy data strncpy(&str->data[ str->length ], data, length); @@ -383,6 +493,8 @@ void myhtml_serialization_concatenate(const char* data, size_t length, void *ptr // update counters str->length += length; str->data[ str->length ] = '\0'; + + return MyCORE_STATUS_OK; } /** @@ -392,7 +504,7 @@ void myhtml_serialization_concatenate(const char* data, size_t length, void *ptr * @param str * @return bool */ -bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str) { +mystatus_t myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str) { // we need an output variable if(str == NULL) return false; @@ -401,25 +513,15 @@ bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_str if(str->data == NULL) { str->size = 4098 * 5; str->length = 0; - str->data = (char*)myhtml_malloc(str->size * sizeof(char)); + str->data = (char*)mycore_malloc(str->size * sizeof(char)); if(str->data == NULL) { str->size = 0; - return false; + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; } } - // if allocation halfway the algorithm fails, we want to leap back - if (setjmp(leap) == 0) - { - // serialize the entire tree - return myhtml_serialization_tree_callback(scope_node, myhtml_serialization_concatenate, str); - } - else - { - // the serialization algorithm failed because of a memory-allocation failure - return false; - } + return myhtml_serialization_tree_callback(scope_node, myhtml_serialization_concatenate, str); } /** @@ -429,7 +531,7 @@ bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_str * @param str * @return bool */ -bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str) { +mystatus_t myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_string_raw_t* str) { // we need an output variable if(str == NULL) return false; @@ -438,24 +540,14 @@ bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_ra if(str->data == NULL) { str->size = 2048; str->length = 0; - str->data = (char*)myhtml_malloc(str->size * sizeof(char)); + str->data = (char*)mycore_malloc(str->size * sizeof(char)); if(str->data == NULL) { str->size = 0; return false; } } - - // if allocation halfway the algorithm fails, we want to leap back - if (setjmp(leap) == 0) - { - // pass on - return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str); - } - else - { - // the serialization algorithm failed because of a memory-allocation failure - return false; - } + + return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str); } diff --git a/source/myhtml/serialization.h b/source/myhtml/serialization.h index e651769..d21fd70 100644 --- a/source/myhtml/serialization.h +++ b/source/myhtml/serialization.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016 Alexander Borisov + Copyright (C) 2016-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -31,15 +31,15 @@ extern "C" { #endif // the serialization functions -bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str); -bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str); -bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void* ptr); -bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr); +mystatus_t myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str); +mystatus_t myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_string_raw_t* str); +mystatus_t myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, mycore_callback_serialize_f callback, void* ptr); +mystatus_t myhtml_serialization_node_callback(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void* ptr); // in versuon 1.0.3 this is in public api. // Need to set deprecated? -bool myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str); -bool myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str); +mystatus_t myhtml_serialization(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str); +mystatus_t myhtml_serialization_node(myhtml_tree_node_t* node, mycore_string_raw_t* str); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/stream.c b/source/myhtml/stream.c index b0779df..0090301 100644 --- a/source/myhtml/stream.c +++ b/source/myhtml/stream.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,14 +22,14 @@ myhtml_stream_buffer_t * myhtml_stream_buffer_create(void) { - return myhtml_calloc(1, sizeof(myhtml_stream_buffer_t)); + return mycore_calloc(1, sizeof(myhtml_stream_buffer_t)); } -myhtml_status_t myhtml_stream_buffer_init(myhtml_stream_buffer_t* stream_buffer, size_t entries_size) +mystatus_t myhtml_stream_buffer_init(myhtml_stream_buffer_t* stream_buffer, size_t entries_size) { stream_buffer->length = 0; stream_buffer->size = entries_size; - stream_buffer->entries = myhtml_calloc(entries_size, sizeof(myhtml_stream_buffer_entry_t)); + stream_buffer->entries = mycore_calloc(entries_size, sizeof(myhtml_stream_buffer_entry_t)); if(stream_buffer->entries == NULL) return MyHTML_STATUS_STREAM_BUFFER_ERROR_INIT; @@ -37,18 +37,18 @@ myhtml_status_t myhtml_stream_buffer_init(myhtml_stream_buffer_t* stream_buffer, return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_stream_buffer_entry_init(myhtml_stream_buffer_entry_t* stream_buffer_entry, size_t size) +mystatus_t myhtml_stream_buffer_entry_init(myhtml_stream_buffer_entry_t* stream_buffer_entry, size_t size) { if(stream_buffer_entry->data) { if(size <= stream_buffer_entry->size) return MyHTML_STATUS_OK; else - myhtml_free(stream_buffer_entry->data); + mycore_free(stream_buffer_entry->data); } stream_buffer_entry->length = 0; stream_buffer_entry->size = size; - stream_buffer_entry->data = myhtml_malloc(size * sizeof(char)); + stream_buffer_entry->data = mycore_malloc(size * sizeof(char)); if(stream_buffer_entry->data == NULL) return MyHTML_STATUS_STREAM_BUFFER_ENTRY_ERROR_INIT; @@ -68,10 +68,10 @@ myhtml_stream_buffer_entry_t * myhtml_stream_buffer_entry_destroy(myhtml_stream_ return NULL; if(stream_buffer_entry->data) - myhtml_free(stream_buffer_entry->data); + mycore_free(stream_buffer_entry->data); if(self_destroy) { - myhtml_free(stream_buffer_entry); + mycore_free(stream_buffer_entry); return NULL; } @@ -93,11 +93,11 @@ myhtml_stream_buffer_t * myhtml_stream_buffer_destroy(myhtml_stream_buffer_t* st for(size_t i = 0; i < stream_buffer->length; i++) myhtml_stream_buffer_entry_destroy(&stream_buffer->entries[i], false); - myhtml_free(stream_buffer->entries); + mycore_free(stream_buffer->entries); } if(self_destroy) { - myhtml_free(stream_buffer); + mycore_free(stream_buffer); return NULL; } @@ -109,7 +109,7 @@ myhtml_stream_buffer_entry_t * myhtml_stream_buffer_add_entry(myhtml_stream_buff if(stream_buffer->length >= stream_buffer->size) { size_t new_size = stream_buffer->size << 1; - myhtml_stream_buffer_entry_t *entries = myhtml_realloc(stream_buffer, sizeof(myhtml_stream_buffer_entry_t) * new_size); + myhtml_stream_buffer_entry_t *entries = mycore_realloc(stream_buffer, sizeof(myhtml_stream_buffer_entry_t) * new_size); if(entries) { memset(&entries[stream_buffer->size], 0, (new_size - stream_buffer->size)); @@ -122,7 +122,7 @@ myhtml_stream_buffer_entry_t * myhtml_stream_buffer_add_entry(myhtml_stream_buff } myhtml_stream_buffer_entry_t *entry = &stream_buffer->entries[ stream_buffer->length ]; - myhtml_status_t status = myhtml_stream_buffer_entry_init(entry, entry_data_size); + mystatus_t status = myhtml_stream_buffer_entry_init(entry, entry_data_size); if(status != MyHTML_STATUS_OK) return NULL; diff --git a/source/myhtml/stream.h b/source/myhtml/stream.h index e015438..94c4fc2 100644 --- a/source/myhtml/stream.h +++ b/source/myhtml/stream.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -41,17 +41,17 @@ struct myhtml_stream_buffer { size_t length; size_t size; - myhtml_encoding_result_t res; + myencoding_result_t res; }; myhtml_stream_buffer_t * myhtml_stream_buffer_create(void); -myhtml_status_t myhtml_stream_buffer_init(myhtml_stream_buffer_t* stream_buffer, size_t entries_size); +mystatus_t myhtml_stream_buffer_init(myhtml_stream_buffer_t* stream_buffer, size_t entries_size); void myhtml_stream_buffer_clean(myhtml_stream_buffer_t* stream_buffer); myhtml_stream_buffer_t * myhtml_stream_buffer_destroy(myhtml_stream_buffer_t* stream_buffer, bool self_destroy); myhtml_stream_buffer_entry_t * myhtml_stream_buffer_add_entry(myhtml_stream_buffer_t* stream_buffer, size_t entry_data_size); myhtml_stream_buffer_entry_t * myhtml_stream_buffer_current_entry(myhtml_stream_buffer_t* stream_buffer); -myhtml_status_t myhtml_stream_buffer_entry_init(myhtml_stream_buffer_entry_t* stream_buffer_entry, size_t size); +mystatus_t myhtml_stream_buffer_entry_init(myhtml_stream_buffer_entry_t* stream_buffer_entry, size_t size); void myhtml_stream_buffer_entry_clean(myhtml_stream_buffer_entry_t* stream_buffer_entry); myhtml_stream_buffer_entry_t * myhtml_stream_buffer_entry_destroy(myhtml_stream_buffer_entry_t* stream_buffer_entry, bool self_destroy); diff --git a/source/myhtml/tag.c b/source/myhtml/tag.c index 65a8f56..4664bb1 100755 --- a/source/myhtml/tag.c +++ b/source/myhtml/tag.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -23,11 +23,13 @@ myhtml_tag_t * myhtml_tag_create(void) { - return (myhtml_tag_t*)myhtml_malloc(sizeof(myhtml_tag_t)); + return (myhtml_tag_t*)mycore_malloc(sizeof(myhtml_tag_t)); } -myhtml_status_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags) +mystatus_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags) { + mystatus_t status; + tags->mcsimple_context = mcsimple_create(); if(tags->mcsimple_context == NULL) @@ -35,10 +37,16 @@ myhtml_status_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags) mcsimple_init(tags->mcsimple_context, 128, 1024, sizeof(myhtml_tag_context_t)); - tags->mchar_node = mchar_async_node_add(tree->mchar); - tags->tree = mctree_create(2); - tags->mchar = tree->mchar; - tags->tags_count = MyHTML_TAG_LAST_ENTRY; + tags->mchar_node = mchar_async_node_add(tree->mchar, &status); + tags->tree = mctree_create(2); + tags->mchar = tree->mchar; + tags->tags_count = MyHTML_TAG_LAST_ENTRY; + + if(status) + return status; + + if(tags->tree == NULL) + return MyCORE_STATUS_ERROR_MEMORY_ALLOCATION; myhtml_tag_clean(tags); @@ -64,7 +72,7 @@ myhtml_tag_t * myhtml_tag_destroy(myhtml_tag_t* tags) mchar_async_node_delete(tags->mchar, tags->mchar_node); - myhtml_free(tags); + mycore_free(tags); return NULL; } @@ -134,24 +142,3 @@ const myhtml_tag_context_t * myhtml_tag_get_by_name(myhtml_tag_t* tags, const ch return (myhtml_tag_context_t*)tags->tree->nodes[idx].value; } - -void myhtml_tag_print(myhtml_tag_t* tags, FILE* fh) -{ - size_t i; - for(i = MyHTML_TAG_FIRST_ENTRY; i < MyHTML_TAG_LAST_ENTRY; i++) - { - const myhtml_tag_context_t *ctx = myhtml_tag_get_by_id(tags, i); - - fprintf(fh, "<%s id=\"%zu\">\n", ctx->name, i); - } - - for(i = (MyHTML_TAG_LAST_ENTRY + 1); i < tags->tags_count; i++) - { - const myhtml_tag_context_t *ctx = myhtml_tag_get_by_id(tags, i); - - fprintf(fh, "<%s id=\"%zu\">\n", ctx->name, i); - } -} - - - diff --git a/source/myhtml/tag.h b/source/myhtml/tag.h index 3814ef9..945b65a 100644 --- a/source/myhtml/tag.h +++ b/source/myhtml/tag.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -31,12 +31,12 @@ extern "C" { #include "myhtml/tag_const.h" #include "myhtml/tokenizer.h" #include "myhtml/tree.h" -#include "myhtml/utils.h" -#include "myhtml/utils/mctree.h" -#include "myhtml/utils/mchar_async.h" -#include "myhtml/utils/mcobject.h" -#include "myhtml/utils/mcobject_async.h" -#include "myhtml/utils/mcsimple.h" +#include "mycore/utils.h" +#include "mycore/utils/mctree.h" +#include "mycore/utils/mchar_async.h" +#include "mycore/utils/mcobject.h" +#include "mycore/utils/mcobject_async.h" +#include "mycore/utils/mcsimple.h" #define myhtml_tag_get(tags, idx, attr) tags->context[idx].attr @@ -52,7 +52,7 @@ extern "C" { tags->context_length++; \ if(tags->context_length == tags->context_size) { \ tags->context_size += 4096; \ - tags->context = (myhtml_tag_context_t*)myhtml_realloc(tags->context, \ + tags->context = (myhtml_tag_context_t*)mycore_realloc(tags->context, \ sizeof(myhtml_tag_context_t) * tags->context_size); \ } \ myhtml_tag_context_clean(tags, tags->context_length) @@ -89,7 +89,7 @@ struct myhtml_tag { }; myhtml_tag_t * myhtml_tag_create(void); -myhtml_status_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags); +mystatus_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags); void myhtml_tag_clean(myhtml_tag_t* tags); myhtml_tag_t * myhtml_tag_destroy(myhtml_tag_t* tags); @@ -105,8 +105,6 @@ const myhtml_tag_context_t * myhtml_tag_get_by_name(myhtml_tag_t* tags, const ch const myhtml_tag_context_t * myhtml_tag_static_get_by_id(size_t idx); const myhtml_tag_context_t * myhtml_tag_static_search(const char* name, size_t length); -void myhtml_tag_print(myhtml_tag_t* tags, FILE* fh); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/source/myhtml/tag_const.h b/source/myhtml/tag_const.h index 0f5b389..1b29934 100644 --- a/source/myhtml/tag_const.h +++ b/source/myhtml/tag_const.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/tag_init.c b/source/myhtml/tag_init.c index c66c266..d96e62f 100755 --- a/source/myhtml/tag_init.c +++ b/source/myhtml/tag_init.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -18,8 +18,8 @@ Author: lex.borisov@gmail.com (Alexander Borisov) */ -#include "myhtml/utils/resources.h" -#include "tag.h" +#include "mycore/utils/resources.h" +#include "myhtml/tag.h" static const myhtml_tag_context_t myhtml_tag_base_list[MyHTML_TAG_LAST_ENTRY] = { @@ -2591,15 +2591,15 @@ static const myhtml_tag_static_list_t myhtml_tag_static_list_index[] = const myhtml_tag_context_t * myhtml_tag_static_search(const char* name, size_t length) { - size_t idx = ((myhtml_string_chars_lowercase_map[ (const unsigned char)name[0] ] * - myhtml_string_chars_lowercase_map[ (const unsigned char)name[(length - 1)] ] * + size_t idx = ((mycore_string_chars_lowercase_map[ (const unsigned char)name[0] ] * + mycore_string_chars_lowercase_map[ (const unsigned char)name[(length - 1)] ] * length) % MyHTML_BASE_STATIC_SIZE) + 1; while (myhtml_tag_static_list_index[idx].ctx) { if(myhtml_tag_static_list_index[idx].ctx->name_length == length) { - if(myhtml_strncasecmp(myhtml_tag_static_list_index[idx].ctx->name, name, length) == 0) + if(mycore_strncasecmp(myhtml_tag_static_list_index[idx].ctx->name, name, length) == 0) return myhtml_tag_static_list_index[idx].ctx; if(myhtml_tag_static_list_index[idx].next) diff --git a/source/myhtml/thread.c b/source/myhtml/thread.c deleted file mode 100644 index 94e277a..0000000 --- a/source/myhtml/thread.c +++ /dev/null @@ -1,1185 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/thread.h" - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - -#if defined(IS_OS_WINDOWS) -/*********************************************************************************** - * - * For Windows - * - ***********************************************************************************/ -myhtml_status_t myhtml_thread_create(mythread_t *mythread, mythread_list_t *thr, void *work_func) -{ - thr->pth = CreateThread(NULL, // default security attributes - 0, // use default stack size - work_func, // thread function name - &thr->data, // argument to thread function - 0, // use default creation flags - NULL); // returns the thread identifier - - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_join(mythread_t *mythread, mythread_list_t *thr) -{ - WaitForSingleObject(thr->pth, INFINITE); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_cancel(mythread_t *mythread, mythread_list_t *thr) -{ - TerminateThread(thr->pth, 0); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_init(mythread_t *mythread) -{ - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_clean(mythread_t *mythread) -{ - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_destroy(mythread_t *mythread) -{ - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_create(mythread_t *mythread, mythread_context_t *ctx, size_t prefix_id) -{ - ctx->mutex = CreateSemaphore(NULL, 0, 1, NULL); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_post(mythread_t *mythread, mythread_context_t *ctx) -{ - ReleaseSemaphore(ctx->mutex, 1, NULL); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_wait(mythread_t *mythread, mythread_context_t *ctx) -{ - WaitForSingleObject(ctx->mutex, INFINITE); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_try_wait(mythread_t *mythread, mythread_context_t *ctx) -{ - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_close(mythread_t *mythread, mythread_context_t *ctx) -{ - CloseHandle(ctx->mutex); - - return MyHTML_STATUS_OK; -} - -void myhtml_thread_nanosleep(const struct timespec *tomeout) -{ - Sleep(0); -} - -#else /* defined(IS_OS_WINDOWS) */ -/*********************************************************************************** - * - * For all unix system. POSIX pthread - * - ***********************************************************************************/ - -myhtml_status_t myhtml_thread_create(mythread_t *mythread, mythread_list_t *thr, void *work_func) -{ - pthread_create(&thr->pth, mythread->attr, - work_func, - (void*)(&thr->data)); - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_join(mythread_t *mythread, mythread_list_t *thr) -{ - pthread_join(thr->pth, NULL); - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_cancel(mythread_t *mythread, mythread_list_t *thr) -{ - pthread_cancel(thr->pth); - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_init(mythread_t *mythread) -{ - mythread->attr = (pthread_attr_t*)myhtml_calloc(1, sizeof(pthread_attr_t)); - - if(mythread->attr == NULL) - return MyHTML_STATUS_THREAD_ERROR_ATTR_MALLOC; - - mythread->sys_last_error = pthread_attr_init(mythread->attr); - if(mythread->sys_last_error) - return MyHTML_STATUS_THREAD_ERROR_ATTR_INIT; - - mythread->sys_last_error = pthread_attr_setdetachstate(mythread->attr, PTHREAD_CREATE_JOINABLE); - if(mythread->sys_last_error) - return MyHTML_STATUS_THREAD_ERROR_ATTR_SET; - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_clean(mythread_t *mythread) -{ - mythread->attr = NULL; - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_thread_attr_destroy(mythread_t *mythread) -{ - if(mythread->attr) { - mythread->sys_last_error = pthread_attr_destroy(mythread->attr); - - myhtml_free(mythread->attr); - mythread->attr = NULL; - - if(mythread->sys_last_error) - return MyHTML_STATUS_THREAD_ERROR_ATTR_DESTROY; - } - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_create(mythread_t *mythread, mythread_context_t *ctx, size_t prefix_id) -{ - ctx->mutex = (pthread_mutex_t*)myhtml_calloc(1, sizeof(pthread_mutex_t)); - - if(ctx->mutex == NULL) - return MyHTML_STATUS_THREAD_ERROR_MUTEX_MALLOC; - - if(pthread_mutex_init(ctx->mutex, NULL)) { - mythread->sys_last_error = errno; - return MyHTML_STATUS_THREAD_ERROR_MUTEX_INIT; - } - - if(pthread_mutex_lock(ctx->mutex)) { - mythread->sys_last_error = errno; - return MyHTML_STATUS_THREAD_ERROR_MUTEX_LOCK; - } - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_post(mythread_t *mythread, mythread_context_t *ctx) -{ - if(pthread_mutex_unlock(ctx->mutex)) { - mythread->sys_last_error = errno; - return MyHTML_STATUS_THREAD_ERROR_MUTEX_UNLOCK; - } - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_wait(mythread_t *mythread, mythread_context_t *ctx) -{ - if(pthread_mutex_lock(ctx->mutex)) { - mythread->sys_last_error = errno; - return MyHTML_STATUS_THREAD_ERROR_MUTEX_LOCK; - } - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_try_wait(mythread_t *mythread, mythread_context_t *ctx) -{ - if(pthread_mutex_trylock(ctx->mutex)) { - mythread->sys_last_error = errno; - return MyHTML_STATUS_THREAD_ERROR_MUTEX_LOCK; - } - - return MyHTML_STATUS_OK; -} - -myhtml_status_t myhtml_hread_mutex_close(mythread_t *mythread, mythread_context_t *ctx) -{ - if(ctx->mutex) { - pthread_mutex_destroy(ctx->mutex); - myhtml_free(ctx->mutex); - - ctx->mutex = NULL; - } - - return MyHTML_STATUS_OK; -} - -void myhtml_thread_nanosleep(const struct timespec *tomeout) -{ - nanosleep(tomeout, NULL); -} - -#endif /* !defined(IS_OS_WINDOWS) */ -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -/* - * - * MyTHREAD logic - * - */ - -mythread_t * mythread_create(void) -{ - return myhtml_calloc(1, sizeof(mythread_t)); -} - -#ifdef MyHTML_BUILD_WITHOUT_THREADS - -myhtml_status_t mythread_init(mythread_t *mythread, const char *sem_prefix, size_t thread_count) -{ - return MyHTML_STATUS_OK; -} - -#else /* MyHTML_BUILD_WITHOUT_THREADS */ - -myhtml_status_t mythread_init(mythread_t *mythread, const char *sem_prefix, size_t thread_count) -{ - mythread->batch_count = 0; - mythread->batch_first_id = 0; - mythread->stream_opt = MyTHREAD_OPT_STOP; - mythread->batch_opt = MyTHREAD_OPT_STOP; - - if(thread_count) - { - myhtml_status_t status = myhtml_thread_attr_init(mythread); - if(status) - return status; - - mythread->pth_list_root = 1; - mythread->pth_list_length = 1; - mythread->pth_list_size = thread_count + 1; - mythread->pth_list = (mythread_list_t*)myhtml_calloc(mythread->pth_list_size, sizeof(mythread_list_t)); - - if(mythread->pth_list == NULL) - return MyHTML_STATUS_THREAD_ERROR_LIST_INIT; - } - else { - myhtml_thread_attr_clean(mythread); - - mythread->sys_last_error = 0; - mythread->pth_list_root = 1; - mythread->pth_list_length = 1; - mythread->pth_list_size = 0; - mythread->pth_list = NULL; - } - - if(sem_prefix) - { - mythread->sem_prefix_length = strlen(sem_prefix); - - if(mythread->sem_prefix_length) { - mythread->sem_prefix = myhtml_calloc((mythread->sem_prefix_length + 1), sizeof(char)); - - if(mythread->sem_prefix == NULL) { - mythread->sem_prefix_length = 0; - return MyHTML_STATUS_THREAD_ERROR_SEM_PREFIX_MALLOC; - } - - myhtml_string_raw_copy(mythread->sem_prefix, sem_prefix, mythread->sem_prefix_length); - } - } - - return MyHTML_STATUS_OK; -} - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -void mythread_clean(mythread_t *mythread) -{ - mythread->sys_last_error = 0; -} - -mythread_t * mythread_destroy(mythread_t *mythread, mythread_callback_before_join_f before_join, bool self_destroy) -{ - if(mythread == NULL) - return NULL; - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - - myhtml_thread_attr_destroy(mythread); - - if(mythread->pth_list) { - mythread_resume_all(mythread); - mythread_stream_quit_all(mythread); - mythread_batch_quit_all(mythread); - - if(before_join) - before_join(mythread); - - for (size_t i = mythread->pth_list_root; i < mythread->pth_list_length; i++) - { - myhtml_thread_join(mythread, &mythread->pth_list[i]); - } - - myhtml_free(mythread->pth_list); - mythread->pth_list = NULL; - } - - if(mythread->sem_prefix) { - myhtml_free(mythread->sem_prefix); - - mythread->sem_prefix = NULL; - mythread->sem_prefix_length = 0; - } - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - - if(self_destroy) { - myhtml_free(mythread); - return NULL; - } - - return mythread; -} - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - -mythread_id_t _myhread_create_stream_raw(mythread_t *mythread, mythread_work_f work_func, void *process_func, mythread_thread_opt_t opt, myhtml_status_t *status, size_t total_count) -{ - mythread->sys_last_error = 0; - - if(status) - *status = MyHTML_STATUS_OK; - - if(mythread->pth_list_length >= mythread->pth_list_size) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_NO_SLOTS; - - return 0; - } - - mythread_list_t *thr = &mythread->pth_list[mythread->pth_list_length]; - - thr->data.mythread = mythread; - thr->data.func = work_func; - thr->data.id = mythread->pth_list_length; - thr->data.t_count = total_count; - thr->data.opt = opt; - thr->data.status = 0; - - myhtml_status_t m_status = myhtml_hread_mutex_create(mythread, &thr->data, 0); - - if(m_status != MyHTML_STATUS_OK && status) { - *status = m_status; - return 0; - } - - m_status = myhtml_thread_create(mythread, thr, process_func); - if(m_status != MyHTML_STATUS_OK) - return 0; - - mythread->pth_list_length++; - return thr->data.id; -} - -mythread_id_t myhread_create_stream(mythread_t *mythread, mythread_process_f process_func, mythread_work_f work_func, mythread_thread_opt_t opt, myhtml_status_t *status) -{ - return _myhread_create_stream_raw(mythread, work_func, process_func, opt, status, 0); -} - -mythread_id_t myhread_create_batch(mythread_t *mythread, mythread_process_f process_func, mythread_work_f work_func, mythread_thread_opt_t opt, myhtml_status_t *status, size_t count) -{ - if(mythread->batch_count) { - *status = MyHTML_STATUS_THREAD_ERROR_BATCH_INIT; - return 0; - } - else if((mythread->pth_list_length + count) > mythread->pth_list_size) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_NO_SLOTS; - - return 0; - } - - if(count == 0) - count = 1; - - mythread->batch_first_id = 0; - mythread->batch_count = count; - - size_t start = mythread->pth_list_length; - *status = MyHTML_STATUS_OK; - - bool init_first = false; - - for (size_t i = 0; i < count; i++) - { - mythread_id_t curr_id = _myhread_create_stream_raw(mythread, work_func, process_func, opt, status, i); - - if(init_first == false) { - mythread->batch_first_id = curr_id; - init_first = true; - } - - if(*status) - { - for (size_t n = start; n < (start + i); n++) - { - mythread_list_t *thr = &mythread->pth_list[n]; - - myhtml_thread_cancel(mythread, thr); - - myhtml_hread_mutex_post(mythread, &thr->data); - myhtml_hread_mutex_close(mythread, &thr->data); - } - - mythread->batch_first_id = 0; - mythread->batch_count = 0; - - break; - } - } - - return mythread->batch_first_id; -} - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -// mythread queue functions -#ifndef MyHTML_BUILD_WITHOUT_THREADS -mythread_queue_list_t * mythread_queue_list_create(myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - mythread_queue_list_t* queue_list = (mythread_queue_list_t*)myhtml_calloc(1, sizeof(mythread_queue_list_t)); - - if(queue_list == NULL) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC; - return NULL; - } - - return queue_list; -} - -void mythread_queue_list_destroy(mythread_queue_list_t* queue_list) -{ - if(queue_list == NULL) - return; - - myhtml_free(queue_list); -} - -size_t mythread_queue_list_get_count(mythread_queue_list_t* queue_list) -{ - return queue_list->count; -} - -mythread_queue_list_entry_t * mythread_queue_list_entry_push(mythread_t *mythread, mythread_queue_t *queue, myhtml_status_t *status) -{ - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - - if(status) - *status = MyHTML_STATUS_OK; - - mythread_queue_list_entry_t* entry = (mythread_queue_list_entry_t*)myhtml_calloc(1, sizeof(mythread_queue_list_entry_t)); - - if(entry == NULL) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC; - return NULL; - } - - entry->thread_param = (mythread_queue_thread_param_t*)myhtml_calloc(mythread->pth_list_size, sizeof(mythread_queue_thread_param_t)); - - if(entry->thread_param == NULL) { - myhtml_free(entry); - - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC; - return NULL; - } - - size_t idx; - for (idx = mythread->batch_first_id; idx < (mythread->batch_first_id + mythread->batch_count); idx++) { - entry->thread_param[idx].use = mythread->pth_list[idx].data.t_count; - } - - entry->queue = queue; - - if(mythread->stream_opt == MyTHREAD_OPT_UNDEF) { - mythread_suspend_all(mythread); - } - else if(mythread->stream_opt == MyTHREAD_OPT_STOP) { - mythread_stop_all(mythread); - } - - if(queue_list->first) { - queue_list->last->next = entry; - entry->prev = queue_list->last; - - queue_list->last = entry; - } - else { - queue_list->first = entry; - queue_list->last = entry; - } - - queue_list->count++; - - if(mythread->stream_opt != MyTHREAD_OPT_STOP) - mythread_resume_all(mythread); - - return entry; -} - -mythread_queue_list_entry_t * mythread_queue_list_entry_delete(mythread_t *mythread, mythread_queue_list_entry_t *entry, bool destroy_queue) -{ - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - - mythread_queue_list_entry_t *next = entry->next; - mythread_queue_list_entry_t *prev = entry->prev; - - if(mythread->stream_opt == MyTHREAD_OPT_UNDEF) { - mythread_suspend_all(mythread); - } - else if(mythread->stream_opt == MyTHREAD_OPT_STOP) { - mythread_stop_all(mythread); - } - - if(prev) - prev->next = next; - - if(next) - next->prev = prev; - - if(queue_list->first == entry) - queue_list->first = next; - - if(queue_list->last == entry) - queue_list->last = prev; - - if(mythread->stream_opt != MyTHREAD_OPT_STOP) - mythread_resume_all(mythread); - - if(destroy_queue && entry->queue) - mythread_queue_destroy(entry->queue); - - if(entry->thread_param) - myhtml_free(entry->thread_param); - - myhtml_free(entry); - - queue_list->count--; - - return NULL; -} - -void mythread_queue_list_entry_clean(mythread_t *mythread, mythread_queue_list_entry_t *entry) -{ - if(entry == NULL) - return; - - mythread_queue_clean(entry->queue); - - size_t idx; - for (idx = mythread->pth_list_root; idx < mythread->batch_first_id; idx++) { - entry->thread_param[idx].use = 0; - } - - for (idx = mythread->batch_first_id; idx < (mythread->batch_first_id + mythread->batch_count); idx++) { - entry->thread_param[idx].use = mythread->pth_list[idx].data.t_count; - } -} - -void mythread_queue_list_entry_wait_for_done(mythread_t *mythread, mythread_queue_list_entry_t *entry) -{ - if(entry == NULL) - return; - - size_t idx; - const struct timespec tomeout = {0, 0}; - - for (idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { - mythread_queue_thread_param_t *thread_param = &entry->thread_param[ idx ]; - while(thread_param->use < entry->queue->nodes_uses) { - myhtml_thread_nanosleep(&tomeout); - } - } -} - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -mythread_queue_t * mythread_queue_create(size_t size, myhtml_status_t *status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - if(size < 4096) - size = 4096; - - mythread_queue_t* queue = (mythread_queue_t*)myhtml_malloc(sizeof(mythread_queue_t)); - - if(queue == NULL) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_MALLOC; - return NULL; - } - - queue->nodes_pos_size = 512; - queue->nodes_size = size; - queue->nodes = (mythread_queue_node_t**)myhtml_calloc(queue->nodes_pos_size, sizeof(mythread_queue_node_t*)); - - if(queue->nodes == NULL) { - myhtml_free(queue); - - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODES_MALLOC; - return NULL; - } - - mythread_queue_clean(queue); - - queue->nodes[queue->nodes_pos] = (mythread_queue_node_t*)myhtml_malloc(sizeof(mythread_queue_node_t) * queue->nodes_size); - - if(queue->nodes[queue->nodes_pos] == NULL) { - myhtml_free(queue->nodes); - myhtml_free(queue); - - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODE_MALLOC; - return NULL; - } - - return queue; -} - -void mythread_queue_clean(mythread_queue_t* queue) -{ - queue->nodes_length = 0; - queue->nodes_pos = 0; - queue->nodes_root = 0; - queue->nodes_uses = 0; - - if(queue->nodes[queue->nodes_pos]) - mythread_queue_node_clean(&queue->nodes[queue->nodes_pos][queue->nodes_length]); -} - -mythread_queue_t * mythread_queue_destroy(mythread_queue_t* queue) -{ - if(queue == NULL) - return NULL; - - if(queue->nodes) { - for (size_t i = 0; i <= queue->nodes_pos; i++) { - myhtml_free(queue->nodes[i]); - } - - myhtml_free(queue->nodes); - } - - myhtml_free(queue); - - return NULL; -} - -void mythread_queue_node_clean(mythread_queue_node_t* qnode) -{ - memset(qnode, 0, sizeof(mythread_queue_node_t)); -} - -mythread_queue_node_t * mythread_queue_get_prev_node(mythread_queue_node_t* qnode) -{ - return qnode->prev; -} - -mythread_queue_node_t * mythread_queue_get_current_node(mythread_queue_t* queue) -{ - return &queue->nodes[queue->nodes_pos][queue->nodes_length]; -} - -mythread_queue_node_t * mythread_queue_get_first_node(mythread_queue_t* queue) -{ - return &queue->nodes[0][0]; -} - -size_t mythread_queue_count_used_node(mythread_queue_t* queue) -{ - return queue->nodes_uses; -} - -mythread_queue_node_t * mythread_queue_node_malloc(mythread_t *mythread, mythread_queue_t* queue, myhtml_status_t *status) -{ - queue->nodes_length++; - - if(queue->nodes_length >= queue->nodes_size) - { - queue->nodes_pos++; - - if(queue->nodes_pos >= queue->nodes_pos_size) - { - mythread_queue_wait_all_for_done(mythread); - - queue->nodes_pos_size <<= 1; - mythread_queue_node_t** tmp = myhtml_realloc(queue->nodes, sizeof(mythread_queue_node_t*) * queue->nodes_pos_size); - - if(tmp) { - memset(&tmp[queue->nodes_pos], 0, sizeof(mythread_queue_node_t*) * (queue->nodes_pos_size - queue->nodes_pos)); - - queue->nodes = tmp; - } - else { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODES_MALLOC; - - return NULL; - } - } - - if(queue->nodes[queue->nodes_pos] == NULL) { - queue->nodes[queue->nodes_pos] = (mythread_queue_node_t*)myhtml_malloc(sizeof(mythread_queue_node_t) * queue->nodes_size); - - if(queue->nodes[queue->nodes_pos] == NULL) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODE_MALLOC; - - return NULL; - } - } - - queue->nodes_length = 0; - } - - queue->nodes_uses++; - - return &queue->nodes[queue->nodes_pos][queue->nodes_length]; -} - -mythread_queue_node_t * mythread_queue_node_malloc_limit(mythread_t *mythread, mythread_queue_t* queue, size_t limit, myhtml_status_t *status) -{ - queue->nodes_length++; - - if(queue->nodes_uses >= limit) { - queue->nodes_uses++; - mythread_queue_wait_all_for_done(mythread); - - queue->nodes_length = 0; - queue->nodes_pos = 0; - queue->nodes_root = 0; - queue->nodes_uses = 0; - } - else if(queue->nodes_length >= queue->nodes_size) - { - queue->nodes_pos++; - - if(queue->nodes_pos >= queue->nodes_pos_size) - { - mythread_queue_wait_all_for_done(mythread); - - queue->nodes_pos_size <<= 1; - mythread_queue_node_t** tmp = myhtml_realloc(queue->nodes, sizeof(mythread_queue_node_t*) * queue->nodes_pos_size); - - if(tmp) { - memset(&tmp[queue->nodes_pos], 0, sizeof(mythread_queue_node_t*) * (queue->nodes_pos_size - queue->nodes_pos)); - - queue->nodes = tmp; - } - else { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODES_MALLOC; - - return NULL; - } - } - - if(queue->nodes[queue->nodes_pos] == NULL) { - queue->nodes[queue->nodes_pos] = (mythread_queue_node_t*)myhtml_malloc(sizeof(mythread_queue_node_t) * queue->nodes_size); - - if(queue->nodes[queue->nodes_pos] == NULL) { - if(status) - *status = MyHTML_STATUS_THREAD_ERROR_QUEUE_NODE_MALLOC; - - return NULL; - } - } - - queue->nodes_length = 0; - } - - queue->nodes_uses++; - - return &queue->nodes[queue->nodes_pos][queue->nodes_length]; -} - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - -mythread_queue_node_t * mythread_queue_node_malloc_round(mythread_t *mythread, mythread_queue_list_entry_t *entry, myhtml_status_t *status) -{ - mythread_queue_t* queue = entry->queue; - - queue->nodes_length++; - - if(queue->nodes_length >= queue->nodes_size) { - queue->nodes_uses++; - - mythread_queue_list_entry_wait_for_done(mythread, entry); - mythread_queue_list_entry_clean(mythread, entry); - } - else - queue->nodes_uses++; - - return &queue->nodes[queue->nodes_pos][queue->nodes_length]; -} - -void mythread_queue_wait_all_for_done(mythread_t *mythread) -{ - const struct timespec tomeout = {0, 0}; - - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - mythread_queue_list_entry_t *entry = queue_list->first; - - while(entry) - { - for (size_t idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { - while(entry->thread_param[idx].use < entry->queue->nodes_uses) { - myhtml_thread_nanosleep(&tomeout); - } - } - - entry = entry->next; - } -} - -#else - -void mythread_queue_wait_all_for_done(mythread_t *mythread) {} - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -#ifdef MyHTML_BUILD_WITHOUT_THREADS - -void mythread_stream_quit_all(mythread_t *mythread) {} -void mythread_batch_quit_all(mythread_t *mythread) {} -void mythread_stream_stop_all(mythread_t *mythread) {} -void mythread_batch_stop_all(mythread_t *mythread) {} -void mythread_stop_all(mythread_t *mythread) {} -void mythread_resume_all(mythread_t *mythread) {} -void mythread_suspend_all(mythread_t *mythread) {} - -#else /* MyHTML_BUILD_WITHOUT_THREADS */ - -// mythread functions -void mythread_stream_quit_all(mythread_t *mythread) -{ - mythread->stream_opt = MyTHREAD_OPT_QUIT; -} - -void mythread_batch_quit_all(mythread_t *mythread) -{ - mythread->batch_opt = MyTHREAD_OPT_QUIT; -} - -void mythread_stream_stop_all(mythread_t *mythread) -{ - if(mythread->stream_opt != MyTHREAD_OPT_STOP) - mythread->stream_opt = MyTHREAD_OPT_STOP; - - size_t idx; - const struct timespec tomeout = {0, 0}; - - for (idx = mythread->pth_list_root; idx < mythread->batch_first_id; idx++) { - while(mythread->pth_list[idx].data.opt != MyTHREAD_OPT_STOP) { - myhtml_thread_nanosleep(&tomeout); - } - } -} - -void mythread_batch_stop_all(mythread_t *mythread) -{ - if(mythread->batch_opt != MyTHREAD_OPT_STOP) - mythread->batch_opt = MyTHREAD_OPT_STOP; - - size_t idx; - const struct timespec tomeout = {0, 0}; - - for (idx = mythread->batch_first_id; idx < (mythread->batch_first_id + mythread->batch_count); idx++) { - while(mythread->pth_list[idx].data.opt != MyTHREAD_OPT_STOP) { - myhtml_thread_nanosleep(&tomeout); - } - } -} - -void mythread_stop_all(mythread_t *mythread) -{ - mythread_stream_stop_all(mythread); - mythread_batch_stop_all(mythread); -} - -void mythread_resume_all(mythread_t *mythread) -{ - if(mythread->stream_opt == MyTHREAD_OPT_UNDEF && - mythread->batch_opt == MyTHREAD_OPT_UNDEF) - return; - - if(mythread->stream_opt == MyTHREAD_OPT_WAIT || - mythread->batch_opt == MyTHREAD_OPT_WAIT) - { - mythread->stream_opt = MyTHREAD_OPT_UNDEF; - mythread->batch_opt = MyTHREAD_OPT_UNDEF; - } - else { - mythread->stream_opt = MyTHREAD_OPT_UNDEF; - mythread->batch_opt = MyTHREAD_OPT_UNDEF; - - for (size_t idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { - myhtml_hread_mutex_post(mythread, &mythread->pth_list[idx].data); - } - } -} - -void mythread_suspend_all(mythread_t *mythread) -{ - if(mythread->stream_opt != MyTHREAD_OPT_WAIT) - mythread->stream_opt = MyTHREAD_OPT_WAIT; - - if(mythread->batch_opt != MyTHREAD_OPT_WAIT) - mythread->batch_opt = MyTHREAD_OPT_WAIT; - - const struct timespec tomeout = {0, 0}; - - for (size_t idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { - myhtml_hread_mutex_try_wait(mythread, &mythread->pth_list[idx].data); - - while(mythread->pth_list[idx].data.opt != MyTHREAD_OPT_WAIT) { - myhtml_thread_nanosleep(&tomeout); - } - } -} - -unsigned int mythread_check_status(mythread_t *mythread) -{ - for (size_t idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { - if(mythread->pth_list[idx].data.status) { - return mythread->pth_list[idx].data.status; - } - } - - return MyHTML_STATUS_OK; -} - -bool mythread_function_see_for_all_done(mythread_queue_list_t *queue_list, size_t thread_id) -{ - size_t done_count = 0; - - mythread_queue_list_entry_t *entry = queue_list->first; - while(entry) - { - if(entry->thread_param[ thread_id ].use >= entry->queue->nodes_uses) { - done_count++; - entry = entry->next; - } - else - break; - } - - return done_count == queue_list->count; -} - -bool mythread_function_see_opt(mythread_context_t *ctx, volatile mythread_thread_opt_t opt, size_t done_count, const struct timespec *timeout) -{ - mythread_t * mythread = ctx->mythread; - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - - if(done_count != queue_list->count) - return false; - - if(opt & MyTHREAD_OPT_STOP) - { - if(mythread_function_see_for_all_done(queue_list, ctx->id)) - { - ctx->opt = MyTHREAD_OPT_STOP; - myhtml_hread_mutex_wait(mythread, ctx); - ctx->opt = MyTHREAD_OPT_UNDEF; - - return false; - } - } - else if(opt & MyTHREAD_OPT_QUIT) - { - if(mythread_function_see_for_all_done(queue_list, ctx->id)) - { - myhtml_hread_mutex_close(mythread, ctx); - ctx->opt = MyTHREAD_OPT_QUIT; - return true; - } - } - - myhtml_thread_nanosleep(timeout); - - return false; -} - -void mythread_function_queue_batch(void *arg) -{ - mythread_context_t *ctx = (mythread_context_t*)arg; - mythread_t * mythread = ctx->mythread; - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - - const struct timespec timeout = {0, 0}; - myhtml_hread_mutex_wait(mythread, ctx); - - do { - if(mythread->batch_opt & MyTHREAD_OPT_WAIT) { - ctx->opt = MyTHREAD_OPT_WAIT; - - while (mythread->batch_opt & MyTHREAD_OPT_WAIT) { - myhtml_thread_nanosleep(&timeout); - } - - ctx->opt = MyTHREAD_OPT_UNDEF; - } - - mythread_queue_list_entry_t *entry = queue_list->first; - size_t done_count = 0; - - while(entry) - { - mythread_queue_thread_param_t *thread_param = &entry->thread_param[ ctx->id ]; - - if(thread_param->use < entry->queue->nodes_uses) - { - size_t pos = thread_param->use / entry->queue->nodes_size; - size_t len = thread_param->use % entry->queue->nodes_size; - - mythread_queue_node_t *qnode = &entry->queue->nodes[pos][len]; - - if((qnode->tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) - ctx->func(ctx->id, (void*)qnode); - - thread_param->use += mythread->batch_count; - } - else - done_count++; - - entry = entry->next; - } - - if(done_count == queue_list->count && - mythread_function_see_opt(ctx, mythread->batch_opt, done_count, &timeout)) - break; - } - while (1); -} - -void mythread_function_queue_stream(void *arg) -{ - mythread_context_t *ctx = (mythread_context_t*)arg; - mythread_t * mythread = ctx->mythread; - mythread_queue_list_t *queue_list = (mythread_queue_list_t*)mythread->context; - - const struct timespec timeout = {0, 0}; - myhtml_hread_mutex_wait(mythread, ctx); - - do { - if(mythread->stream_opt & MyTHREAD_OPT_WAIT) { - ctx->opt = MyTHREAD_OPT_WAIT; - - while (mythread->stream_opt & MyTHREAD_OPT_WAIT) { - myhtml_thread_nanosleep(&timeout); - } - - ctx->opt = MyTHREAD_OPT_UNDEF; - } - - mythread_queue_list_entry_t *entry = queue_list->first; - size_t done_count = 0; - - while(entry) - { - mythread_queue_thread_param_t *thread_param = &entry->thread_param[ ctx->id ]; - - if(thread_param->use < entry->queue->nodes_uses) - { - size_t pos = thread_param->use / entry->queue->nodes_size; - size_t len = thread_param->use % entry->queue->nodes_size; - - mythread_queue_node_t *qnode = &entry->queue->nodes[pos][len]; - - if((qnode->tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) - ctx->func(ctx->id, (void*)qnode); - - thread_param->use++; - } - else - done_count++; - - entry = entry->next; - } - - if(done_count == queue_list->count && - mythread_function_see_opt(ctx, mythread->stream_opt, done_count, &timeout)) - break; - } - while (1); -} - -void mythread_function(void *arg) -{ - mythread_context_t *ctx = (mythread_context_t*)arg; - mythread_t * mythread = ctx->mythread; - - myhtml_hread_mutex_wait(mythread, ctx); - - do { - if(mythread->stream_opt & MyTHREAD_OPT_STOP || ctx->opt & MyTHREAD_OPT_STOP) - { - ctx->opt |= MyTHREAD_OPT_DONE; - myhtml_hread_mutex_wait(mythread, ctx); - - if(mythread->stream_opt & MyTHREAD_OPT_QUIT || ctx->opt & MyTHREAD_OPT_QUIT) - { - myhtml_hread_mutex_close(mythread, ctx); - ctx->opt = MyTHREAD_OPT_QUIT; - break; - } - - ctx->opt = MyTHREAD_OPT_UNDEF; - } - else if(mythread->stream_opt & MyTHREAD_OPT_QUIT || ctx->opt & MyTHREAD_OPT_QUIT) - { - myhtml_hread_mutex_close(mythread, ctx); - ctx->opt = MyTHREAD_OPT_QUIT; - break; - } - - ctx->func(ctx->id, ctx); - } - while (1); -} - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - - diff --git a/source/myhtml/thread.h b/source/myhtml/thread.h deleted file mode 100644 index 208f6df..0000000 --- a/source/myhtml/thread.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_THREAD_H -#define MyHTML_THREAD_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - -#if !defined(IS_OS_WINDOWS) -# include -# include -#endif - -#include -#include - -#include -#include - -#include "myhtml/myhtml.h" -#include "myhtml/tree.h" -#include "myhtml/mystring.h" - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -#define MyTHREAD_SEM_NAME "myhtml" - -/* functions */ -typedef void (*mythread_callback_before_join_f)(mythread_t* mythread); -typedef void (*mythread_process_f)(void* arg); -typedef void (*mythread_work_f)(mythread_id_t thread_id, void* arg); - -#ifdef MyHTML_BUILD_WITHOUT_THREADS - -struct mythread { - int sys_last_error; -}; - -#else /* MyHTML_BUILD_WITHOUT_THREADS */ - -void mythread_function_queue_stream(void *arg); -void mythread_function_queue_batch(void *arg); -void mythread_function(void *arg); - -// thread -struct mythread_context { - mythread_id_t id; - -#if defined(IS_OS_WINDOWS) - HANDLE mutex; -#else - pthread_mutex_t *mutex; -#endif - - size_t sem_name_size; - - mythread_work_f func; - - volatile size_t t_count; - volatile mythread_thread_opt_t opt; - - mythread_t *mythread; - unsigned int status; -}; - -struct mythread_list { -#if defined(IS_OS_WINDOWS) - HANDLE pth; -#else - pthread_t pth; -#endif - mythread_context_t data; - mythread_process_f process_func; -}; - -struct mythread_workers_list { - mythread_list_t *list; - size_t count; -}; - -struct mythread { - mythread_list_t *pth_list; - size_t pth_list_length; - size_t pth_list_size; - size_t pth_list_root; - - void *context; - - char *sem_prefix; - size_t sem_prefix_length; - -#if !defined(IS_OS_WINDOWS) - pthread_attr_t *attr; -#endif - - int sys_last_error; - - mythread_id_t batch_first_id; - mythread_id_t batch_count; - - volatile mythread_thread_opt_t stream_opt; - volatile mythread_thread_opt_t batch_opt; -}; - -mythread_id_t myhread_create_stream(mythread_t *mythread, mythread_process_f process_func, mythread_work_f func, mythread_thread_opt_t opt, myhtml_status_t *status); -mythread_id_t myhread_create_batch(mythread_t *mythread, mythread_process_f process_func, mythread_work_f func, mythread_thread_opt_t opt, myhtml_status_t *status, size_t count); - -void myhtml_thread_nanosleep(const struct timespec *tomeout); - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -mythread_t * mythread_create(void); -myhtml_status_t mythread_init(mythread_t *mythread, const char *sem_prefix, size_t thread_count); -void mythread_clean(mythread_t *mythread); -mythread_t * mythread_destroy(mythread_t *mythread, mythread_callback_before_join_f before_join, bool self_destroy); - -void mythread_stream_quit_all(mythread_t *mythread); -void mythread_batch_quit_all(mythread_t *mythread); - -void mythread_stream_stop_all(mythread_t *mythread); -void mythread_batch_stop_all(mythread_t *mythread); - -void mythread_stop_all(mythread_t *mythread); -void mythread_queue_wait_all_for_done(mythread_t *mythread); -void mythread_resume_all(mythread_t *mythread); -void mythread_suspend_all(mythread_t *mythread); -unsigned int mythread_check_status(mythread_t *mythread); - -// queue -struct mythread_queue_node { - mythread_queue_node_t *prev; - myhtml_token_node_t *token; - myhtml_tree_t *tree; -}; - -struct mythread_queue_thread_param { - volatile size_t use; -}; - -struct mythread_queue_list_entry { - mythread_queue_list_entry_t *next; - mythread_queue_list_entry_t *prev; - mythread_queue_t *queue; - mythread_queue_thread_param_t *thread_param; -}; - -struct mythread_queue_list { - mythread_queue_list_entry_t *first; - mythread_queue_list_entry_t *last; - - volatile size_t count; -}; - -struct mythread_queue { - mythread_queue_node_t **nodes; - - size_t nodes_pos; - size_t nodes_pos_size; - size_t nodes_length; - - volatile size_t nodes_uses; - volatile size_t nodes_size; - volatile size_t nodes_root; -}; - -mythread_queue_t * mythread_queue_create(size_t size, myhtml_status_t *status); -void mythread_queue_clean(mythread_queue_t* queue); -mythread_queue_t * mythread_queue_destroy(mythread_queue_t* token); - -void mythread_queue_node_clean(mythread_queue_node_t* qnode); - -size_t mythread_queue_count_used_node(mythread_queue_t* queue); -mythread_queue_node_t * mythread_queue_get_first_node(mythread_queue_t* queue); -mythread_queue_node_t * mythread_queue_get_prev_node(mythread_queue_node_t* qnode); -mythread_queue_node_t * mythread_queue_get_current_node(mythread_queue_t* queue); -mythread_queue_node_t * mythread_queue_node_malloc(mythread_t *mythread, mythread_queue_t* queue, myhtml_status_t *status); -mythread_queue_node_t * mythread_queue_node_malloc_limit(mythread_t *mythread, mythread_queue_t* queue, size_t limit, myhtml_status_t *status); - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - -mythread_queue_list_t * mythread_queue_list_create(myhtml_status_t *status); -void mythread_queue_list_destroy(mythread_queue_list_t* queue_list); - -size_t mythread_queue_list_get_count(mythread_queue_list_t* queue_list); - -mythread_queue_list_entry_t * mythread_queue_list_entry_push(mythread_t *mythread, mythread_queue_t *queue, myhtml_status_t *status); -mythread_queue_list_entry_t * mythread_queue_list_entry_delete(mythread_t *mythread, mythread_queue_list_entry_t *entry, bool destroy_queue); -void mythread_queue_list_entry_clean(mythread_t *mythread, mythread_queue_list_entry_t *entry); -void mythread_queue_list_entry_wait_for_done(mythread_t *mythread, mythread_queue_list_entry_t *entry); - -mythread_queue_node_t * mythread_queue_node_malloc_round(mythread_t *mythread, mythread_queue_list_entry_t *entry, myhtml_status_t *status); - -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* defined(__myhtml__myhtml_thread__) */ - diff --git a/source/myhtml/token.c b/source/myhtml/token.c index 1536764..0828dfc 100644 --- a/source/myhtml/token.c +++ b/source/myhtml/token.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -102,7 +102,7 @@ myhtml_token_t * myhtml_token_create(myhtml_tree_t* tree, size_t size) if(size == 0) size = 4096; - myhtml_token_t* token = (myhtml_token_t*)myhtml_malloc(sizeof(myhtml_token_t)); + myhtml_token_t* token = (myhtml_token_t*)mycore_malloc(sizeof(myhtml_token_t)); if(token == NULL) return NULL; @@ -110,15 +110,15 @@ myhtml_token_t * myhtml_token_create(myhtml_tree_t* tree, size_t size) token->nodes_obj = mcobject_async_create(); if(token->nodes_obj == NULL) { - free(token); + mycore_free(token); return NULL; } token->attr_obj = mcobject_async_create(); if(token->attr_obj == NULL) { - free(token->nodes_obj); - free(token); + mycore_free(token->nodes_obj); + mycore_free(token); return NULL; } @@ -157,7 +157,7 @@ myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token) if(token->attr_obj) token->attr_obj = mcobject_async_destroy(token->attr_obj, 1); - myhtml_free(token); + mycore_free(token); return NULL; } @@ -177,7 +177,7 @@ void myhtml_token_node_clean(myhtml_token_node_t* node) memset(node, 0, sizeof(myhtml_token_node_t)); node->type = MyHTML_TOKEN_TYPE_OPEN|MyHTML_TOKEN_TYPE_WHITESPACE; - myhtml_string_clean_all(&node->str); + mycore_string_clean_all(&node->str); } myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t async_node_id) @@ -195,8 +195,8 @@ void myhtml_token_attr_clean(myhtml_token_attr_t* attr) memset(attr, 0, sizeof(myhtml_token_attr_t)); attr->ns = MyHTML_NAMESPACE_HTML; - myhtml_string_clean_all(&attr->key); - myhtml_string_clean_all(&attr->value); + mycore_string_clean_all(&attr->key); + mycore_string_clean_all(&attr->value); } myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node) @@ -238,7 +238,7 @@ const char * myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *len return token_node->str.data; } -myhtml_string_t * myhtml_token_node_string(myhtml_token_node_t *token_node) +mycore_string_t * myhtml_token_node_string(myhtml_token_node_t *token_node) { return &token_node->str; } @@ -253,16 +253,18 @@ bool myhtml_token_node_is_close_self(myhtml_token_node_t *token_node) return (token_node->type & MyHTML_TOKEN_TYPE_CLOSE_SELF); } -void myhtml_token_node_wait_for_done(myhtml_token_node_t* node) +void myhtml_token_node_wait_for_done(myhtml_token_t* token, myhtml_token_node_t* node) { -#ifndef MyHTML_BUILD_WITHOUT_THREADS - - const struct timespec timeout = {0, 0}; - while((node->type & MyHTML_TOKEN_TYPE_DONE) == 0) {myhtml_thread_nanosleep(&timeout);} - +#ifndef MyCORE_BUILD_WITHOUT_THREADS + while((node->type & MyHTML_TOKEN_TYPE_DONE) == 0) {mythread_nanosleep_sleep(token->tree->myhtml->thread_stream->timespec);} #endif } +void myhtml_token_set_done(myhtml_token_node_t* node) +{ + node->type |= MyHTML_TOKEN_TYPE_DONE; +} + myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_token_node_t* node, size_t token_thread_idx, size_t attr_thread_idx) { if(node == NULL) @@ -283,7 +285,7 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke new_node->element_begin = node->element_begin; new_node->element_length = node->element_length; - myhtml_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size); + mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size); myhtml_token_node_attr_copy(token, node, new_node, attr_thread_idx); return new_node; @@ -291,10 +293,10 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke void myhtml_token_node_text_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* text, size_t text_len) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &dest->str, (text_len + 2)); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &dest->str, (text_len + 2)); - myhtml_string_t* string = &dest->str; - myhtml_string_append(string, text, text_len); + mycore_string_t* string = &dest->str; + mycore_string_append(string, text, text_len); } myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtml_token_node_t* dest, @@ -305,18 +307,18 @@ myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtm new_attr->next = 0; if(key_len) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); - myhtml_string_append_lowercase(&new_attr->key, key, key_len); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); + mycore_string_append_lowercase(&new_attr->key, key, key_len); } else - myhtml_string_clean_all(&new_attr->key); + mycore_string_clean_all(&new_attr->key); if(value_len) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); - myhtml_string_append(&new_attr->value, value, value_len); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); + mycore_string_append(&new_attr->value, value, value_len); } else - myhtml_string_clean_all(&new_attr->value); + mycore_string_clean_all(&new_attr->value); if(dest->attr_first == NULL) { new_attr->prev = 0; @@ -339,32 +341,32 @@ myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtm myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml_token_t* token, myhtml_token_node_t* dest, const char* key, size_t key_len, const char* value, size_t value_len, - size_t thread_idx, myhtml_encoding_t encoding) + size_t thread_idx, myencoding_t encoding) { myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL); new_attr->next = 0; if(key_len) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); - if(encoding == MyHTML_ENCODING_UTF_8) - myhtml_string_append_lowercase(&new_attr->key, key, key_len); + if(encoding == MyENCODING_UTF_8) + mycore_string_append_lowercase(&new_attr->key, key, key_len); else - myhtml_string_append_lowercase_ascii_with_convert_encoding(&new_attr->key, key, key_len, encoding); + myencoding_string_append_lowercase_ascii(&new_attr->key, key, key_len, encoding); } else - myhtml_string_clean_all(&new_attr->key); + mycore_string_clean_all(&new_attr->key); if(value_len) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); - if(encoding == MyHTML_ENCODING_UTF_8) - myhtml_string_append(&new_attr->value, value, value_len); + if(encoding == MyENCODING_UTF_8) + mycore_string_append(&new_attr->value, value, value_len); else - myhtml_string_append_with_convert_encoding(&new_attr->value, value, value_len, encoding); + myencoding_string_append(&new_attr->value, value, value_len, encoding); } else - myhtml_string_clean_all(&new_attr->value); + mycore_string_clean_all(&new_attr->value); if(dest->attr_first == NULL) { new_attr->prev = 0; @@ -415,18 +417,18 @@ bool myhtml_token_attr_copy(myhtml_token_t* token, myhtml_token_attr_t* attr, my new_attr->next = 0; if(attr->key.length) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (attr->key.length + 1)); - myhtml_string_append_lowercase(&new_attr->key, attr->key.data, attr->key.length); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (attr->key.length + 1)); + mycore_string_append_lowercase(&new_attr->key, attr->key.data, attr->key.length); } else - myhtml_string_clean_all(&new_attr->key); + mycore_string_clean_all(&new_attr->key); if(attr->value.length) { - myhtml_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (attr->value.length + 1)); - myhtml_string_append(&new_attr->value, attr->value.data, attr->value.length); + mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (attr->value.length + 1)); + mycore_string_append(&new_attr->value, attr->value.data, attr->value.length); } else - myhtml_string_clean_all(&new_attr->value); + mycore_string_clean_all(&new_attr->value); if(dest->attr_first == NULL) { new_attr->prev = 0; @@ -455,8 +457,8 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke { if(attr->key.length == key_size && attr->value.length == value_size) { - if((myhtml_strcmp(attr->key.data, key) == 0)) { - if((myhtml_strcmp(attr->value.data, value) == 0)) + if((mycore_strcmp(attr->key.data, key) == 0)) { + if((mycore_strcmp(attr->value.data, value) == 0)) return attr; else return NULL; @@ -478,8 +480,8 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml { if(attr->key.length == key_size && attr->value.length == value_size) { - if((myhtml_strcmp(attr->key.data, key) == 0)) { - if((myhtml_strcasecmp(attr->value.data, value) == 0)) + if((mycore_strcmp(attr->key.data, key) == 0)) { + if((mycore_strcasecmp(attr->value.data, value) == 0)) return attr; else return NULL; @@ -504,7 +506,7 @@ void myhtml_token_adjust_mathml_attributes(myhtml_token_node_t* target) void _myhtml_token_create_copy_srt(myhtml_token_t* token, const char* from, size_t from_size, char** to) { *to = mchar_async_malloc(token->tree->mchar, token->tree->mchar_node_id, (from_size + 2)); - myhtml_string_raw_copy(*to, from, from_size); + mycore_string_raw_copy(*to, from, from_size); } void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_tree_doctype_t* return_doctype) @@ -514,7 +516,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no if(attr && attr->key.length) { _myhtml_token_create_copy_srt(token, attr->key.data, attr->key.length, &return_doctype->attr_name); - if(myhtml_strcmp("html", return_doctype->attr_name)) + if(mycore_strcmp("html", return_doctype->attr_name)) return_doctype->is_html = false; else return_doctype->is_html = true; @@ -525,11 +527,11 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no _myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_name); if(return_doctype->attr_public) - myhtml_free(return_doctype->attr_public); + mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; if(return_doctype->attr_system) - myhtml_free(return_doctype->attr_system); + mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; return; @@ -539,7 +541,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no if(attr && attr->value.length) { - if(myhtml_strcasecmp(attr->value.data, "PUBLIC") == 0) + if(mycore_strcasecmp(attr->value.data, "PUBLIC") == 0) { // try see public attr = attr->next; @@ -554,22 +556,22 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no _myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system); else { if(return_doctype->attr_system) - myhtml_free(return_doctype->attr_system); + mycore_free(return_doctype->attr_system); _myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_system); } } else { if(return_doctype->attr_public) - myhtml_free(return_doctype->attr_public); + mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; if(return_doctype->attr_system) - myhtml_free(return_doctype->attr_system); + mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; } } - else if(myhtml_strncasecmp(attr->value.data, "SYSTEM", attr->value.length) == 0) + else if(mycore_strncasecmp(attr->value.data, "SYSTEM", attr->value.length) == 0) { attr = attr->next; @@ -579,21 +581,21 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no } else { if(return_doctype->attr_public) - myhtml_free(return_doctype->attr_public); + mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; if(return_doctype->attr_system) - myhtml_free(return_doctype->attr_system); + mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; } } else { if(return_doctype->attr_public) - myhtml_free(return_doctype->attr_public); + mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; if(return_doctype->attr_system) - myhtml_free(return_doctype->attr_system); + mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; } } @@ -601,14 +603,14 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no bool myhtml_token_doctype_check_html_4_0(myhtml_tree_doctype_t* return_doctype) { - return myhtml_strcmp(return_doctype->attr_public, "-//W3C//DTD HTML 4.0//EN") && - (return_doctype->attr_system == NULL || myhtml_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/REC-html40/strict.dtd")); + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD HTML 4.0//EN") && + (return_doctype->attr_system == NULL || mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/REC-html40/strict.dtd")); } bool myhtml_token_doctype_check_html_4_01(myhtml_tree_doctype_t* return_doctype) { - return myhtml_strcmp(return_doctype->attr_public, "-//W3C//DTD HTML 4.01//EN") && - (return_doctype->attr_system == NULL || myhtml_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/html4/strict.dtd")); + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD HTML 4.01//EN") && + (return_doctype->attr_system == NULL || mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/html4/strict.dtd")); } bool myhtml_token_doctype_check_xhtml_1_0(myhtml_tree_doctype_t* return_doctype) @@ -616,8 +618,8 @@ bool myhtml_token_doctype_check_xhtml_1_0(myhtml_tree_doctype_t* return_doctype) if(return_doctype->attr_system == NULL) return true; - return myhtml_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.0 Strict//EN") && - myhtml_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"); + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.0 Strict//EN") && + mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"); } bool myhtml_token_doctype_check_xhtml_1_1(myhtml_tree_doctype_t* return_doctype) @@ -625,8 +627,8 @@ bool myhtml_token_doctype_check_xhtml_1_1(myhtml_tree_doctype_t* return_doctype) if(return_doctype->attr_system == NULL) return true; - return myhtml_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.1//EN") && - myhtml_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"); + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.1//EN") && + mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"); } bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_tree_doctype_t* return_doctype) @@ -641,7 +643,7 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my if((return_doctype->is_html || return_doctype->attr_public || - (return_doctype->attr_system && myhtml_strcmp(return_doctype->attr_system, "about:legacy-compat")))) + (return_doctype->attr_system && mycore_strcmp(return_doctype->attr_system, "about:legacy-compat")))) { if(return_doctype->attr_public == NULL) return false; @@ -669,8 +671,8 @@ void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target) myhtml_token_attr_svg_replacement[i].from_size); if(attr) { - myhtml_string_clean(&attr->key); - myhtml_string_append(&attr->key, myhtml_token_attr_svg_replacement[i].to, + mycore_string_clean(&attr->key); + mycore_string_append(&attr->key, myhtml_token_attr_svg_replacement[i].to, myhtml_token_attr_svg_replacement[i].to_size); } } @@ -686,8 +688,8 @@ void myhtml_token_adjust_foreign_attributes(myhtml_token_node_t* target) myhtml_token_attr_namespace_replacement[i].from_size); if(attr) { - myhtml_string_clean(&attr->key); - myhtml_string_append(&attr->key, myhtml_token_attr_namespace_replacement[i].to, + mycore_string_clean(&attr->key); + mycore_string_append(&attr->key, myhtml_token_attr_namespace_replacement[i].to, myhtml_token_attr_namespace_replacement[i].to_size); attr->ns = myhtml_token_attr_namespace_replacement[i].ns; @@ -708,10 +710,10 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t* if(target_attr->key.length == dest_attr->key.length && target_attr->value.length == dest_attr->value.length) { - if(myhtml_strcmp(target_attr->key.data, dest_attr->key.data) != 0) + if(mycore_strcmp(target_attr->key.data, dest_attr->key.data) != 0) break; - if(myhtml_strcasecmp(target_attr->value.data, dest_attr->value.data) != 0) + if(mycore_strcasecmp(target_attr->value.data, dest_attr->value.data) != 0) break; } else @@ -734,7 +736,7 @@ myhtml_token_attr_t * myhtml_token_attr_by_name(myhtml_token_node_t* node, const while (attr) { if(name_length == attr->key.length) { - if(myhtml_strcmp(attr->key.data, name) == 0) + if(mycore_strcmp(attr->key.data, name) == 0) break; } @@ -803,11 +805,11 @@ myhtml_token_attr_t * myhtml_token_attr_remove_by_name(myhtml_token_node_t* node myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, myhtml_token_node_t* token_to, myhtml_token_node_t* token_from, bool cp_reverse) { - myhtml_token_node_wait_for_done(token_to); - myhtml_token_node_wait_for_done(token_from); + myhtml_token_node_wait_for_done(tree->token, token_to); + myhtml_token_node_wait_for_done(tree->token, token_from); - myhtml_string_t *string1 = &token_to->str; - myhtml_string_t *string2 = &token_from->str; + mycore_string_t *string1 = &token_to->str; + mycore_string_t *string2 = &token_from->str; token_to->raw_begin = 0; token_to->raw_length = 0; @@ -815,10 +817,10 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, if(token_to->str.node_idx == tree->mchar_node_id) { if(cp_reverse) { - //myhtml_string_copy(string2, &string_base); + //mycore_string_copy(string2, &string_base); } else { - myhtml_string_copy(string1, string2); + mycore_string_copy(string1, string2); } return token_to; @@ -826,25 +828,25 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, if(token_from->str.node_idx == tree->mchar_node_id) { if(cp_reverse) { - myhtml_string_copy(string2, string1); + mycore_string_copy(string2, string1); } else { - myhtml_string_copy(string1, string2); + mycore_string_copy(string1, string2); } return token_from; } else { - myhtml_string_t string_base; - myhtml_string_init(tree->mchar, tree->mchar_node_id, &string_base, (string1->length + string2->length + 2)); + mycore_string_t string_base; + mycore_string_init(tree->mchar, tree->mchar_node_id, &string_base, (string1->length + string2->length + 2)); if(cp_reverse) { - myhtml_string_copy(&string_base, string2); - myhtml_string_copy(&string_base, string1); + mycore_string_copy(&string_base, string2); + mycore_string_copy(&string_base, string1); } else { - myhtml_string_copy(&string_base, string1); - myhtml_string_copy(&string_base, string2); + mycore_string_copy(&string_base, string1); + mycore_string_copy(&string_base, string2); } token_to->str = string_base; @@ -855,117 +857,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, void myhtml_token_set_replacement_character_for_null_token(myhtml_tree_t* tree, myhtml_token_node_t* node) { - myhtml_token_node_wait_for_done(node); + myhtml_token_node_wait_for_done(tree->token, node); - myhtml_string_t new_str; - myhtml_string_init(tree->mchar, tree->mchar_node_id, &new_str, (node->str.length + 2)); + mycore_string_t new_str; + mycore_string_init(tree->mchar, tree->mchar_node_id, &new_str, (node->str.length + 2)); - myhtml_string_append_with_replacement_null_characters_only(&new_str, node->str.data, node->str.length); + mycore_string_append_with_replacement_null_characters(&new_str, node->str.data, node->str.length); node->str = new_str; } -void myhtml_token_set_done(myhtml_token_node_t* node) -{ - node->type |= MyHTML_TOKEN_TYPE_DONE; -} - -void myhtml_token_print_param_by_idx(myhtml_tree_t* myhtml_tree, myhtml_token_node_t* node, FILE* out) -{ - if(node->type & MyHTML_TOKEN_TYPE_CLOSE) { - fprintf(out, "tag_id, node->raw_begin, node->raw_length, - (size_t)node->attr_first, (size_t)node->attr_last); - - if(node->type & MyHTML_TOKEN_TYPE_CLOSE_SELF) { - fprintf(out, " />\n"); - } - else { - fprintf(out, ">\n"); - } -} - -void myhtml_token_print_by_idx(myhtml_tree_t* tree, myhtml_token_node_t* node, FILE* out) -{ - const myhtml_tag_context_t *ctx = myhtml_tag_get_by_id(tree->tags, node->tag_id); - - if(node->tag_id == MyHTML_TAG__TEXT || - node->tag_id == MyHTML_TAG__COMMENT) - { - if(node->str.length) { - fprintf(out, "%.*s: %.*s\n", (int)ctx->name_length, ctx->name, - (int)node->str.length, node->str.data); - } - else { - fprintf(out, "%.*s is empty\n", (int)ctx->name_length, ctx->name); - } - } - else - { - if(node->type & MyHTML_TOKEN_TYPE_CLOSE) { - fprintf(out, "name_length, ctx->name, node->tag_id); - - myhtml_token_print_attr(tree, node, out); - - if(node->type & MyHTML_TOKEN_TYPE_CLOSE_SELF) { - fprintf(out, " />\n"); - } - else { - fprintf(out, ">\n"); - } - } -} - -void myhtml_token_print_attr(myhtml_tree_t* tree, myhtml_token_node_t* node, FILE* out) -{ - myhtml_token_attr_t* attr = node->attr_first; - - while(attr) - { - fprintf(out, " %s", attr->key.data); - - if(attr->ns != MyHTML_NAMESPACE_HTML) - { - switch (attr->ns) { - case MyHTML_NAMESPACE_SVG: - fprintf(out, ":svg"); - break; - case MyHTML_NAMESPACE_MATHML: - fprintf(out, ":math"); - break; - case MyHTML_NAMESPACE_XLINK: - fprintf(out, ":xlink"); - break; - case MyHTML_NAMESPACE_XML: - fprintf(out, ":xml"); - break; - case MyHTML_NAMESPACE_XMLNS: - fprintf(out, ":xmlns"); - break; - default: - fprintf(out, ":UNDEF"); - break; - } - } - - if(attr->value.length) { - fprintf(out, "=\"%s\"", attr->value.data); - } - - attr = attr->next; - } -} - - diff --git a/source/myhtml/token.h b/source/myhtml/token.h index 0f3de04..3936f4a 100644 --- a/source/myhtml/token.h +++ b/source/myhtml/token.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -29,13 +29,13 @@ extern "C" { #include #include "myhtml/myosi.h" -#include "myhtml/utils.h" +#include "mycore/utils.h" #include "myhtml/tag.h" #include "myhtml/myhtml.h" #include "myhtml/mystring.h" -#include "myhtml/utils/mcobject_async.h" -#include "myhtml/utils/mchar_async.h" -#include "myhtml/utils/mcsync.h" +#include "mycore/utils/mcobject_async.h" +#include "mycore/utils/mchar_async.h" +#include "mycore/utils/mcsync.h" #define myhtml_token_node_set_done(token_node) token_node->type |= MyHTML_TOKEN_TYPE_DONE @@ -61,8 +61,8 @@ struct myhtml_token_attr { myhtml_token_attr_t* next; myhtml_token_attr_t* prev; - myhtml_string_t key; - myhtml_string_t value; + mycore_string_t key; + mycore_string_t value; size_t raw_key_begin; size_t raw_key_length; @@ -75,7 +75,7 @@ struct myhtml_token_attr { struct myhtml_token_node { myhtml_tag_id_t tag_id; - myhtml_string_t str; + mycore_string_t str; size_t raw_begin; size_t raw_length; @@ -115,7 +115,7 @@ myhtml_tree_attr_t * myhtml_token_node_attribute_first(myhtml_token_node_t *toke myhtml_tree_attr_t * myhtml_token_node_attribute_last(myhtml_token_node_t *token_node); const char * myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *length); -myhtml_string_t * myhtml_token_node_string(myhtml_token_node_t *token_node); +mycore_string_t * myhtml_token_node_string(myhtml_token_node_t *token_node); bool myhtml_token_node_is_close(myhtml_token_node_t *token_node); bool myhtml_token_node_is_close_self(myhtml_token_node_t *token_node); @@ -130,7 +130,7 @@ myhtml_token_attr_t * myhtml_token_attr_remove_by_name(myhtml_token_node_t* node void myhtml_token_attr_delete_all(myhtml_token_t* token, myhtml_token_node_t* node); void myhtml_token_delete(myhtml_token_t* token, myhtml_token_node_t* node); -void myhtml_token_node_wait_for_done(myhtml_token_node_t* node); +void myhtml_token_node_wait_for_done(myhtml_token_t* token, myhtml_token_node_t* node); void myhtml_token_set_done(myhtml_token_node_t* node); myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_token_node_t* target, const char* key, size_t key_size, const char* value, size_t value_size); @@ -143,7 +143,7 @@ void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target); void myhtml_token_adjust_foreign_attributes(myhtml_token_node_t* target); myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* key, size_t key_len, const char* value, size_t value_len, size_t thread_idx); -myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml_token_t* token, myhtml_token_node_t* dest, const char* key, size_t key_len, const char* value, size_t value_len, size_t thread_idx, myhtml_encoding_t encoding); +myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml_token_t* token, myhtml_token_node_t* dest, const char* key, size_t key_len, const char* value, size_t value_len, size_t thread_idx, myencoding_t encoding); void myhtml_token_node_text_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* text, size_t text_len); void myhtml_token_node_attr_copy(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx); void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx); @@ -154,10 +154,6 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t* myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, myhtml_token_node_t* token_to, myhtml_token_node_t* token_from, bool cp_reverse); void myhtml_token_set_replacement_character_for_null_token(myhtml_tree_t* tree, myhtml_token_node_t* node); -void myhtml_token_print_param_by_idx(myhtml_tree_t* myhtml_tree, myhtml_token_node_t* node, FILE* out); -void myhtml_token_print_by_idx(myhtml_tree_t* myhtml_tree, myhtml_token_node_t* node, FILE* out); -void myhtml_token_print_attr(myhtml_tree_t* myhtml_tree, myhtml_token_node_t* node, FILE* out); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/source/myhtml/tokenizer.c b/source/myhtml/tokenizer.c index 1740a71..1dca519 100644 --- a/source/myhtml/tokenizer.c +++ b/source/myhtml/tokenizer.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -19,14 +19,14 @@ */ #include "myhtml/tokenizer.h" -#include "myhtml/utils/resources.h" +#include "mycore/utils/resources.h" -myhtml_status_t myhtml_tokenizer_set_first_settings(myhtml_tree_t* tree, const char* html, size_t html_length) +mystatus_t myhtml_tokenizer_set_first_settings(myhtml_tree_t* tree, const char* html, size_t html_length) { tree->current_qnode = mythread_queue_get_current_node(tree->queue); mythread_queue_node_clean(tree->current_qnode); - tree->current_qnode->tree = tree; + tree->current_qnode->context = tree; tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id); if(tree->current_token_node == NULL) @@ -37,20 +37,20 @@ myhtml_status_t myhtml_tokenizer_set_first_settings(myhtml_tree_t* tree, const c return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_tokenizer_begin(myhtml_tree_t* tree, const char* html, size_t html_length) +mystatus_t myhtml_tokenizer_begin(myhtml_tree_t* tree, const char* html, size_t html_length) { return myhtml_tokenizer_chunk(tree, html, html_length); } -myhtml_status_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* html, size_t html_length) +mystatus_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* html, size_t html_length) { myhtml_t* myhtml = tree->myhtml; myhtml_tokenizer_state_f* state_f = myhtml->parse_state_func; // add for a chunk - tree->incoming_buf = myhtml_incoming_buffer_add(tree->incoming_buf, tree->mcobject_incoming_buf, html, html_length); + tree->incoming_buf = mycore_incoming_buffer_add(tree->incoming_buf, tree->mcobject_incoming_buf, html, html_length); -#ifndef MyHTML_BUILD_WITHOUT_THREADS +#ifndef MyCORE_BUILD_WITHOUT_THREADS if(myhtml->opt & MyHTML_OPTIONS_PARSE_MODE_SINGLE) tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; @@ -58,8 +58,10 @@ myhtml_status_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) { if(tree->queue_entry == NULL) { - myhtml_status_t status = MyHTML_STATUS_OK; - tree->queue_entry = mythread_queue_list_entry_push(myhtml->thread, tree->queue, &status); + mystatus_t status = MyHTML_STATUS_OK; + tree->queue_entry = mythread_queue_list_entry_push(myhtml->thread_list, 2, + myhtml->thread_stream->context, tree->queue, + myhtml->thread_total, &status); if(status) return status; @@ -89,10 +91,10 @@ myhtml_status_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_tokenizer_chunk(myhtml_tree_t* tree, const char* html, size_t html_length) +mystatus_t myhtml_tokenizer_chunk(myhtml_tree_t* tree, const char* html, size_t html_length) { - if(tree->encoding_usereq == MyHTML_ENCODING_UTF_16LE || - tree->encoding_usereq == MyHTML_ENCODING_UTF_16BE) + if(tree->encoding_usereq == MyENCODING_UTF_16LE || + tree->encoding_usereq == MyENCODING_UTF_16BE) { return myhtml_tokenizer_chunk_with_stream_buffer(tree, html, html_length); } @@ -100,10 +102,10 @@ myhtml_status_t myhtml_tokenizer_chunk(myhtml_tree_t* tree, const char* html, si return myhtml_tokenizer_chunk_process(tree, html, html_length); } -myhtml_status_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, const char* html, size_t html_length) +mystatus_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, const char* html, size_t html_length) { unsigned const char* u_html = (unsigned const char*)html; - const myhtml_encoding_custom_f func = myhtml_encoding_get_function_by_id(tree->encoding); + const myencoding_custom_f func = myencoding_get_function_by_id(tree->encoding); if(tree->stream_buffer == NULL) { tree->stream_buffer = myhtml_stream_buffer_create(); @@ -111,7 +113,7 @@ myhtml_status_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, c if(tree->stream_buffer == NULL) return MyHTML_STATUS_STREAM_BUFFER_ERROR_CREATE; - myhtml_status_t status = myhtml_stream_buffer_init(tree->stream_buffer, 1024); + mystatus_t status = myhtml_stream_buffer_init(tree->stream_buffer, 1024); if(status) return status; @@ -127,11 +129,11 @@ myhtml_status_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, c for (size_t i = 0; i < html_length; i++) { - if(func(u_html[i], &stream_buffer->res) == MyHTML_ENCODING_STATUS_OK) + if(func(u_html[i], &stream_buffer->res) == MyENCODING_STATUS_OK) { if((stream_entry->length + 4) >= stream_entry->size) { - tree->encoding = MyHTML_ENCODING_UTF_8; + tree->encoding = MyENCODING_UTF_8; myhtml_tokenizer_chunk_process(tree, &stream_entry->data[temp_curr_pos], (stream_entry->length - temp_curr_pos)); stream_entry = myhtml_stream_buffer_add_entry(stream_buffer, (4096 * 4)); @@ -142,19 +144,19 @@ myhtml_status_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, c temp_curr_pos = stream_entry->length; } - stream_entry->length += myhtml_encoding_codepoint_to_ascii_utf_8(stream_buffer->res.result, &stream_entry->data[ stream_entry->length ]); + stream_entry->length += myencoding_codepoint_to_ascii_utf_8(stream_buffer->res.result, &stream_entry->data[ stream_entry->length ]); } } if((stream_entry->length - temp_curr_pos)) { - tree->encoding = MyHTML_ENCODING_UTF_8; + tree->encoding = MyENCODING_UTF_8; myhtml_tokenizer_chunk_process(tree, &stream_entry->data[temp_curr_pos], (stream_entry->length - temp_curr_pos)); } return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_tokenizer_end(myhtml_tree_t* tree) +mystatus_t myhtml_tokenizer_end(myhtml_tree_t* tree) { if(tree->incoming_buf) { @@ -170,21 +172,24 @@ myhtml_status_t myhtml_tokenizer_end(myhtml_tree_t* tree) tree->tokenizer_status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; } - myhtml_status_t status = tree->tokenizer_status; + mystatus_t status = tree->tokenizer_status; -#ifndef MyHTML_BUILD_WITHOUT_THREADS +#ifndef MyCORE_BUILD_WITHOUT_THREADS if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) { - mythread_queue_list_entry_wait_for_done(tree->myhtml->thread, tree->queue_entry); - tree->queue_entry = mythread_queue_list_entry_delete(tree->myhtml->thread, tree->queue_entry, false); + mythread_queue_list_entry_wait_for_done(tree->myhtml->thread_stream, tree->queue_entry); - if(mythread_queue_list_get_count(tree->myhtml->thread->context) == 0) { + tree->queue_entry = mythread_queue_list_entry_delete(tree->myhtml->thread_list, 2, + tree->myhtml->thread_stream->context, + tree->queue_entry, false); + + /* Further, any work with tree... */ + if(mythread_queue_list_get_count(tree->myhtml->thread_stream->context) == 0) myhtml_tokenizer_pause(tree); - } if(status == MyHTML_STATUS_OK) - status = mythread_check_status(tree->myhtml->thread); + status = mythread_check_status(tree->myhtml->thread_stream); } #endif @@ -238,17 +243,32 @@ myhtml_tree_node_t * myhtml_tokenizer_fragment_init(myhtml_tree_t* tree, myhtml_ void myhtml_tokenizer_wait(myhtml_tree_t* tree) { - mythread_queue_wait_all_for_done(tree->myhtml->thread); +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(tree->myhtml->thread_stream) + mythread_queue_list_entry_wait_for_done(tree->myhtml->thread_stream, tree->queue_entry); +#endif } void myhtml_tokenizer_post(myhtml_tree_t* tree) { - mythread_resume_all(tree->myhtml->thread); +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(tree->myhtml->thread_stream) + mythread_resume(tree->myhtml->thread_stream, MyTHREAD_OPT_UNDEF); + + if(tree->myhtml->thread_batch) + mythread_resume(tree->myhtml->thread_batch, MyTHREAD_OPT_UNDEF); +#endif } void myhtml_tokenizer_pause(myhtml_tree_t* tree) { - mythread_stop_all(tree->myhtml->thread); +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(tree->myhtml->thread_stream) + mythread_stop(tree->myhtml->thread_stream); + + if(tree->myhtml->thread_batch) + mythread_stop(tree->myhtml->thread_batch); +#endif } void myhtml_tokenizer_calc_current_namespace(myhtml_tree_t* tree, myhtml_token_node_t* token_node) @@ -362,8 +382,8 @@ size_t myhtml_tokenizer_state_rcdata(myhtml_tree_t* tree, myhtml_token_node_t* t mythread_queue_node_t* prev_qnode = mythread_queue_get_prev_node(tree->current_qnode); - if(prev_qnode && prev_qnode->token) { - tree->tmp_tag_id = prev_qnode->token->tag_id; + if(prev_qnode && prev_qnode->args) { + tree->tmp_tag_id = ((myhtml_token_node_t*)(prev_qnode->args))->tag_id; } else if(tree->fragment) { tree->tmp_tag_id = tree->fragment->tag_id; @@ -404,7 +424,7 @@ size_t myhtml_tokenizer_state_rcdata_less_than_sign(myhtml_tree_t* tree, myhtml_ size_t myhtml_tokenizer_state_rcdata_end_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_A_Z_a_z) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA_END_TAG_NAME; } @@ -467,7 +487,7 @@ size_t myhtml_tokenizer_state_rcdata_end_tag_name(myhtml_tree_t* tree, myhtml_to while(html_offset < html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_WHITESPACE) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_WHITESPACE) { if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RCDATA)) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME; @@ -515,7 +535,7 @@ size_t myhtml_tokenizer_state_rcdata_end_tag_name(myhtml_tree_t* tree, myhtml_to break; } - else if (myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyHTML_TOKENIZER_CHAR_A_Z_a_z) { + else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RCDATA; break; } @@ -538,8 +558,8 @@ size_t myhtml_tokenizer_state_rawtext(myhtml_tree_t* tree, myhtml_token_node_t* mythread_queue_node_t* prev_qnode = mythread_queue_get_prev_node(tree->current_qnode); - if(prev_qnode && prev_qnode->token) { - tree->tmp_tag_id = prev_qnode->token->tag_id; + if(prev_qnode && prev_qnode->args) { + tree->tmp_tag_id = ((myhtml_token_node_t*)prev_qnode->args)->tag_id; } else if(tree->fragment) { tree->tmp_tag_id = tree->fragment->tag_id; @@ -581,7 +601,7 @@ size_t myhtml_tokenizer_state_rawtext_less_than_sign(myhtml_tree_t* tree, myhtml size_t myhtml_tokenizer_state_rawtext_end_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_A_Z_a_z) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RAWTEXT_END_TAG_NAME; } @@ -599,7 +619,7 @@ size_t myhtml_tokenizer_state_rawtext_end_tag_name(myhtml_tree_t* tree, myhtml_t while(html_offset < html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_WHITESPACE) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_WHITESPACE) { if(_myhtml_tokenizer_state_andata_end_tag_name(tree, token_node, html, &html_offset, tmp_begin, MyHTML_TOKEN_TYPE_RAWTEXT)) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME; @@ -641,7 +661,7 @@ size_t myhtml_tokenizer_state_rawtext_end_tag_name(myhtml_tree_t* tree, myhtml_t return html_offset; } - else if (myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyHTML_TOKENIZER_CHAR_A_Z_a_z) { + else if (mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_A_Z_a_z) { token_node->raw_begin = tmp_begin; myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_RAWTEXT; @@ -751,7 +771,7 @@ size_t myhtml_tokenizer_state_data(myhtml_tree_t* tree, myhtml_token_node_t* tok token_node->type |= MyHTML_TOKEN_TYPE_NULL; } else if(token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE && - myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyHTML_TOKENIZER_CHAR_WHITESPACE) { + mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] != MyCORE_STRING_MAP_CHAR_WHITESPACE) { token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE); token_node->type |= MyHTML_TOKEN_TYPE_DATA; } @@ -768,7 +788,7 @@ size_t myhtml_tokenizer_state_data(myhtml_tree_t* tree, myhtml_token_node_t* tok ///////////////////////////////////////////////////////// size_t myhtml_tokenizer_state_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_A_Z_a_z) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 1), MyHTML_TOKEN_TYPE_DATA); if(token_node == NULL) { @@ -830,7 +850,7 @@ size_t myhtml_tokenizer_state_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* ///////////////////////////////////////////////////////// size_t myhtml_tokenizer_state_end_tag_open(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_A_Z_a_z) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 2), MyHTML_TOKEN_TYPE_DATA); if(token_node == NULL) { @@ -902,7 +922,7 @@ size_t myhtml_tokenizer_state_markup_declaration_open(myhtml_tree_t* tree, myhtm tagname = myhtml_tree_incomming_buffer_make_data(tree, token_node->raw_begin, 7); - if(myhtml_strncasecmp(tagname, "DOCTYPE", 7) == 0) + if(mycore_strncasecmp(tagname, "DOCTYPE", 7) == 0) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DOCTYPE; @@ -916,9 +936,9 @@ size_t myhtml_tokenizer_state_markup_declaration_open(myhtml_tree_t* tree, myhtm // CDATA sections can only be used in foreign content (MathML or SVG) if(strncmp(tagname, "[CDATA[", 7) == 0) { - if(tree->current_qnode->prev && tree->current_qnode->prev->token) + if(tree->current_qnode->prev && tree->current_qnode->prev->args) { - myhtml_tree_wait_for_last_done_token(tree, tree->current_qnode->prev->token); + myhtml_tree_wait_for_last_done_token(tree, tree->current_qnode->prev->args); myhtml_tree_node_t *adjusted_current_node = myhtml_tree_adjusted_current_node(tree); if(adjusted_current_node && @@ -952,7 +972,7 @@ size_t myhtml_tokenizer_state_tag_name(myhtml_tree_t* tree, myhtml_token_node_t* { while(html_offset < html_size) { - if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_WHITESPACE) + if(mycore_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyCORE_STRING_MAP_CHAR_WHITESPACE) { token_node->raw_length = (tree->global_offset + html_offset) - token_node->raw_begin; myhtml_check_tag_parser(tree, token_node, html, html_offset); @@ -1655,9 +1675,9 @@ size_t myhtml_tokenizer_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token return html_size; } -myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml) +mystatus_t myhtml_tokenizer_state_init(myhtml_t* myhtml) { - myhtml->parse_state_func = (myhtml_tokenizer_state_f*)myhtml_malloc(sizeof(myhtml_tokenizer_state_f) * + myhtml->parse_state_func = (myhtml_tokenizer_state_f*)mycore_malloc(sizeof(myhtml_tokenizer_state_f) * ((MyHTML_TOKENIZER_STATE_LAST_ENTRY * MyHTML_TOKENIZER_STATE_LAST_ENTRY) + 1)); @@ -1896,7 +1916,7 @@ myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml) void myhtml_tokenizer_state_destroy(myhtml_t* myhtml) { if(myhtml->parse_state_func) - myhtml_free(myhtml->parse_state_func); + mycore_free(myhtml->parse_state_func); } diff --git a/source/myhtml/tokenizer.h b/source/myhtml/tokenizer.h index be1e15f..84cc823 100644 --- a/source/myhtml/tokenizer.h +++ b/source/myhtml/tokenizer.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -27,23 +27,23 @@ extern "C" { #endif #include "myhtml/myosi.h" -#include "myhtml/utils.h" +#include "mycore/utils.h" +#include "mycore/mythread.h" #include "myhtml/myhtml.h" #include "myhtml/tag.h" -#include "myhtml/thread.h" #include "myhtml/tokenizer_doctype.h" #include "myhtml/tokenizer_script.h" #include "myhtml/tokenizer_end.h" - + #define myhtml_tokenizer_inc_html_offset(offset, size) \ offset++; \ if(offset >= size) \ return offset -myhtml_status_t myhtml_tokenizer_begin(myhtml_tree_t* tree, const char* html, size_t html_length); -myhtml_status_t myhtml_tokenizer_chunk(myhtml_tree_t* tree, const char* html, size_t html_length); -myhtml_status_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, const char* html, size_t html_length); -myhtml_status_t myhtml_tokenizer_end(myhtml_tree_t* tree); +mystatus_t myhtml_tokenizer_begin(myhtml_tree_t* tree, const char* html, size_t html_length); +mystatus_t myhtml_tokenizer_chunk(myhtml_tree_t* tree, const char* html, size_t html_length); +mystatus_t myhtml_tokenizer_chunk_with_stream_buffer(myhtml_tree_t* tree, const char* html, size_t html_length); +mystatus_t myhtml_tokenizer_end(myhtml_tree_t* tree); void myhtml_tokenizer_set_state(myhtml_tree_t* tree, myhtml_token_node_t* token_node); void myhtml_tokenizer_calc_current_namespace(myhtml_tree_t* tree, myhtml_token_node_t* token_node); @@ -54,7 +54,7 @@ void myhtml_tokenizer_wait(myhtml_tree_t* tree); void myhtml_tokenizer_post(myhtml_tree_t* tree); void myhtml_tokenizer_pause(myhtml_tree_t* tree); -myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml); +mystatus_t myhtml_tokenizer_state_init(myhtml_t* myhtml); void myhtml_tokenizer_state_destroy(myhtml_t* myhtml); myhtml_token_node_t * myhtml_tokenizer_queue_create_text_node_if_need(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t absolute_html_offset, enum myhtml_token_type type); diff --git a/source/myhtml/tokenizer_doctype.c b/source/myhtml/tokenizer_doctype.c index d63036f..cb649a1 100644 --- a/source/myhtml/tokenizer_doctype.c +++ b/source/myhtml/tokenizer_doctype.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -169,7 +169,7 @@ size_t myhtml_tokenizer_state_custom_after_doctype_name_a_z(myhtml_tree_t* tree, const char *param = myhtml_tree_incomming_buffer_make_data(tree, token_node->str.length, 6); - if(myhtml_strncasecmp(param, "PUBLIC", 6) == 0) { + if(mycore_strncasecmp(param, "PUBLIC", 6) == 0) { myhtml_parser_queue_set_attr(tree, token_node); tree->attr_current->raw_value_begin = token_node->str.length; @@ -185,7 +185,7 @@ size_t myhtml_tokenizer_state_custom_after_doctype_name_a_z(myhtml_tree_t* tree, html_offset = (token_node->str.length + 6) - tree->incoming_buf->offset; } - else if(myhtml_strncasecmp(param, "SYSTEM", 6) == 0) { + else if(mycore_strncasecmp(param, "SYSTEM", 6) == 0) { myhtml_parser_queue_set_attr(tree, token_node); tree->attr_current->raw_value_begin = token_node->str.length; diff --git a/source/myhtml/tokenizer_doctype.h b/source/myhtml/tokenizer_doctype.h index a773d10..aedec57 100644 --- a/source/myhtml/tokenizer_doctype.h +++ b/source/myhtml/tokenizer_doctype.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -27,7 +27,7 @@ extern "C" { #endif #include "myhtml/myosi.h" -#include "myhtml/utils.h" +#include "mycore/utils.h" #include "myhtml/myhtml.h" size_t myhtml_tokenizer_state_doctype(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size); diff --git a/source/myhtml/tokenizer_end.c b/source/myhtml/tokenizer_end.c index cca56fc..a80975f 100644 --- a/source/myhtml/tokenizer_end.c +++ b/source/myhtml/tokenizer_end.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/tokenizer_end.h b/source/myhtml/tokenizer_end.h index 3d78807..8b4d953 100644 --- a/source/myhtml/tokenizer_end.h +++ b/source/myhtml/tokenizer_end.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/source/myhtml/tokenizer_script.c b/source/myhtml/tokenizer_script.c index f5f473b..2c69cf7 100644 --- a/source/myhtml/tokenizer_script.c +++ b/source/myhtml/tokenizer_script.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -113,7 +113,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -146,7 +146,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -179,7 +179,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -289,7 +289,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -322,7 +322,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -355,7 +355,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT); if(token_node == NULL) { @@ -455,7 +455,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_start(myhtml_tree_t* tre size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) { + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; } else { @@ -573,7 +573,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree, size_t tmp_size = token_node->str.length; const char *tem_name = myhtml_tree_incomming_buffer_make_data(tree, tmp_size, 6); - if(myhtml_strncasecmp(tem_name, "script", 6) == 0) { + if(mycore_strncasecmp(tem_name, "script", 6) == 0) { myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED; } else { diff --git a/source/myhtml/tokenizer_script.h b/source/myhtml/tokenizer_script.h index 65cb934..6bbf86b 100644 --- a/source/myhtml/tokenizer_script.h +++ b/source/myhtml/tokenizer_script.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -27,7 +27,7 @@ extern "C" { #endif #include "myhtml/myosi.h" -#include "myhtml/utils.h" +#include "mycore/utils.h" #include "myhtml/myhtml.h" #include "myhtml/tokenizer.h" diff --git a/source/myhtml/tree.c b/source/myhtml/tree.c index 618801e..533c307 100644 --- a/source/myhtml/tree.c +++ b/source/myhtml/tree.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,12 +22,12 @@ myhtml_tree_t * myhtml_tree_create(void) { - return (myhtml_tree_t*)myhtml_calloc(1, sizeof(myhtml_tree_t)); + return (myhtml_tree_t*)mycore_calloc(1, sizeof(myhtml_tree_t)); } -myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) +mystatus_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) { - myhtml_status_t status = MyHTML_STATUS_OK; + mystatus_t status = MyHTML_STATUS_OK; tree->myhtml = myhtml; tree->token = myhtml_token_create(tree, 512); @@ -38,7 +38,6 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) tree->temp_tag_name.data = NULL; tree->stream_buffer = NULL; tree->parse_flags = MyHTML_TREE_PARSE_FLAGS_CLEAN; - tree->queue = mythread_queue_create(9182, &status); tree->context = NULL; tree->callback_before_token = NULL; @@ -54,12 +53,21 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) if(status) return status; - /* init Incoming Buffer objects */ + /* Thread Queue */ + tree->queue = mythread_queue_create(); + if(tree->queue == NULL) + return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; + + status = mythread_queue_init(tree->queue, 9182); + if(status) + return status; + + /* Init Incoming Buffer objects */ tree->mcobject_incoming_buf = mcobject_create(); if(tree->mcobject_incoming_buf == NULL) return MyHTML_STATUS_TREE_ERROR_INCOMING_BUFFER_CREATE; - status = mcobject_init(tree->mcobject_incoming_buf, 256, sizeof(myhtml_incoming_buffer_t)); + status = mcobject_init(tree->mcobject_incoming_buf, 256, sizeof(mycore_incoming_buffer_t)); if(status) return status; @@ -72,13 +80,19 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) if(mcstatus) return MyHTML_STATUS_TREE_ERROR_MCOBJECT_INIT; - tree->mchar = mchar_async_create(128, (4096 * 5)); + tree->mchar = mchar_async_create(); tree->active_formatting = myhtml_tree_active_formatting_init(tree); tree->open_elements = myhtml_tree_open_elements_init(tree); tree->other_elements = myhtml_tree_list_init(); tree->token_list = myhtml_tree_token_list_init(); tree->template_insertion = myhtml_tree_template_insertion_init(tree); + if(tree->mchar == NULL) + return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; + + if((status = mchar_async_init(tree->mchar, 128, (4096 * 5)))) + return status; + tree->mcasync_tree_id = mcobject_async_node_add(tree->tree_obj, &mcstatus); if(mcstatus) return MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE; @@ -91,36 +105,40 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) if(mcstatus) return MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE; - tree->mchar_node_id = mchar_async_node_add(tree->mchar); - -#ifndef MyHTML_BUILD_WITHOUT_THREADS - - tree->async_args = (myhtml_async_args_t*)myhtml_calloc(myhtml->thread->pth_list_length, sizeof(myhtml_async_args_t)); - +#ifndef MyCORE_BUILD_WITHOUT_THREADS + tree->async_args = (myhtml_async_args_t*)mycore_calloc(myhtml->thread_total, sizeof(myhtml_async_args_t)); if(tree->async_args == NULL) - return MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION; - - // for single mode in main thread - tree->async_args[0].mchar_node_id = tree->mchar_node_id; + return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; // for batch thread - for(size_t i = 0; i < myhtml->thread->batch_count; i++) { - tree->async_args[(myhtml->thread->batch_first_id + i)].mchar_node_id = mchar_async_node_add(tree->mchar); + for(size_t i = 0; i < myhtml->thread_total; i++) { + tree->async_args[i].mchar_node_id = mchar_async_node_add(tree->mchar, &status); + + if(status) + return status; } - -#else /* MyHTML_BUILD_WITHOUT_THREADS */ - - tree->async_args = (myhtml_async_args_t*)myhtml_calloc(1, sizeof(myhtml_async_args_t)); +#else /* MyCORE_BUILD_WITHOUT_THREADS */ + tree->async_args = (myhtml_async_args_t*)mycore_calloc(1, sizeof(myhtml_async_args_t)); if(tree->async_args == NULL) return MyHTML_STATUS_TREE_ERROR_MEMORY_ALLOCATION; - tree->async_args->mchar_node_id = tree->mchar_node_id; + tree->async_args->mchar_node_id = mchar_async_node_add(tree->mchar, &status); -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ + if(status) + return status; + +#endif /* MyCORE_BUILD_WITHOUT_THREADS */ + + /* for main thread only after parsing */ + tree->mchar_node_id = tree->async_args->mchar_node_id; tree->sync = mcsync_create(); - mcsync_init(tree->sync); + if(tree->sync == NULL) + return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; + + if(mcsync_init(tree->sync)) + return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; /* init Tags after create and init mchar */ tree->tags = myhtml_tag_create(); @@ -133,22 +151,23 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml) void myhtml_tree_clean(myhtml_tree_t* tree) { -#ifndef MyHTML_BUILD_WITHOUT_THREADS +#ifndef MyCORE_BUILD_WITHOUT_THREADS myhtml_t* myhtml = tree->myhtml; - for(size_t i = 0; i < myhtml->thread->batch_count; i++) { - mchar_async_node_clean(tree->mchar, tree->async_args[(myhtml->thread->batch_first_id + i)].mchar_node_id); + for(size_t i = 0; i < myhtml->thread_total; i++) { + mchar_async_node_clean(tree->mchar, tree->async_args[i].mchar_node_id); } -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ +#else + mchar_async_node_clean(tree->mchar, tree->mchar_node_id); +#endif mcobject_async_node_clean(tree->tree_obj, tree->mcasync_tree_id); mcobject_async_node_clean(tree->token->nodes_obj, tree->mcasync_rules_token_id); mcobject_async_node_clean(tree->token->attr_obj, tree->mcasync_rules_attr_id); - mchar_async_node_clean(tree->mchar, tree->mchar_node_id); -#ifndef MyHTML_BUILD_WITHOUT_THREADS - mythread_queue_list_entry_clean(tree->myhtml->thread, tree->queue_entry); -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ +#ifndef MyCORE_BUILD_WITHOUT_THREADS + mythread_queue_list_entry_clean(tree->queue_entry); +#endif /* MyCORE_BUILD_WITHOUT_THREADS */ myhtml_token_clean(tree->token); @@ -184,8 +203,8 @@ void myhtml_tree_clean(myhtml_tree_t* tree) tree->token_last_done = NULL; tree->tokenizer_status = MyHTML_STATUS_OK; - tree->encoding = MyHTML_ENCODING_UTF_8; - tree->encoding_usereq = MyHTML_ENCODING_DEFAULT; + tree->encoding = MyENCODING_UTF_8; + tree->encoding_usereq = MyENCODING_DEFAULT; myhtml_stream_buffer_clean(tree->stream_buffer); @@ -239,8 +258,8 @@ void myhtml_tree_clean_all(myhtml_tree_t* tree) tree->token_last_done = NULL; tree->tokenizer_status = MyHTML_STATUS_OK; - tree->encoding = MyHTML_ENCODING_UTF_8; - tree->encoding_usereq = MyHTML_ENCODING_DEFAULT; + tree->encoding = MyENCODING_UTF_8; + tree->encoding_usereq = MyENCODING_DEFAULT; myhtml_stream_buffer_clean(tree->stream_buffer); @@ -252,9 +271,9 @@ void myhtml_tree_clean_all(myhtml_tree_t* tree) mcobject_clean(tree->mcobject_incoming_buf); myhtml_tag_clean(tree->tags); -#ifndef MyHTML_BUILD_WITHOUT_THREADS - mythread_queue_list_entry_clean(tree->myhtml->thread, tree->queue_entry); -#endif /* MyHTML_BUILD_WITHOUT_THREADS */ +#ifndef MyCORE_BUILD_WITHOUT_THREADS + mythread_queue_list_entry_clean(tree->queue_entry); +#endif tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id); } @@ -281,8 +300,8 @@ myhtml_tree_t * myhtml_tree_destroy(myhtml_tree_t* tree) myhtml_tree_temp_tag_name_destroy(&tree->temp_tag_name, false); - myhtml_free(tree->async_args); - myhtml_free(tree); + mycore_free(tree->async_args); + mycore_free(tree); return NULL; } @@ -493,7 +512,7 @@ myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_node_t* node) { myhtml_tree_node_t* new_node = myhtml_tree_node_create(node->tree); - myhtml_token_node_wait_for_done(node->token); + myhtml_token_node_wait_for_done(node->tree->token, node->token); new_node->token = myhtml_token_node_clone(node->tree->token, node->token, node->tree->mcasync_rules_token_id, @@ -735,11 +754,11 @@ bool myhtml_tree_element_in_scope_by_node(myhtml_tree_node_t* node, enum myhtml_ // list myhtml_tree_list_t * myhtml_tree_list_init(void) { - myhtml_tree_list_t* list = myhtml_malloc(sizeof(myhtml_tree_list_t)); + myhtml_tree_list_t* list = mycore_malloc(sizeof(myhtml_tree_list_t)); list->length = 0; list->size = 4096; - list->list = (myhtml_tree_node_t**)myhtml_malloc(sizeof(myhtml_tree_node_t*) * list->size); + list->list = (myhtml_tree_node_t**)mycore_malloc(sizeof(myhtml_tree_node_t*) * list->size); return list; } @@ -755,10 +774,10 @@ myhtml_tree_list_t * myhtml_tree_list_destroy(myhtml_tree_list_t* list, bool des return NULL; if(list->list) - myhtml_free(list->list); + mycore_free(list->list); if(destroy_self && list) { - myhtml_free(list); + mycore_free(list); return NULL; } @@ -770,7 +789,7 @@ void myhtml_tree_list_append(myhtml_tree_list_t* list, myhtml_tree_node_t* node) if(list->length >= list->size) { list->size <<= 1; - myhtml_tree_node_t** tmp = (myhtml_tree_node_t**)myhtml_realloc(list->list, sizeof(myhtml_tree_node_t*) * list->size); + myhtml_tree_node_t** tmp = (myhtml_tree_node_t**)mycore_realloc(list->list, sizeof(myhtml_tree_node_t*) * list->size); if(tmp) list->list = tmp; @@ -790,7 +809,7 @@ void myhtml_tree_list_insert_by_index(myhtml_tree_list_t* list, myhtml_tree_node if(list->length >= list->size) { list->size <<= 1; - myhtml_tree_node_t** tmp = (myhtml_tree_node_t**)myhtml_realloc(list->list, sizeof(myhtml_tree_node_t*) * list->size); + myhtml_tree_node_t** tmp = (myhtml_tree_node_t**)mycore_realloc(list->list, sizeof(myhtml_tree_node_t*) * list->size); if(tmp) list->list = tmp; @@ -831,7 +850,7 @@ myhtml_tree_list_t * myhtml_tree_open_elements_destroy(myhtml_tree_t* tree) myhtml_tree_node_t * myhtml_tree_current_node(myhtml_tree_t* tree) { if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Current node; Open elements is 0"); + MyCORE_DEBUG("Current node; Open elements is 0"); return 0; } @@ -861,9 +880,9 @@ void myhtml_tree_open_elements_pop(myhtml_tree_t* tree) if(tree->open_elements->length) tree->open_elements->length--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Pop open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); + MyCORE_DEBUG("Pop open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); } #endif } @@ -886,9 +905,9 @@ void myhtml_tree_open_elements_remove(myhtml_tree_t* tree, myhtml_tree_node_t* n } } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Remove open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); + MyCORE_DEBUG("Remove open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); } #endif } @@ -913,9 +932,9 @@ void myhtml_tree_open_elements_pop_until(myhtml_tree_t* tree, myhtml_tag_id_t ta } } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Until open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); + MyCORE_DEBUG("Until open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); } #endif } @@ -936,9 +955,9 @@ void myhtml_tree_open_elements_pop_until_by_node(myhtml_tree_t* tree, myhtml_tre } } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Until by node open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); + MyCORE_DEBUG("Until by node open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); } #endif } @@ -957,9 +976,9 @@ void myhtml_tree_open_elements_pop_until_by_index(myhtml_tree_t* tree, size_t id } } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Until by index open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); + MyCORE_DEBUG("Until by index open elements; Now, Open Elements set 0; Good, if the end of parsing, otherwise is very bad"); } #endif } @@ -1045,7 +1064,7 @@ myhtml_tree_node_t * myhtml_tree_open_elements_find_by_tag_idx(myhtml_tree_t* tr void myhtml_tree_generate_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t exclude_tag_idx, myhtml_namespace_t mynamespace) { if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Generate implied end tags; Open elements is 0"); + MyCORE_DEBUG("Generate implied end tags; Open elements is 0"); return; } @@ -1053,9 +1072,9 @@ void myhtml_tree_generate_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t { myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree); -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(current_node == NULL) { - MyHTML_DEBUG_ERROR("Generate implied end tags; Current node is NULL! This is very bad"); + MyCORE_DEBUG_ERROR("Generate implied end tags; Current node is NULL! This is very bad"); } #endif @@ -1087,7 +1106,7 @@ void myhtml_tree_generate_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t void myhtml_tree_generate_all_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t exclude_tag_idx, myhtml_namespace_t mynamespace) { if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Generate all implied end tags; Open elements is 0"); + MyCORE_DEBUG("Generate all implied end tags; Open elements is 0"); return; } @@ -1095,9 +1114,9 @@ void myhtml_tree_generate_all_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_i { myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree); -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(current_node == NULL) { - MyHTML_DEBUG_ERROR("Generate all implied end tags; Current node is NULL! This is very bad"); + MyCORE_DEBUG_ERROR("Generate all implied end tags; Current node is NULL! This is very bad"); } #endif @@ -1136,7 +1155,7 @@ void myhtml_tree_generate_all_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_i void myhtml_tree_reset_insertion_mode_appropriately(myhtml_tree_t* tree) { if(tree->open_elements->length == 0) { - MyHTML_DEBUG("Reset insertion mode appropriately; Open elements is 0"); + MyCORE_DEBUG("Reset insertion mode appropriately; Open elements is 0"); return; } @@ -1154,9 +1173,9 @@ void myhtml_tree_reset_insertion_mode_appropriately(myhtml_tree_t* tree) // step 2 myhtml_tree_node_t* node = list[i]; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(node == NULL) { - MyHTML_DEBUG_ERROR("Reset insertion mode appropriately; node is NULL! This is very bad"); + MyCORE_DEBUG_ERROR("Reset insertion mode appropriately; node is NULL! This is very bad"); } #endif @@ -1197,9 +1216,9 @@ void myhtml_tree_reset_insertion_mode_appropriately(myhtml_tree_t* tree) return; } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(ancestor == 0) { - MyHTML_DEBUG_ERROR("Reset insertion mode appropriately; Ancestor is 0! This is very, very bad"); + MyCORE_DEBUG_ERROR("Reset insertion mode appropriately; Ancestor is 0! This is very, very bad"); } #endif @@ -1315,9 +1334,9 @@ myhtml_tree_list_t * myhtml_tree_active_formatting_destroy(myhtml_tree_t* tree) bool myhtml_tree_active_formatting_is_marker(myhtml_tree_t* tree, myhtml_tree_node_t* node) { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(node == NULL) { - MyHTML_DEBUG_ERROR("Active formatting is marker; node is NULL!"); + MyCORE_DEBUG_ERROR("Active formatting is marker; node is NULL!"); } #endif @@ -1351,9 +1370,9 @@ void myhtml_tree_active_formatting_pop(myhtml_tree_t* tree) if(tree->active_formatting->length) tree->active_formatting->length--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->active_formatting->length == 0) { - MyHTML_DEBUG("Pop active formatting; length is 0"); + MyCORE_DEBUG("Pop active formatting; length is 0"); } #endif } @@ -1376,9 +1395,9 @@ void myhtml_tree_active_formatting_remove(myhtml_tree_t* tree, myhtml_tree_node_ } } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->active_formatting->length == 0) { - // MyHTML_DEBUG("Remove active formatting; length is 0"); + // MyCORE_DEBUG("Remove active formatting; length is 0"); } #endif } @@ -1390,9 +1409,9 @@ void myhtml_tree_active_formatting_remove_by_index(myhtml_tree_t* tree, size_t i memmove(&list[idx], &list[idx + 1], sizeof(myhtml_tree_node_t*) * (tree->active_formatting->length - idx)); tree->active_formatting->length--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->active_formatting->length == 0) { - MyHTML_DEBUG("Remove active formatting by index; length is 0"); + MyCORE_DEBUG("Remove active formatting by index; length is 0"); } #endif } @@ -1413,9 +1432,9 @@ void myhtml_tree_active_formatting_append_with_check(myhtml_tree_t* tree, myhtml { i--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[i] == NULL) { - MyHTML_DEBUG("Appen active formatting with check; list[%zu] is NULL", i); + MyCORE_DEBUG("Appen active formatting with check; list[" MyCORE_FORMAT_Z "] is NULL", i); } #endif @@ -1424,8 +1443,8 @@ void myhtml_tree_active_formatting_append_with_check(myhtml_tree_t* tree, myhtml if(list[i]->token && node->token) { - myhtml_token_node_wait_for_done(list[i]->token); - myhtml_token_node_wait_for_done(node->token); + myhtml_token_node_wait_for_done(tree->token, list[i]->token); + myhtml_token_node_wait_for_done(tree->token, node->token); if(list[i]->ns == node->ns && list[i]->tag_id == node->tag_id && @@ -1446,7 +1465,7 @@ void myhtml_tree_active_formatting_append_with_check(myhtml_tree_t* tree, myhtml myhtml_tree_node_t * myhtml_tree_active_formatting_current_node(myhtml_tree_t* tree) { if(tree->active_formatting->length == 0) { - MyHTML_DEBUG("Current node active formatting; length is 0"); + MyCORE_DEBUG("Current node active formatting; length is 0"); return 0; } @@ -1482,9 +1501,9 @@ void myhtml_tree_active_formatting_up_to_last_marker(myhtml_tree_t* tree) if(tree->active_formatting->length == 0) return; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[ tree->active_formatting->length ] == NULL) { - MyHTML_DEBUG("Up to last marker active formatting; list[%zu] is NULL", tree->active_formatting->length); + MyCORE_DEBUG("Up to last marker active formatting; list[" MyCORE_FORMAT_Z "] is NULL", tree->active_formatting->length); } #endif @@ -1492,9 +1511,9 @@ void myhtml_tree_active_formatting_up_to_last_marker(myhtml_tree_t* tree) { tree->active_formatting->length--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[ tree->active_formatting->length ] == NULL) { - MyHTML_DEBUG("Up to last marker active formatting; list[%zu] is NULL", tree->active_formatting->length); + MyCORE_DEBUG("Up to last marker active formatting; list[" MyCORE_FORMAT_Z "] is NULL", tree->active_formatting->length); } #endif @@ -1515,9 +1534,9 @@ myhtml_tree_node_t * myhtml_tree_active_formatting_between_last_marker(myhtml_tr { i--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[i] == NULL) { - MyHTML_DEBUG("Between last marker active formatting; list[%zu] is NULL", i); + MyCORE_DEBUG("Between last marker active formatting; list[" MyCORE_FORMAT_Z "] is NULL", i); } #endif @@ -1554,9 +1573,9 @@ void myhtml_tree_active_formatting_reconstruction(myhtml_tree_t* tree) { af_idx--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[af_idx] == NULL) { - MyHTML_DEBUG("Formatting reconstruction; Step 4--6; list[%zu] is NULL", af_idx); + MyCORE_DEBUG("Formatting reconstruction; Step 4--6; list[" MyCORE_FORMAT_Z "] is NULL", af_idx); } #endif @@ -1570,9 +1589,9 @@ void myhtml_tree_active_formatting_reconstruction(myhtml_tree_t* tree) while (af_idx < af->length) { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(list[af_idx] == NULL) { - MyHTML_DEBUG("Formatting reconstruction; Next steps; list[%zu] is NULL", af_idx); + MyCORE_DEBUG("Formatting reconstruction; Next steps; list[" MyCORE_FORMAT_Z "] is NULL", af_idx); } #endif @@ -1596,9 +1615,9 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod myhtml_tree_node_t** afe_list = tree->active_formatting->list; myhtml_tree_node_t* current_node = oel_list[oel_curr_index]; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(current_node == NULL) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Current node is NULL"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Current node is NULL"); } #endif @@ -1707,18 +1726,18 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:AAA_BEGIN LEVEL:INFO */ -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(oel_format_el_idx == 0) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 11; oel_format_el_idx is 0; Bad!"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Step 11; oel_format_el_idx is 0; Bad!"); } #endif // step 12 myhtml_tree_node_t* common_ancestor = oel_list[oel_format_el_idx - 1]; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(common_ancestor == NULL) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 11; common_ancestor is NULL"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Step 11; common_ancestor is NULL"); } #endif @@ -1744,7 +1763,7 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod if(node_index > 0) node_index--; else { - fprintf(stderr, "ERROR: adoption agency algorithm; decrement node_index, node_index is null"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; decrement node_index, node_index is null"); return false; } @@ -1752,9 +1771,9 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod node = oel_list[node_index]; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(node == NULL) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 13.3; node is NULL"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Step 13.3; node is NULL"); } #endif // step 14.4 @@ -1795,9 +1814,9 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod if(last == furthest_block) { bookmark = afe_node_index + 1; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(bookmark >= tree->active_formatting->length) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 13.8; bookmark >= open_elements length"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Step 13.8; bookmark >= open_elements length"); } #endif } @@ -1846,9 +1865,9 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod if(afe_index < bookmark) bookmark--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(bookmark >= tree->active_formatting->length) { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Before Step 18; bookmark (%zu) >= open_elements length", bookmark); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Before Step 18; bookmark (" MyCORE_FORMAT_Z ") >= open_elements length", bookmark); } #endif @@ -1862,7 +1881,7 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_nod myhtml_tree_list_insert_by_index(tree->open_elements, new_formatting_element, idx_furthest_block + 1); } else { - MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 19; can't find furthest_block in open elements"); + MyCORE_DEBUG_ERROR("Adoption agency algorithm; Step 19; can't find furthest_block in open elements"); } } @@ -1881,9 +1900,9 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting(myhtml_tree_t* tree myhtml_tree_node_t* adjusted_location; if(tree->foster_parenting) { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(target == NULL) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 2; target is NULL in return value! This IS very bad"); + MyCORE_DEBUG_ERROR("Appropriate place inserting; Step 2; target is NULL in return value! This IS very bad"); } #endif if(target->ns != MyHTML_NAMESPACE_HTML) @@ -1932,9 +1951,9 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting(myhtml_tree_t* tree break; } -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(idx_table == 0) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 2.5; idx_table is 0"); + MyCORE_DEBUG_ERROR("Appropriate place inserting; Step 2.5; idx_table is 0"); } #endif @@ -1950,9 +1969,9 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting(myhtml_tree_t* tree } } else { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(target == NULL) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 3-5; target is NULL in return value! This IS very bad"); + MyCORE_DEBUG_ERROR("Appropriate place inserting; Step 3-5; target is NULL in return value! This IS very bad"); } #endif @@ -1972,9 +1991,9 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree myhtml_tree_node_t* adjusted_location; if(target->tree->foster_parenting) { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(target == NULL) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 2; target is NULL in return value! This IS very bad"); + MyCORE_DEBUG_ERROR("Appropriate place inserting; Step 2; target is NULL in return value! This IS very bad"); } #endif @@ -2028,12 +2047,6 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree break; } -#ifdef DEBUG_MODE - if(idx_table == 0) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 2.5; idx_table is 0"); - } -#endif - // step 2.6-7 adjusted_location = target; @@ -2047,9 +2060,9 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree } } else { -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(target == NULL) { - MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 3-5; target is NULL in return value! This IS very bad"); + MyCORE_DEBUG_ERROR("Appropriate place inserting; Step 3-5; target is NULL in return value! This IS very bad"); } #endif @@ -2066,11 +2079,11 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree // stack of template insertion modes myhtml_tree_insertion_list_t * myhtml_tree_template_insertion_init(myhtml_tree_t* tree) { - myhtml_tree_insertion_list_t* list = myhtml_malloc(sizeof(myhtml_tree_insertion_list_t)); + myhtml_tree_insertion_list_t* list = mycore_malloc(sizeof(myhtml_tree_insertion_list_t)); list->length = 0; list->size = 1024; - list->list = (enum myhtml_insertion_mode*)myhtml_malloc(sizeof(enum myhtml_insertion_mode) * list->size); + list->list = (enum myhtml_insertion_mode*)mycore_malloc(sizeof(enum myhtml_insertion_mode) * list->size); tree->template_insertion = list; @@ -2088,10 +2101,10 @@ myhtml_tree_insertion_list_t * myhtml_tree_template_insertion_destroy(myhtml_tre return NULL; if(tree->template_insertion->list) - myhtml_free(tree->template_insertion->list); + mycore_free(tree->template_insertion->list); if(tree->template_insertion) - myhtml_free(tree->template_insertion); + mycore_free(tree->template_insertion); return NULL; } @@ -2103,7 +2116,7 @@ void myhtml_tree_template_insertion_append(myhtml_tree_t* tree, enum myhtml_inse if(list->length >= list->size) { list->size <<= 1; - enum myhtml_insertion_mode* tmp = (enum myhtml_insertion_mode*)myhtml_realloc(list->list, + enum myhtml_insertion_mode* tmp = (enum myhtml_insertion_mode*)mycore_realloc(list->list, sizeof(enum myhtml_insertion_mode) * list->size); if(tmp) @@ -2119,9 +2132,9 @@ void myhtml_tree_template_insertion_pop(myhtml_tree_t* tree) if(tree->template_insertion->length) tree->template_insertion->length--; -#ifdef DEBUG_MODE +#ifdef MyCORE_BUILD_DEBUG if(tree->template_insertion->length == 0) { - MyHTML_DEBUG("Pop template insertion; length is 0"); + MyCORE_DEBUG("Pop template insertion; length is 0"); } #endif } @@ -2131,122 +2144,14 @@ size_t myhtml_tree_template_insertion_length(myhtml_tree_t* tree) return tree->template_insertion->length; } -void myhtml_tree_print_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out) -{ - if(node == NULL) - return; - - const myhtml_tag_context_t *ctx = myhtml_tag_get_by_id(tree->tags, node->tag_id); - - if(node->tag_id == MyHTML_TAG__TEXT || - node->tag_id == MyHTML_TAG__COMMENT) - { - if(node->token) - fprintf(out, "<%.*s>: %.*s\n", (int)ctx->name_length, ctx->name, - (int)node->token->str.length, node->token->str.data); - else - fprintf(out, "<%.*s>\n", (int)ctx->name_length, ctx->name); - } - else if(node->tag_id == MyHTML_TAG__DOCTYPE) - { - fprintf(out, "doctype.attr_name) { - fprintf(out, " %s", tree->doctype.attr_name); - } - - if(tree->doctype.attr_public) { - fprintf(out, " %s", tree->doctype.attr_public); - } - - if(tree->doctype.attr_system) { - fprintf(out, " %s", tree->doctype.attr_system); - } - - fprintf(out, ">\n"); - } - else - { - if(node->token && node->token->type & MyHTML_TOKEN_TYPE_CLOSE) { - fprintf(out, "name_length, ctx->name); - } - else { - fprintf(out, "<%.*s", (int)ctx->name_length, ctx->name); - } - - if(node->ns != MyHTML_NAMESPACE_HTML) { - switch (node->ns) { - case MyHTML_NAMESPACE_SVG: - fprintf(out, ":svg"); - break; - case MyHTML_NAMESPACE_MATHML: - fprintf(out, ":math"); - break; - case MyHTML_NAMESPACE_XLINK: - fprintf(out, ":xlink"); - break; - case MyHTML_NAMESPACE_XML: - fprintf(out, ":xml"); - break; - case MyHTML_NAMESPACE_XMLNS: - fprintf(out, ":xmlns"); - break; - default: - break; - } - } - - if(node->token) - myhtml_token_print_attr(tree, node->token, out); - - fprintf(out, ">\n"); - } -} - -void _myhtml_tree_print_node_children(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc) -{ - if(node == NULL) - return; - - size_t i; - - while(node) - { - for(i = 0; i < inc; i++) - fprintf(out, "\t"); - - myhtml_tree_print_node(tree, node, out); - _myhtml_tree_print_node_children(tree, node->child, out, (inc + 1)); - - node = node->next; - } -} - -void myhtml_tree_print_node_children(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc) -{ - if(node == NULL) - return; - - _myhtml_tree_print_node_children(tree, node->child, out, inc); -} - -void myhtml_tree_print_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc) -{ - if(node == NULL) - return; - - myhtml_tree_print_node(tree, node, out); - myhtml_tree_print_node_children(tree, node, out, (inc + 1)); -} - // token list myhtml_tree_token_list_t * myhtml_tree_token_list_init(void) { - myhtml_tree_token_list_t* list = myhtml_malloc(sizeof(myhtml_tree_token_list_t)); + myhtml_tree_token_list_t* list = mycore_malloc(sizeof(myhtml_tree_token_list_t)); list->length = 0; list->size = 4096; - list->list = (myhtml_token_node_t**)myhtml_malloc(sizeof(myhtml_token_node_t*) * list->size); + list->list = (myhtml_token_node_t**)mycore_malloc(sizeof(myhtml_token_node_t*) * list->size); return list; } @@ -2262,10 +2167,10 @@ myhtml_tree_token_list_t * myhtml_tree_token_list_destroy(myhtml_tree_token_list return NULL; if(list->list) - myhtml_free(list->list); + mycore_free(list->list); if(destroy_self && list) { - myhtml_free(list); + mycore_free(list); return NULL; } @@ -2277,7 +2182,7 @@ void myhtml_tree_token_list_append(myhtml_tree_token_list_t* list, myhtml_token_ if(list->length >= list->size) { list->size <<= 1; - myhtml_token_node_t** tmp = (myhtml_token_node_t**)myhtml_realloc(list->list, sizeof(myhtml_token_node_t*) * list->size); + myhtml_token_node_t** tmp = (myhtml_token_node_t**)mycore_realloc(list->list, sizeof(myhtml_token_node_t*) * list->size); if(tmp) list->list = tmp; @@ -2292,7 +2197,7 @@ void myhtml_tree_token_list_append_after_index(myhtml_tree_token_list_t* list, m if(list->length >= list->size) { list->size <<= 1; - myhtml_token_node_t** tmp = (myhtml_token_node_t**)myhtml_realloc(list->list, sizeof(myhtml_token_node_t*) * list->size); + myhtml_token_node_t** tmp = (myhtml_token_node_t**)mycore_realloc(list->list, sizeof(myhtml_token_node_t*) * list->size); if(tmp) list->list = tmp; @@ -2313,7 +2218,7 @@ void myhtml_tree_token_list_append_after_index(myhtml_tree_token_list_t* list, m myhtml_token_node_t * myhtml_tree_token_list_current_node(myhtml_tree_token_list_t* list) { if(list->length == 0) { - MyHTML_DEBUG("Token list current node; length is 0"); + MyCORE_DEBUG("Token list current node; length is 0"); return NULL; } @@ -2443,7 +2348,7 @@ bool myhtml_tree_is_html_integration_point(myhtml_tree_t* tree, myhtml_tree_node node->tag_id == MyHTML_TAG_ANNOTATION_XML && node->token && (node->token->type & MyHTML_TOKEN_TYPE_CLOSE) == 0) { - myhtml_token_node_wait_for_done(node->token); + myhtml_token_node_wait_for_done(tree->token, node->token); myhtml_token_attr_t* attr = myhtml_token_attr_match_case(tree->token, node->token, "encoding", 8, "text/html", 9); @@ -2460,11 +2365,11 @@ bool myhtml_tree_is_html_integration_point(myhtml_tree_t* tree, myhtml_tree_node } // temp tag name -myhtml_status_t myhtml_tree_temp_tag_name_init(myhtml_tree_temp_tag_name_t* temp_tag_name) +mystatus_t myhtml_tree_temp_tag_name_init(myhtml_tree_temp_tag_name_t* temp_tag_name) { temp_tag_name->size = 1024; temp_tag_name->length = 0; - temp_tag_name->data = (char *)myhtml_malloc(temp_tag_name->size * sizeof(char)); + temp_tag_name->data = (char *)mycore_malloc(temp_tag_name->size * sizeof(char)); if(temp_tag_name->data == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; @@ -2483,23 +2388,23 @@ myhtml_tree_temp_tag_name_t * myhtml_tree_temp_tag_name_destroy(myhtml_tree_temp return NULL; if(temp_tag_name->data) { - myhtml_free(temp_tag_name->data); + mycore_free(temp_tag_name->data); temp_tag_name->data = NULL; } if(self_destroy) { - myhtml_free(temp_tag_name); + mycore_free(temp_tag_name); return NULL; } return temp_tag_name; } -myhtml_status_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t* temp_tag_name, const char name) +mystatus_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t* temp_tag_name, const char name) { if(temp_tag_name->length >= temp_tag_name->size) { size_t nsize = temp_tag_name->size << 1; - char *tmp = (char *)myhtml_realloc(temp_tag_name->data, nsize * sizeof(char)); + char *tmp = (char *)mycore_realloc(temp_tag_name->data, nsize * sizeof(char)); if(tmp) { temp_tag_name->size = nsize; @@ -2515,14 +2420,14 @@ myhtml_status_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t return MyHTML_STATUS_OK; } -myhtml_status_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* temp_tag_name, const char* name, size_t name_len) +mystatus_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* temp_tag_name, const char* name, size_t name_len) { if(temp_tag_name->data == NULL || name_len == 0) return MyHTML_STATUS_OK; if((temp_tag_name->length + name_len) >= temp_tag_name->size) { size_t nsize = (temp_tag_name->size << 1) + name_len; - char *tmp = (char *)myhtml_realloc(temp_tag_name->data, nsize * sizeof(char)); + char *tmp = (char *)mycore_realloc(temp_tag_name->data, nsize * sizeof(char)); if(tmp) { temp_tag_name->size = nsize; @@ -2540,20 +2445,19 @@ myhtml_status_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* te void myhtml_tree_wait_for_last_done_token(myhtml_tree_t* tree, myhtml_token_node_t* token_for_wait) { -#ifndef MyHTML_BUILD_WITHOUT_THREADS +#ifndef MyCORE_BUILD_WITHOUT_THREADS - const struct timespec timeout = {0, 0}; - while(tree->token_last_done != token_for_wait) {myhtml_thread_nanosleep(&timeout);} + while(tree->token_last_done != token_for_wait) {mythread_nanosleep_sleep(tree->myhtml->thread_stream->timespec);} #endif } /* special tonek list */ -myhtml_status_t myhtml_tree_special_list_init(myhtml_tree_special_token_list_t* special) +mystatus_t myhtml_tree_special_list_init(myhtml_tree_special_token_list_t* special) { special->size = 1024; special->length = 0; - special->list = (myhtml_tree_special_token_t *)myhtml_malloc(special->size * sizeof(myhtml_tree_special_token_t)); + special->list = (myhtml_tree_special_token_t *)mycore_malloc(special->size * sizeof(myhtml_tree_special_token_t)); if(special->list == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; @@ -2572,23 +2476,23 @@ myhtml_tree_special_token_list_t * myhtml_tree_special_list_destroy(myhtml_tree_ return NULL; if(special->list) { - myhtml_free(special->list); + mycore_free(special->list); special->list = NULL; } if(self_destroy) { - myhtml_free(special); + mycore_free(special); return NULL; } return special; } -myhtml_status_t myhtml_tree_special_list_append(myhtml_tree_special_token_list_t* special, myhtml_token_node_t *token, myhtml_namespace_t ns) +mystatus_t myhtml_tree_special_list_append(myhtml_tree_special_token_list_t* special, myhtml_token_node_t *token, myhtml_namespace_t ns) { if(special->length >= special->size) { size_t nsize = special->size << 1; - myhtml_tree_special_token_t *tmp = (myhtml_tree_special_token_t *)myhtml_realloc(special->list, nsize * sizeof(myhtml_tree_special_token_t)); + myhtml_tree_special_token_t *tmp = (myhtml_tree_special_token_t *)mycore_realloc(special->list, nsize * sizeof(myhtml_tree_special_token_t)); if(tmp) { special->size = nsize; @@ -2628,14 +2532,14 @@ myhtml_tree_special_token_t * myhtml_tree_special_list_get_last(myhtml_tree_spec } /* incoming buffer */ -myhtml_incoming_buffer_t * myhtml_tree_incoming_buffer_first(myhtml_tree_t *tree) +mycore_incoming_buffer_t * myhtml_tree_incoming_buffer_first(myhtml_tree_t *tree) { return tree->incoming_buf_first; } const char * myhtml_tree_incomming_buffer_make_data(myhtml_tree_t *tree, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; if((relative_begin + length) <= buffer->size) { diff --git a/source/myhtml/tree.h b/source/myhtml/tree.h index 1e126f6..c3299de 100644 --- a/source/myhtml/tree.h +++ b/source/myhtml/tree.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -31,10 +31,11 @@ extern "C" { #include "myhtml/mystring.h" #include "myhtml/token.h" #include "myhtml/stream.h" -#include "myhtml/utils/mcsync.h" -#include "myhtml/utils/mchar_async.h" -#include "myhtml/utils/mcobject.h" -#include "myhtml/utils/mcobject_async.h" +#include "mycore/thread_queue.h" +#include "mycore/utils/mcsync.h" +#include "mycore/utils/mchar_async.h" +#include "mycore/utils/mcobject.h" +#include "mycore/utils/mcobject_async.h" #define myhtml_tree_get(tree, attr) tree->attr #define myhtml_tree_set(tree, attr) tree->attr @@ -164,7 +165,7 @@ struct myhtml_tree_temp_stream { size_t length; size_t size; - myhtml_encoding_result_t res; + myencoding_result_t res; struct myhtml_tree_temp_tag_name* current; }; @@ -185,6 +186,11 @@ struct myhtml_tree { size_t mcasync_rules_token_id; size_t mcasync_rules_attr_id; size_t mcasync_tree_id; + /* + * mchar_node_id + * for rules, or if single mode, + * or for main thread only after parsing + */ size_t mchar_node_id; myhtml_token_attr_t* attr_current; myhtml_tag_id_t tmp_tag_id; @@ -192,8 +198,8 @@ struct myhtml_tree { mythread_queue_node_t* current_qnode; mcobject_t* mcobject_incoming_buf; - myhtml_incoming_buffer_t* incoming_buf; - myhtml_incoming_buffer_t* incoming_buf_first; + mycore_incoming_buffer_t* incoming_buf; + mycore_incoming_buffer_t* incoming_buf_first; // ref for nodes myhtml_tree_node_t* document; @@ -227,10 +233,10 @@ struct myhtml_tree { volatile myhtml_tree_parse_flags_t parse_flags; bool foster_parenting; size_t global_offset; - myhtml_status_t tokenizer_status; + mystatus_t tokenizer_status; - myhtml_encoding_t encoding; - myhtml_encoding_t encoding_usereq; + myencoding_t encoding; + myencoding_t encoding_usereq; myhtml_tree_temp_tag_name_t temp_tag_name; /* callback */ @@ -249,7 +255,7 @@ struct myhtml_tree { // base myhtml_tree_t * myhtml_tree_create(void); -myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml); +mystatus_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml); void myhtml_tree_clean(myhtml_tree_t* tree); void myhtml_tree_clean_all(myhtml_tree_t* tree); myhtml_tree_t * myhtml_tree_destroy(myhtml_tree_t* tree); @@ -355,10 +361,6 @@ void myhtml_tree_node_clean(myhtml_tree_node_t* tree_node); void myhtml_tree_node_free(myhtml_tree_node_t* node); myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_node_t* node); -void myhtml_tree_print_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out); -void myhtml_tree_print_node_children(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc); -void myhtml_tree_print_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc); - void myhtml_tree_node_add_child(myhtml_tree_node_t* root, myhtml_tree_node_t* node); void myhtml_tree_node_insert_before(myhtml_tree_node_t* root, myhtml_tree_node_t* node); void myhtml_tree_node_insert_after(myhtml_tree_node_t* root, myhtml_tree_node_t* node); @@ -390,21 +392,21 @@ bool myhtml_tree_is_mathml_integration_point(myhtml_tree_t* tree, myhtml_tree_no bool myhtml_tree_is_html_integration_point(myhtml_tree_t* tree, myhtml_tree_node_t* node); // temp tag name -myhtml_status_t myhtml_tree_temp_tag_name_init(myhtml_tree_temp_tag_name_t* temp_tag_name); +mystatus_t myhtml_tree_temp_tag_name_init(myhtml_tree_temp_tag_name_t* temp_tag_name); void myhtml_tree_temp_tag_name_clean(myhtml_tree_temp_tag_name_t* temp_tag_name); myhtml_tree_temp_tag_name_t * myhtml_tree_temp_tag_name_destroy(myhtml_tree_temp_tag_name_t* temp_tag_name, bool self_destroy); -myhtml_status_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* temp_tag_name, const char* name, size_t name_len); -myhtml_status_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t* temp_tag_name, const char name); +mystatus_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* temp_tag_name, const char* name, size_t name_len); +mystatus_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t* temp_tag_name, const char name); /* special tonek list */ -myhtml_status_t myhtml_tree_special_list_init(myhtml_tree_special_token_list_t* special); -myhtml_status_t myhtml_tree_special_list_append(myhtml_tree_special_token_list_t* special, myhtml_token_node_t *token, myhtml_namespace_t ns); +mystatus_t myhtml_tree_special_list_init(myhtml_tree_special_token_list_t* special); +mystatus_t myhtml_tree_special_list_append(myhtml_tree_special_token_list_t* special, myhtml_token_node_t *token, myhtml_namespace_t ns); size_t myhtml_tree_special_list_length(myhtml_tree_special_token_list_t* special); myhtml_tree_special_token_t * myhtml_tree_special_list_get_last(myhtml_tree_special_token_list_t* special); size_t myhtml_tree_special_list_pop(myhtml_tree_special_token_list_t* special); /* incoming buffer */ -myhtml_incoming_buffer_t * myhtml_tree_incoming_buffer_first(myhtml_tree_t *tree); +mycore_incoming_buffer_t * myhtml_tree_incoming_buffer_first(myhtml_tree_t *tree); const char * myhtml_tree_incomming_buffer_make_data(myhtml_tree_t *tree, size_t begin, size_t length); #ifdef __cplusplus diff --git a/source/myhtml/utils.c b/source/myhtml/utils.c deleted file mode 100644 index 52a99da..0000000 --- a/source/myhtml/utils.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils.h" - -static const unsigned char myhtml_utils_chars_map[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, - 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, - 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, - 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, - 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, - 0x3f, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, - 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, - 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, - 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, - 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, - 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, - 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, - 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, - 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, - 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, - 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, - 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, - 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, - 0xfc, 0xfd, 0xfe, 0xff -}; - -size_t myhtml_strncasecmp(const char* str1, const char* str2, size_t size) -{ - if(str1 == NULL || str2 == NULL) { - if(str1 == str2) - return 0; - - return 1; - } - - const unsigned char *ustr1 = (const unsigned char *)str1; - const unsigned char *ustr2 = (const unsigned char *)str2; - - size_t i = 0; - while (i < size) { - if(myhtml_utils_chars_map[*ustr1] != myhtml_utils_chars_map[*ustr2]) - return (size - i); - - ustr1++; - ustr2++; - - i++; - } - - return 0; -} - -size_t myhtml_strcasecmp(const char* str1, const char* str2) -{ - if(str1 == NULL || str2 == NULL) { - if(str1 == str2) - return 0; - - return 1; - } - - const unsigned char *ustr1 = (const unsigned char *)str1; - const unsigned char *ustr2 = (const unsigned char *)str2; - - for (size_t i = 0;;) - { - if(myhtml_utils_chars_map[*ustr1] != myhtml_utils_chars_map[*ustr2]) - return (i + 1); - - if(*ustr1 == '\0') - return 0; - - ustr1++; - ustr2++; - - i++; - } -} - -size_t myhtml_strncmp(const char* str1, const char* str2, size_t size) -{ - if(str1 == NULL || str2 == NULL) { - if(str1 == str2) - return 0; - - return 1; - } - - const unsigned char *ustr1 = (const unsigned char *)str1; - const unsigned char *ustr2 = (const unsigned char *)str2; - - size_t i = 0; - while (i < size) { - if(*ustr1 != *ustr2) - return (size - i); - - ustr1++; - ustr2++; - - i++; - } - - return 0; -} - -size_t myhtml_strcmp(const char* str1, const char* str2) -{ - if(str1 == NULL || str2 == NULL) { - if(str1 == str2) - return 0; - - return 1; - } - - const unsigned char *ustr1 = (const unsigned char *)str1; - const unsigned char *ustr2 = (const unsigned char *)str2; - - for (size_t i = 0;;) - { - if(*ustr1 != *ustr2) - return (i + 1); - - if(*ustr1 == '\0') - return 0; - - ustr1++; - ustr2++; - - i++; - } -} - -size_t myhtml_strcmp_ws(const char* str1, const char* str2) -{ - if(str1 == NULL || str2 == NULL) { - if(str1 == str2) - return 0; - - return 1; - } - - const unsigned char *ustr1 = (const unsigned char *)str1; - const unsigned char *ustr2 = (const unsigned char *)str2; - - for (size_t i = 0;;) - { - if(*ustr1 != *ustr2) - return (i + 1); - - if(myhtml_utils_whithspace(*ustr1, ==, ||) || *ustr1 == '\0') - return 0; - - ustr1++; - ustr2++; - - i++; - } -} - -bool myhtml_ustrcasecmp_without_checks_by_secondary(const unsigned char* ustr1, const unsigned char* ustr2) -{ - while (*ustr1 != '\0') { - if(myhtml_utils_chars_map[*ustr1] != myhtml_utils_chars_map[*ustr2]) - return false; - - ustr1++; - ustr2++; - } - - return true; -} - - diff --git a/source/myhtml/utils.h b/source/myhtml/utils.h deleted file mode 100644 index bca879c..0000000 --- a/source/myhtml/utils.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_H -#define MyHTML_UTILS_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" - -#define myhtml_utils_whithspace(onechar, action, logic) \ - (onechar action ' ' logic \ - onechar action '\t' logic \ - onechar action '\n' logic \ - onechar action '\f' logic \ - onechar action '\r') - -size_t myhtml_strncasecmp(const char* str1, const char* str2, size_t size); -size_t myhtml_strcasecmp(const char* str1, const char* str2); -size_t myhtml_strncmp(const char* str1, const char* str2, size_t size); -size_t myhtml_strcmp(const char* str1, const char* str2); -size_t myhtml_strcmp_ws(const char* str1, const char* str2); -bool myhtml_ustrcasecmp_without_checks_by_secondary(const unsigned char* ustr1, const unsigned char* ustr2); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* utils_h */ diff --git a/source/myhtml/utils/avl_tree.c b/source/myhtml/utils/avl_tree.c deleted file mode 100644 index b8da055..0000000 --- a/source/myhtml/utils/avl_tree.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright (C) 2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin avl_treet, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/avl_tree.h" - -myhtml_utils_avl_tree_t * myhtml_utils_avl_tree_create(void) -{ - return (myhtml_utils_avl_tree_t*)myhtml_calloc(1, sizeof(myhtml_utils_avl_tree_t)); -} - -myhtml_status_t myhtml_utils_avl_tree_init(myhtml_utils_avl_tree_t* avl_tree) -{ - /* for raw declaration style */ - avl_tree->mc_nodes = mcobject_create(); - if(avl_tree->mc_nodes == NULL) - return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - - myhtml_status_t myhtml_status = mcobject_init(avl_tree->mc_nodes, 256, sizeof(myhtml_utils_avl_tree_node_t)); - if(myhtml_status) - return MyHTML_STATUS_ERROR; - - return MyHTML_STATUS_OK; -} - -void myhtml_utils_avl_tree_clean(myhtml_utils_avl_tree_t* avl_tree) -{ - mcobject_clean(avl_tree->mc_nodes); -} - -myhtml_utils_avl_tree_t * myhtml_utils_avl_tree_destroy(myhtml_utils_avl_tree_t* avl_tree, bool self_destroy) -{ - if(avl_tree == NULL) - return NULL; - - mcobject_destroy(avl_tree->mc_nodes, true); - - if(self_destroy) { - myhtml_free(avl_tree); - return NULL; - } - - return avl_tree; -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_node_create_root(myhtml_utils_avl_tree_t* avl_tree, size_t type, void* value) -{ - myhtml_utils_avl_tree_node_t *node = mcobject_malloc(avl_tree->mc_nodes, NULL); - memset(node, 0, sizeof(myhtml_utils_avl_tree_node_t)); - - node->type = type; - node->value = value; - - return node; -} - -void myhtml_utils_avl_tree_node_clean(myhtml_utils_avl_tree_node_t* node) -{ - memset(node, 0, sizeof(myhtml_utils_avl_tree_node_t)); -} - -short myhtml_utils_avl_tree_node_height(myhtml_utils_avl_tree_node_t* node) -{ - return (node ? node->height : 0); -} - -short myhtml_utils_avl_tree_node_balance_factor(myhtml_utils_avl_tree_node_t* node) -{ - return (myhtml_utils_avl_tree_node_height(node->right) - myhtml_utils_avl_tree_node_height(node->left)); -} - -void myhtml_utils_avl_tree_node_set_height(myhtml_utils_avl_tree_node_t* node) -{ - short left_height = myhtml_utils_avl_tree_node_height(node->left); - short right_height = myhtml_utils_avl_tree_node_height(node->right); - - node->height = (left_height > right_height ? left_height : right_height) + 1; -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_node_rotate_right(myhtml_utils_avl_tree_node_t* pos) -{ - myhtml_utils_avl_tree_node_t* node = pos->left; - - node->parent = pos->parent; - - if(node->right) - node->right->parent = pos; - - pos->left = node->right; - pos->parent = node; - - node->right = pos; - - myhtml_utils_avl_tree_node_set_height(pos); - myhtml_utils_avl_tree_node_set_height(node); - - return node; -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_node_rotate_left(myhtml_utils_avl_tree_node_t* pos) -{ - myhtml_utils_avl_tree_node_t* node = pos->right; - - node->parent = pos->parent; - - if(node->left) - node->left->parent = pos; - - pos->right = node->left; - pos->parent = node; - - node->left = pos; - - myhtml_utils_avl_tree_node_set_height(pos); - myhtml_utils_avl_tree_node_set_height(node); - - return node; -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_node_balance(myhtml_utils_avl_tree_node_t* node, myhtml_utils_avl_tree_node_t** root) -{ - /* set height */ - short left_height = myhtml_utils_avl_tree_node_height(node->left); - short right_height = myhtml_utils_avl_tree_node_height(node->right); - - node->height = (left_height > right_height ? left_height : right_height) + 1; - - /* check balance */ - switch ((right_height - left_height)) - { - case 2: { - if(myhtml_utils_avl_tree_node_balance_factor(node->right) < 0) - node->right = myhtml_utils_avl_tree_node_rotate_right(node->right); - - myhtml_utils_avl_tree_node_t* parent = node->parent; - - if(parent) { - if(parent->right == node) - return parent->right = myhtml_utils_avl_tree_node_rotate_left(node); - else - return parent->left = myhtml_utils_avl_tree_node_rotate_left(node); - } - - return myhtml_utils_avl_tree_node_rotate_left(node); - } - case -2: { - if(myhtml_utils_avl_tree_node_balance_factor(node->left) > 0) - node->left = myhtml_utils_avl_tree_node_rotate_left(node->left); - - myhtml_utils_avl_tree_node_t* parent = node->parent; - - if(parent) { - if(parent->right == node) - return parent->right = myhtml_utils_avl_tree_node_rotate_right(node); - else - return parent->left = myhtml_utils_avl_tree_node_rotate_right(node); - } - - return myhtml_utils_avl_tree_node_rotate_right(node); - } - default: - break; - } - - if(node->parent == NULL) - *root = node; - - return node->parent; -} - -void myhtml_utils_avl_tree_add(myhtml_utils_avl_tree_t* avl_tree, myhtml_utils_avl_tree_node_t** root, size_t type, void* value) -{ - if(*root == NULL) { - *root = myhtml_utils_avl_tree_node_create_root(avl_tree, type, value); - return; - } - - myhtml_utils_avl_tree_node_t* node = *root; - myhtml_utils_avl_tree_node_t* new_node = mcobject_malloc(avl_tree->mc_nodes, NULL); - myhtml_utils_avl_tree_node_clean(new_node); - - while(1) - { - if(type == node->type) { - node->value = value; - return; - } - else if(type < node->type) { - if(node->left == NULL) { - node->left = new_node; - - new_node->parent = node; - new_node->type = type; - new_node->value = value; - - node = new_node; - break; - } - - node = node->left; - } - else { - if(node->right == NULL) { - node->right = new_node; - - new_node->parent = node; - new_node->type = type; - new_node->value = value; - - node = new_node; - break; - } - - node = node->right; - } - } - - while(node) { - node = myhtml_utils_avl_tree_node_balance(node, root); - } -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_find_min(myhtml_utils_avl_tree_node_t* node) -{ - if(node == NULL) - return NULL; - - while(node->right) { - node = node->right; - } - - return node; -} - -void myhtml_utils_avl_tree_rotate_for_delete(myhtml_utils_avl_tree_node_t* delete_node, myhtml_utils_avl_tree_node_t* node, myhtml_utils_avl_tree_node_t** root) -{ - myhtml_utils_avl_tree_node_t* balance_node; - - if(node) { - if(delete_node->left == node) { - balance_node = node->left ? node->left : node; - - node->parent = delete_node->parent; - node->right = delete_node->right; - - if(delete_node->right) - delete_node->right->parent = node; - } - else { - balance_node = node; - - node->parent->right = NULL; - node->parent = delete_node->parent; - node->right = delete_node->right; - node->left = delete_node->left; - - if(delete_node->left) - delete_node->left->parent = node; - if(delete_node->right) - delete_node->right->parent = node; - } - - if(delete_node->parent) { - if(delete_node->parent->left == delete_node) { delete_node->parent->left = node; } - else { delete_node->parent->right = node; } - } - else { - *root = node; - } - } - else { - balance_node = delete_node->parent; - - if(delete_node->parent) { - if(delete_node->parent->left == delete_node) { delete_node->parent->left = delete_node->right; } - else { delete_node->parent->right = delete_node->right; } - } - else { - *root = delete_node->right; - } - } - - while(balance_node) { - balance_node = myhtml_utils_avl_tree_node_balance(balance_node, root); - } -} - -void * myhtml_utils_avl_tree_delete(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t** root, size_t type) -{ - myhtml_utils_avl_tree_node_t* node = *root; - - while(node) - { - if(type == node->type) { - myhtml_utils_avl_tree_rotate_for_delete(node, myhtml_utils_avl_tree_find_min(node->left), root); - - void *value = node->value; - mcobject_free(avl_tree->mc_nodes, node); - - return value; - } - else if(type < node->type) - node = node->left; - else - node = node->right; - } - - return NULL; -} - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_search_by_type(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t* node, size_t type) -{ - while(node) - { - if(type == node->type) - return node; - else if(type < node->type) - node = node->left; - else - node = node->right; - } - - return NULL; -} - -void myhtml_utils_avl_tree_list_all_nodes(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t* root, myhtml_utils_avl_tree_node_callback_f callback, void* ctx) -{ - if(root == NULL) - return; - - callback(root, ctx); - - myhtml_utils_avl_tree_list_all_nodes(avl_tree, root->left, callback, ctx); - myhtml_utils_avl_tree_list_all_nodes(avl_tree, root->right, callback, ctx); -} - - diff --git a/source/myhtml/utils/avl_tree.h b/source/myhtml/utils/avl_tree.h deleted file mode 100644 index f2c837e..0000000 --- a/source/myhtml/utils/avl_tree.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (C) 2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin avl_treet, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_AVL_TREE_H -#define MyHTML_UTILS_AVL_TREE_H -#pragma once - -#include "myhtml/myosi.h" -#include "myhtml/utils/mcobject.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct myhtml_utils_avl_tree_node myhtml_utils_avl_tree_node_t; -typedef void (*myhtml_utils_avl_tree_node_callback_f)(myhtml_utils_avl_tree_node_t* avl_tree_node, void* ctx); - -struct myhtml_utils_avl_tree_node { - void *value; - size_t type; - - myhtml_utils_avl_tree_node_t* left; - myhtml_utils_avl_tree_node_t* right; - myhtml_utils_avl_tree_node_t* parent; - - short height; -}; - -struct myhtml_utils_avl_tree { - mcobject_t* mc_nodes; -} -typedef myhtml_utils_avl_tree_t; - -myhtml_utils_avl_tree_t * myhtml_utils_avl_tree_create(void); -myhtml_status_t myhtml_utils_avl_tree_init(myhtml_utils_avl_tree_t* avl_tree); -void myhtml_utils_avl_tree_clean(myhtml_utils_avl_tree_t* avl_tree); -myhtml_utils_avl_tree_t * myhtml_utils_avl_tree_destroy(myhtml_utils_avl_tree_t* avl_tree, bool self_destroy); - -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_node_create_root(myhtml_utils_avl_tree_t* avl_tree, size_t type, void* value); - -void myhtml_utils_avl_tree_add(myhtml_utils_avl_tree_t* avl_tree, myhtml_utils_avl_tree_node_t** root, size_t type, void* value); -void * myhtml_utils_avl_tree_delete(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t** root, size_t type); -myhtml_utils_avl_tree_node_t * myhtml_utils_avl_tree_search_by_type(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t* node, size_t type); - -void myhtml_utils_avl_tree_list_all_nodes(myhtml_utils_avl_tree_t *avl_tree, myhtml_utils_avl_tree_node_t* root, myhtml_utils_avl_tree_node_callback_f callback, void* ctx); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MyHTML_UTILS_AVL_TREE_H */ diff --git a/source/myhtml/utils/mchar_async.c b/source/myhtml/utils/mchar_async.c deleted file mode 100644 index 37c8c57..0000000 --- a/source/myhtml/utils/mchar_async.c +++ /dev/null @@ -1,770 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/mchar_async.h" - -mchar_async_t * mchar_async_create(size_t pos_size, size_t size) -{ - mchar_async_t *mcobj_async = (mchar_async_t*)myhtml_calloc(1, sizeof(mchar_async_t)); - - mchar_async_init(mcobj_async, pos_size, size); - - return mcobj_async; -} - -void mchar_async_init(mchar_async_t *mchar_async, size_t chunk_len, size_t char_size) -{ - if(char_size < 4096) - char_size = 4096; - - mchar_async->origin_size = char_size; - - mchar_async->chunks_size = chunk_len; - mchar_async->chunks_pos_size = 1024; - mchar_async->chunks = (mchar_async_chunk_t**)myhtml_calloc(mchar_async->chunks_pos_size, sizeof(mchar_async_chunk_t*)); - mchar_async->chunks[0] = (mchar_async_chunk_t*)myhtml_calloc(mchar_async->chunks_size, sizeof(mchar_async_chunk_t)); - - mchar_async_cache_init(&mchar_async->chunk_cache); - - mchar_async->nodes_length = 0; - mchar_async->nodes_size = 64; - mchar_async->nodes = (mchar_async_node_t*)myhtml_calloc(mchar_async->nodes_size, sizeof(mchar_async_node_t)); - - mchar_async->nodes_cache_length = 0; - mchar_async->nodes_cache_size = mchar_async->nodes_size; - mchar_async->nodes_cache = (size_t*)myhtml_malloc(mchar_async->nodes_cache_size * sizeof(size_t)); - - mchar_async_clean(mchar_async); - - mchar_async->mcsync = mcsync_create(); -} - -void mchar_async_clean(mchar_async_t *mchar_async) -{ - mchar_async->chunks_length = 0; - mchar_async->chunks_pos_length = 1; - - mchar_async_cache_clean(&mchar_async->chunk_cache); - - for (size_t node_idx = 0; node_idx < mchar_async->nodes_length; node_idx++) - { - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - mchar_async_cache_clean(&node->cache); - - node->chunk = mchar_async_chunk_malloc(mchar_async, node, mchar_async->origin_size); - node->chunk->prev = 0; - } -} - -mchar_async_t * mchar_async_destroy(mchar_async_t *mchar_async, int destroy_self) -{ - if(mchar_async == NULL) - return NULL; - - if(mchar_async->nodes) - { - for (size_t node_idx = 0; node_idx < mchar_async->nodes_length; node_idx++) - { - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - mchar_async_cache_destroy(&node->cache, false); - } - - myhtml_free(mchar_async->nodes); - mchar_async->nodes = NULL; - } - - if(mchar_async->nodes_cache) { - myhtml_free(mchar_async->nodes_cache); - } - - if(mchar_async->chunks) - { - for (size_t pos_idx = 0; pos_idx < mchar_async->chunks_pos_length; pos_idx++) { - if(mchar_async->chunks[pos_idx]) - { - for (size_t idx = 0; idx < mchar_async->chunks_size; idx++) { - if(mchar_async->chunks[pos_idx][idx].begin) - myhtml_free(mchar_async->chunks[pos_idx][idx].begin); - } - - myhtml_free(mchar_async->chunks[pos_idx]); - } - } - - myhtml_free(mchar_async->chunks); - mchar_async->chunks = NULL; - } - - mchar_async_cache_destroy(&mchar_async->chunk_cache, false); - - mchar_async->mcsync = mcsync_destroy(mchar_async->mcsync, 1); - - memset(mchar_async, 0, sizeof(mchar_async_t)); - - if(destroy_self) - myhtml_free(mchar_async); - else - return mchar_async; - - return NULL; -} - -void mchar_async_mem_malloc(mchar_async_t *mchar_async, mchar_async_node_t *node, mchar_async_chunk_t *chunk, size_t length) -{ - if(chunk == NULL) - return; - - if(chunk->begin) { - if(length > chunk->size) { - myhtml_free(chunk->begin); - - chunk->size = length + mchar_async->origin_size; - chunk->begin = (char*)myhtml_malloc(chunk->size * sizeof(char)); - } - } - else { - chunk->size = mchar_async->origin_size; - - if(length > chunk->size) - chunk->size = length; - - chunk->begin = (char*)myhtml_malloc(chunk->size * sizeof(char)); - } - - chunk->length = 0; -} - -mchar_async_chunk_t * mchar_async_chunk_malloc_without_lock(mchar_async_t *mchar_async, mchar_async_node_t *node, size_t length) -{ - if(mchar_async_cache_has_nodes(mchar_async->chunk_cache)) - { - size_t index = mchar_async_cache_delete(&mchar_async->chunk_cache, length); - - if(index) - return (mchar_async_chunk_t*)mchar_async->chunk_cache.nodes[index].value; - } - - if(mchar_async->chunks_length >= mchar_async->chunks_size) - { - size_t current_idx = mchar_async->chunks_pos_length; - mchar_async->chunks_pos_length++; - - if(mchar_async->chunks_pos_length >= mchar_async->chunks_pos_size) - { - mchar_async->chunks_pos_size <<= 1; - mchar_async_chunk_t **tmp_pos = myhtml_realloc(mchar_async->chunks, - sizeof(mchar_async_chunk_t*) * mchar_async->chunks_pos_size); - - if(tmp_pos) { - memset(&tmp_pos[mchar_async->chunks_pos_length], 0, (mchar_async->chunks_pos_size - mchar_async->chunks_pos_length) - * sizeof(mchar_async_chunk_t*)); - - mchar_async->chunks = tmp_pos; - } - } - - if(mchar_async->chunks[current_idx] == NULL) { - mchar_async_chunk_t *tmp = myhtml_calloc(mchar_async->chunks_size, sizeof(mchar_async_chunk_t)); - - if(tmp) - mchar_async->chunks[current_idx] = tmp; - } - - mchar_async->chunks_length = 0; - } - - mchar_async_chunk_t *chunk = &mchar_async->chunks[mchar_async->chunks_pos_length - 1][mchar_async->chunks_length]; - mchar_async->chunks_length++; - - mchar_async_mem_malloc(mchar_async, node, chunk, length); - - return chunk; -} - -mchar_async_chunk_t * mchar_async_chunk_malloc(mchar_async_t *mchar_async, mchar_async_node_t *node, size_t length) -{ - mcsync_lock(mchar_async->mcsync); - mchar_async_chunk_t *chunk = mchar_async_chunk_malloc_without_lock(mchar_async, node, length); - mcsync_unlock(mchar_async->mcsync); - - return chunk; -} - -size_t mchar_async_node_add(mchar_async_t *mchar_async) -{ - mcsync_lock(mchar_async->mcsync); - - size_t node_idx; - - if(mchar_async->nodes_cache_length) { - mchar_async->nodes_cache_length--; - - node_idx = mchar_async->nodes_cache[ mchar_async->nodes_cache_length ]; - } - else { - if(mchar_async->nodes_length >= mchar_async->nodes_size) { - mcsync_unlock(mchar_async->mcsync); - return 0; - } - - node_idx = mchar_async->nodes_length; - mchar_async->nodes_length++; - } - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - - mchar_async_cache_init(&node->cache); - - node->chunk = mchar_async_chunk_malloc_without_lock(mchar_async, node, mchar_async->origin_size); - - node->chunk->next = NULL; - node->chunk->prev = NULL; - - mcsync_unlock(mchar_async->mcsync); - - return node_idx; -} - -void mchar_async_node_clean(mchar_async_t *mchar_async, size_t node_idx) -{ - if(mchar_async->nodes_length <= node_idx) - return; - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - - while (node->chunk->prev) - node->chunk = node->chunk->prev; - - node->chunk->length = 0; - mchar_async_cache_clean(&node->cache); -} - -void mchar_async_node_delete(mchar_async_t *mchar_async, size_t node_idx) -{ - mcsync_lock(mchar_async->mcsync); - - if(mchar_async->nodes_length <= node_idx) { - mcsync_unlock(mchar_async->mcsync); - return; - } - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - mchar_async_chunk_t *chunk = node->chunk; - - while (chunk->next) - chunk = chunk->next; - - while (chunk) - { - mchar_async_cache_add(&mchar_async->chunk_cache, (void*)chunk, chunk->size); - chunk = chunk->prev; - } - - if(node->cache.nodes) - mchar_async_cache_destroy(&node->cache, false); - - memset(node, 0, sizeof(mchar_async_node_t)); - - if(mchar_async->nodes_cache_length >= mchar_async->nodes_cache_size) { - size_t new_size = mchar_async->nodes_cache_size << 1; - - size_t *tmp = (size_t*)myhtml_realloc(mchar_async->nodes_cache, sizeof(size_t) * mchar_async->nodes_cache_size); - - if(tmp) { - mchar_async->nodes_cache = tmp; - mchar_async->nodes_cache_size = new_size; - } - } - - mchar_async->nodes_cache[ mchar_async->nodes_cache_length ] = node_idx; - mchar_async->nodes_cache_length++; - - mcsync_unlock(mchar_async->mcsync); -} - -mchar_async_chunk_t * mchar_sync_chunk_find_by_size(mchar_async_node_t *node, size_t size) -{ - mchar_async_chunk_t *chunk = node->chunk->next; - - while (chunk) { - if(chunk->size >= size) - return chunk; - - chunk = chunk->next; - } - - return NULL; -} - -void mchar_sync_chunk_insert_after(mchar_async_chunk_t *base, mchar_async_chunk_t *chunk) -{ - if(base->next == chunk) - return; - - if(chunk->prev) - chunk->prev->next = chunk->next; - - if(chunk->next) - chunk->next->prev = chunk->prev; - - if(base->next) - base->next->prev = chunk; - - chunk->next = base->next; - chunk->prev = base; - - base->next = chunk; -} - -char * mchar_async_malloc(mchar_async_t *mchar_async, size_t node_idx, size_t size) -{ - if(size == 0) - return NULL; - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - mchar_async_chunk_t *chunk = node->chunk; - - if(mchar_async_cache_has_nodes(node->cache)) { - size_t index = mchar_async_cache_delete(&node->cache, size); - - if(index) { - return (char *)(node->cache.nodes[index].value); - } - } - - size_t new_size = chunk->length + size + sizeof(size_t); - - if(new_size > chunk->size) - { - if((chunk->length + sizeof(size_t)) < chunk->size) - { - size_t calc_size = (chunk->size - chunk->length) - sizeof(size_t); - - if(calc_size) { - char *tmp = &chunk->begin[(chunk->length + sizeof(size_t))]; - *(size_t*)(&chunk->begin[chunk->length]) = calc_size; - - chunk->length = chunk->size; - - mchar_async_cache_add(&node->cache, tmp, calc_size); - } - } - - chunk = mchar_sync_chunk_find_by_size(node, (size + sizeof(size_t))); - - if(chunk) - chunk->length = 0; - else { - if((size + sizeof(size_t)) > mchar_async->origin_size) - chunk = mchar_async_chunk_malloc(mchar_async, node, (size + sizeof(size_t) + mchar_async->origin_size)); - else - chunk = mchar_async_chunk_malloc(mchar_async, node, mchar_async->origin_size); - } - - mchar_sync_chunk_insert_after(node->chunk, chunk); - node->chunk = chunk; - } - - char *tmp = &chunk->begin[(chunk->length + sizeof(size_t))]; - *((size_t*)(&chunk->begin[chunk->length])) = size; - - chunk->length = chunk->length + size + sizeof(size_t); - - return tmp; -} - -char * mchar_async_realloc(mchar_async_t *mchar_async, size_t node_idx, char *data, size_t data_len, size_t new_size) -{ - if(data == NULL) - return NULL; - - size_t curr_size = *((size_t*)(data - sizeof(size_t))); - - if(curr_size >= new_size) - return data; - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - - if(node->chunk->length >= curr_size && - &node->chunk->begin[ (node->chunk->length - curr_size) ] == data) - { - size_t next_size = (node->chunk->length - curr_size) + new_size; - - if(next_size <= node->chunk->size) { - /* it`s Magic */ - *((size_t*)(&node->chunk->begin[ ((node->chunk->length - curr_size) - sizeof(size_t)) ])) = new_size; - - node->chunk->length = next_size; - - return data; - } -// else { -// size_t re_size = next_size - node->chunk->length; -// -// /* a little Magic ;) */ -// *((size_t*)(&node->chunk->begin[ ((node->chunk->length - curr_size) - sizeof(size_t)) ])) = re_size; -// -// curr_size = re_size; -// } - } - - char *tmp = mchar_async_malloc(mchar_async, node_idx, new_size); - - if(tmp) { - memcpy(tmp, data, sizeof(char) * data_len); - - mchar_async_cache_add(&node->cache, data, curr_size); - } - - return tmp; -} - -char * mchar_async_crop_first_chars(mchar_async_t *mchar_async, size_t node_idx, char *data, size_t crop_len) -{ - if(data == NULL) - return NULL; - - size_t curr_size = *((size_t*)(data - sizeof(size_t))); - - char *tmp_old = data; - data = &data[crop_len]; - - *((size_t*)(data - sizeof(size_t))) = curr_size - crop_len; - - if((crop_len + 4) > sizeof(size_t)) { - crop_len = crop_len - sizeof(size_t); - *((size_t*)(tmp_old - sizeof(size_t))) = crop_len; - - mchar_async_node_t *node = &mchar_async->nodes[node_idx]; - mchar_async_cache_add(&node->cache, tmp_old, crop_len); - } - - return data; -} - -char * mchar_async_crop_first_chars_without_cache(char *data, size_t crop_len) -{ - if(data == NULL) - return NULL; - - size_t curr_size = *((size_t*)(data - sizeof(size_t))); - data = &data[crop_len]; - - *((size_t*)(data - sizeof(size_t))) = curr_size - crop_len; - - return data; -} - -size_t mchar_async_get_size_by_data(const char *data) -{ - if(data == NULL) - return 0; - - return *((size_t*)(data - sizeof(size_t))); -} - -void mchar_async_free(mchar_async_t *mchar_async, size_t node_idx, char *entry) -{ - if(entry) - mchar_async_cache_add(&mchar_async->nodes[node_idx].cache, entry, *(size_t*)(entry - sizeof(size_t))); -} - -void mchar_async_cache_init(mchar_async_cache_t *cache) -{ - cache->count = 0; - cache->nodes_root = 0; - cache->nodes_length = 1; - cache->nodes_size = 1024; - cache->nodes = (mchar_async_cache_node_t*)myhtml_malloc(sizeof(mchar_async_cache_node_t) * cache->nodes_size); - - cache->nodes[0].left = 0; - cache->nodes[0].right = 0; - cache->nodes[0].size = 0; - cache->nodes[0].value = NULL; - - cache->index_length = 0; - cache->index_size = cache->nodes_size; - cache->index = (size_t*)myhtml_malloc(sizeof(size_t) * cache->index_size); -} - -void mchar_async_cache_clean(mchar_async_cache_t *cache) -{ - cache->count = 0; - cache->nodes_root = 0; - cache->nodes_length = 1; - cache->index_length = 0; - - if(cache->nodes) { - cache->nodes[0].left = 0; - cache->nodes[0].right = 0; - cache->nodes[0].size = 0; - cache->nodes[0].value = NULL; - } -} - -mchar_async_cache_t * mchar_async_cache_destroy(mchar_async_cache_t *cache, bool self_destroy) -{ - if(cache == NULL) - return NULL; - - if(cache->nodes) - myhtml_free(cache->nodes); - - if(cache->index) - myhtml_free(cache->index); - - if(self_destroy) { - myhtml_free(cache); - return NULL; - } - - return cache; -} - -size_t mchar_async_cache_malloc(mchar_async_cache_t *cache) -{ - if(cache->index_length) { - cache->index_length--; - return cache->index[cache->index_length]; - } - - cache->nodes_length++; - - if(cache->nodes_length >= cache->nodes_size) { - cache->nodes_size <<= 1; - - mchar_async_cache_node_t *tmp = (mchar_async_cache_node_t*)myhtml_realloc(cache->nodes, sizeof(mchar_async_cache_node_t) * cache->nodes_size); - - if(tmp) - cache->nodes = tmp; - } - - return cache->nodes_length - 1; -} - -size_t mchar_async_cache_delete(mchar_async_cache_t *cache, size_t size) -{ - mchar_async_cache_node_t *list = cache->nodes; - size_t idx = cache->nodes_root; - - while (idx) - { - if(size <= list[idx].size) - { - while( list[ list[idx].right ].size == size ) - idx = list[idx].right; - - size_t parent = list[idx].parent; - - if(parent) { - if(list[parent].left == idx) - { - if(list[idx].right) { - if(list[idx].left) { - size_t last_left = list[ list[idx].right ].left; - - while( list[last_left].left ) - last_left = list[last_left].left; - - if(last_left) { - list[last_left].left = list[idx].left; - list[ list[idx].left ].parent = last_left; - } - else { - list[ list[idx].right ].left = list[idx].left; - } - } - - list[parent].left = list[idx].right; - list[ list[idx].right ].parent = parent; - } - else { - list[parent].left = list[idx].left; - list[ list[idx].left ].parent = parent; - } - } - else { - if(list[idx].left) { - if(list[idx].right) { - size_t last_right = list[ list[idx].left ].right; - - while( list[last_right].right ) - last_right = list[last_right].right; - - if(last_right) { - list[last_right].right = list[idx].right; - list[ list[idx].right ].parent = last_right; - } - else { - list[ list[idx].left ].right = list[idx].right; - } - } - - list[parent].right = list[idx].left; - list[ list[idx].left ].parent = parent; - } - else { - list[parent].right = list[idx].right; - list[ list[idx].right ].parent = parent; - } - } - } - else { - if(list[idx].left) { - if(list[idx].right) { - size_t last_right = list[ list[idx].left ].right; - - while( list[last_right].right ) - last_right = list[last_right].right; - - if(last_right) { - list[last_right].right = list[idx].right; - list[ list[idx].right ].parent = last_right; - } - else { - list[ list[idx].left ].right = list[idx].right; - } - } - - cache->nodes_root = list[idx].left; - list[ list[idx].left ].parent = 0; - } - else { - cache->nodes_root = list[idx].right; - list[ list[idx].right ].parent = 0; - } - } - - cache->index[cache->index_length] = idx; - - cache->index_length++; - if(cache->index_length >= cache->index_size) - { - size_t new_size = cache->index_size << 1; - size_t *tmp = (size_t*)myhtml_realloc(cache->index, sizeof(size_t) * new_size); - - if(tmp) { - cache->index = tmp; - cache->index_size = new_size; - } - } - - cache->count--; - - return idx; - } - else { - idx = list[idx].right; - } - } - - return 0; -} - -void mchar_async_cache_add(mchar_async_cache_t *cache, void* value, size_t size) -{ - cache->count++; - - if(cache->nodes_root == 0) { - mchar_async_cache_node_t *list = cache->nodes; - - cache->nodes_root = mchar_async_cache_malloc(cache); - - list[cache->nodes_root].parent = 0; - list[cache->nodes_root].left = 0; - list[cache->nodes_root].right = 0; - list[cache->nodes_root].size = size; - list[cache->nodes_root].value = value; - - return; - } - - size_t idx = cache->nodes_root; - size_t new_idx = mchar_async_cache_malloc(cache); - - mchar_async_cache_node_t *list = cache->nodes; - - while(idx) - { - if(size == list[idx].size) - { - if(list[idx].right) { - list[new_idx].right = list[idx].right; - list[ list[idx].right ].parent = new_idx; - } - else { - list[new_idx].right = 0; - } - - list[idx].right = new_idx; - - list[new_idx].parent = idx; - list[new_idx].left = 0; - list[new_idx].size = size; - list[new_idx].value = value; - - break; - } - else if(size < list[idx].size) - { - size_t parent = list[idx].parent; - - if(parent) { - if(list[parent].left == idx) - list[parent].left = new_idx; - else - list[parent].right = new_idx; - - list[new_idx].parent = parent; - } - else { - cache->nodes_root = new_idx; - list[new_idx].parent = 0; - } - - list[idx].parent = new_idx; - - list[new_idx].right = idx; - list[new_idx].left = 0; - list[new_idx].size = size; - list[new_idx].value = value; - - break; - } - else // size > list[idx].size - { - if(list[idx].right) - idx = list[idx].right; - else { - list[idx].right = new_idx; - - list[new_idx].right = 0; - list[new_idx].left = 0; - list[new_idx].parent = idx; - list[new_idx].size = size; - list[new_idx].value = value; - - break; - } - } - } -} - - diff --git a/source/myhtml/utils/mchar_async.h b/source/myhtml/utils/mchar_async.h deleted file mode 100644 index dcba9c5..0000000 --- a/source/myhtml/utils/mchar_async.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCHAR_ASYNC_H -#define MyHTML_UTILS_MCHAR_ASYNC_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" -#include "myhtml/utils/mcsync.h" - -#define mchar_async_cache_has_nodes(cache) cache.count - -typedef struct mchar_async_node mchar_async_node_t; - -struct mchar_async_cache_node { - void *value; - size_t size; - - size_t left; - size_t right; - size_t parent; -} -typedef mchar_async_cache_node_t; - -struct mchar_async_chunk { - char *begin; - size_t length; - size_t size; - - struct mchar_async_chunk *next; - struct mchar_async_chunk *prev; -} -typedef mchar_async_chunk_t; - -struct mchar_async_cache { - mchar_async_cache_node_t *nodes; - size_t nodes_size; - size_t nodes_length; - size_t nodes_root; - - size_t count; - - size_t *index; - size_t index_length; - size_t index_size; -} -typedef mchar_async_cache_t; - -struct mchar_async_node { - mchar_async_chunk_t *chunk; - mchar_async_cache_t cache; -}; - -struct mchar_async { - size_t origin_size; - - mchar_async_chunk_t **chunks; - size_t chunks_pos_size; - size_t chunks_pos_length; - size_t chunks_size; - size_t chunks_length; - - mchar_async_cache_t chunk_cache; - - mchar_async_node_t *nodes; - size_t nodes_length; - size_t nodes_size; - - size_t *nodes_cache; - size_t nodes_cache_length; - size_t nodes_cache_size; - - mcsync_t *mcsync; -} -typedef mchar_async_t; - - -mchar_async_t * mchar_async_create(size_t pos_size, size_t size); -void mchar_async_init(mchar_async_t *mchar_async, size_t chunk_len, size_t char_size); -void mchar_async_clean(mchar_async_t *mchar_async); -mchar_async_t * mchar_async_destroy(mchar_async_t *mchar_async, int destroy_self); - -char * mchar_async_malloc(mchar_async_t *mchar_async, size_t node_idx, size_t size); -char * mchar_async_realloc(mchar_async_t *mchar_async, size_t node_idx, char *data, size_t data_len, size_t new_size); -void mchar_async_free(mchar_async_t *mchar_async, size_t node_idx, char *entry); - -size_t mchar_async_node_add(mchar_async_t *mchar_async); -void mchar_async_node_clean(mchar_async_t *mchar_async, size_t node_idx); -void mchar_async_node_delete(mchar_async_t *mchar_async, size_t node_idx); - -mchar_async_chunk_t * mchar_async_chunk_malloc(mchar_async_t *mchar_async, mchar_async_node_t *node, size_t length); -char * mchar_async_crop_first_chars(mchar_async_t *mchar_async, size_t node_idx, char *data, size_t crop_len); -char * mchar_async_crop_first_chars_without_cache(char *data, size_t crop_len); - -size_t mchar_async_get_size_by_data(const char *data); - -// cache -void mchar_async_cache_init(mchar_async_cache_t *cache); -mchar_async_cache_t * mchar_async_cache_destroy(mchar_async_cache_t *cache, bool self_destroy); -void mchar_async_cache_clean(mchar_async_cache_t *cache); - -void mchar_async_cache_add(mchar_async_cache_t *cache, void* value, size_t size); -size_t mchar_async_cache_delete(mchar_async_cache_t *cache, size_t size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* mchar_async_h */ - diff --git a/source/myhtml/utils/mcobject.c b/source/myhtml/utils/mcobject.c deleted file mode 100644 index 3353909..0000000 --- a/source/myhtml/utils/mcobject.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/mcobject.h" - -mcobject_t * mcobject_create(void) -{ - return myhtml_calloc(1, sizeof(mcobject_t)); -} - -myhtml_status_t mcobject_init(mcobject_t *mcobject, size_t chunk_size, size_t struct_size) -{ - mcobject->struct_size = struct_size; - mcobject->chunk_size = chunk_size; - - mcobject->chunk = NULL; - - mcobject->cache_length = 0; - mcobject->cache_size = chunk_size; - mcobject->cache = (void**)myhtml_malloc(sizeof(void*) * mcobject->cache_size); - - if(mcobject->cache == NULL) - return MyHTML_STATUS_MCOBJECT_ERROR_CACHE_CREATE; - - return MyHTML_STATUS_OK; -} - -void mcobject_clean(mcobject_t *mcobject) -{ - if(mcobject->chunk == NULL) - return; - - mcobject_chunk_t* chunk = mcobject->chunk; - - while(chunk->next) - chunk = chunk->next; - - while(chunk) { - mcobject_chunk_t* tmp = chunk->prev; - - if(chunk->begin) { - myhtml_free(chunk->begin); - } - - myhtml_free(chunk); - - chunk = tmp; - } - - mcobject->chunk = NULL; - mcobject->cache_length = 0; -} - -mcobject_t * mcobject_destroy(mcobject_t *mcobject, bool destroy_self) -{ - if(mcobject == NULL) - return NULL; - - mcobject_clean(mcobject); - - if(mcobject->cache) { - myhtml_free(mcobject->cache); - mcobject->cache = NULL; - } - - if(destroy_self) { - myhtml_free(mcobject); - return NULL; - } - - return mcobject; -} - -void mcobject_chunk_malloc(mcobject_t* mcobject, myhtml_status_t* status) -{ - if(status) - *status = MyHTML_STATUS_OK; - - mcobject_chunk_t* chunk; - - if(mcobject->chunk && mcobject->chunk->next) { - mcobject->chunk = mcobject->chunk->next; - - mcobject->chunk->length = 0; - - return; - } - else { - chunk = myhtml_calloc(1, sizeof(mcobject_chunk_t)); - - if(chunk == NULL) { - if(status) - *status = MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_CREATE; - - return; - } - - chunk->begin = myhtml_malloc(mcobject->struct_size * mcobject->chunk_size); - - if(chunk->begin == NULL) { - if(status) - *status = MyHTML_STATUS_MCOBJECT_ERROR_CHUNK_INIT; - - myhtml_free(chunk); - return; - } - - chunk->size = mcobject->chunk_size; - } - - if(mcobject->chunk == NULL) { - mcobject->chunk = chunk; - return; - } - - chunk->prev = mcobject->chunk; - mcobject->chunk->next = chunk; - - mcobject->chunk = chunk; -} - -void * mcobject_malloc(mcobject_t *mcobject, myhtml_status_t* status) -{ - if(mcobject->cache_length) { - if(status) - *status = MyHTML_STATUS_OK; - - mcobject->cache_length--; - return mcobject->cache[ mcobject->cache_length ]; - } - - mcobject_chunk_t* chunk = mcobject->chunk; - - if(chunk == NULL || chunk->length >= chunk->size) - { - myhtml_status_t ns_status; - mcobject_chunk_malloc(mcobject, &ns_status); - - if(ns_status) { - if(status) - *status = ns_status; - - return NULL; - } - - chunk = mcobject->chunk; - } - - if(status) - *status = MyHTML_STATUS_OK; - - chunk->length++; - return &chunk->begin[((chunk->length - 1) * mcobject->struct_size)]; -} - -myhtml_status_t mcobject_free(mcobject_t *mcobject, void *entry) -{ - if(mcobject->cache_length >= mcobject->cache_size) { - size_t new_size = mcobject->cache_size << 1; - - void **tmp = (void**)myhtml_realloc(mcobject->cache, sizeof(void*) * new_size); - - if(tmp) { - mcobject->cache = tmp; - mcobject->cache_size = new_size; - } - else - return MyHTML_STATUS_MCOBJECT_ERROR_CACHE_REALLOC; - } - - mcobject->cache[ mcobject->cache_length ] = entry; - mcobject->cache_length++; - - return MyHTML_STATUS_OK; -} - - diff --git a/source/myhtml/utils/mcobject.h b/source/myhtml/utils/mcobject.h deleted file mode 100644 index b68935b..0000000 --- a/source/myhtml/utils/mcobject.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCOBJECT_H -#define MyHTML_UTILS_MCOBJECT_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" - -struct mcobject_chunk { - unsigned char *begin; - size_t length; - size_t size; - - struct mcobject_chunk *next; - struct mcobject_chunk *prev; -} -typedef mcobject_chunk_t; - -struct mcobject { - mcobject_chunk_t *chunk; - - void **cache; - size_t cache_size; - size_t cache_length; - - size_t struct_size; - size_t chunk_size; -} -typedef mcobject_t; - - -mcobject_t * mcobject_create(void); -myhtml_status_t mcobject_init(mcobject_t *mcobject, size_t chunk_size, size_t struct_size); -void mcobject_clean(mcobject_t *mcobject); -mcobject_t * mcobject_destroy(mcobject_t *mcobject, bool destroy_self); - -void mcobject_chunk_malloc(mcobject_t* mcobject, myhtml_status_t* status); - -void * mcobject_malloc(mcobject_t *mcobject, myhtml_status_t* status); -myhtml_status_t mcobject_free(mcobject_t *mcobject, void *entry); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MyHTML_UTILS_MCOBJECT_H */ - diff --git a/source/myhtml/utils/mcobject_async.c b/source/myhtml/utils/mcobject_async.c deleted file mode 100644 index 21e36e8..0000000 --- a/source/myhtml/utils/mcobject_async.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/mcobject_async.h" - -mcobject_async_t * mcobject_async_create(void) -{ - return (mcobject_async_t*)myhtml_calloc(1, sizeof(mcobject_async_t)); -} - -mcobject_async_status_t mcobject_async_chunk_up(mcobject_async_t *mcobj_async) -{ - mcobj_async->chunks_length = 0; - - if(mcobj_async->chunks[ mcobj_async->chunks_pos_length ] == NULL) { - mcobj_async->chunks[ mcobj_async->chunks_pos_length ] = (mcobject_async_chunk_t*)myhtml_calloc(mcobj_async->chunks_size, sizeof(mcobject_async_chunk_t)); - - if(mcobj_async->chunks[ mcobj_async->chunks_pos_length ] == NULL) - return MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION; - } - - return MCOBJECT_ASYNC_STATUS_OK; -} - -mcobject_async_status_t mcobject_async_init(mcobject_async_t *mcobj_async, size_t chunk_len, size_t obj_size_by_one_chunk, size_t struct_size) -{ - mcobj_async->origin_size = obj_size_by_one_chunk; - mcobj_async->struct_size = struct_size; - mcobj_async->struct_size_sn = struct_size + sizeof(size_t); - - mcobj_async->chunks_pos_length = 0; - mcobj_async->chunks_pos_size = 128; - mcobj_async->chunks_size = chunk_len; - mcobj_async->chunks = (mcobject_async_chunk_t**)myhtml_calloc(mcobj_async->chunks_pos_size, sizeof(mcobject_async_chunk_t*)); - - if(mcobj_async->chunks == NULL) - return MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION; - - mcobject_async_chunk_up(mcobj_async); - - mcobj_async->chunk_cache_size = mcobj_async->chunks_size; - mcobj_async->chunk_cache = (mcobject_async_chunk_t**)myhtml_calloc(mcobj_async->chunk_cache_size, sizeof(mcobject_async_chunk_t*)); - - if(mcobj_async->chunk_cache == NULL) - return MCOBJECT_ASYNC_STATUS_CHUNK_CACHE_ERROR_MEMORY_ALLOCATION; - - mcobj_async->nodes_length = 0; - mcobj_async->nodes_size = 64; - mcobj_async->nodes = (mcobject_async_node_t*)myhtml_calloc(mcobj_async->nodes_size, sizeof(mcobject_async_node_t)); - - if(mcobj_async->nodes == NULL) - return MCOBJECT_ASYNC_STATUS_NODES_ERROR_MEMORY_ALLOCATION; - - mcobj_async->nodes_cache_length = 0; - mcobj_async->nodes_cache_size = mcobj_async->nodes_size; - mcobj_async->nodes_cache = (size_t*)myhtml_malloc(mcobj_async->nodes_cache_size * sizeof(size_t)); - - if(mcobj_async->nodes_cache == NULL) - return MCOBJECT_ASYNC_STATUS_NODES_ERROR_MEMORY_ALLOCATION; - - mcobject_async_clean(mcobj_async); - - mcobj_async->mcsync = mcsync_create(); - - return MCOBJECT_ASYNC_STATUS_OK; -} - -void mcobject_async_clean(mcobject_async_t *mcobj_async) -{ - if(mcobj_async->chunks[0] != NULL) - mcobj_async->chunks_pos_length = 1; - else - mcobj_async->chunks_pos_length = 0; - - mcobj_async->chunks_length = 0; - mcobj_async->chunk_cache_length = 0; - - size_t node_idx; - for (node_idx = 0; node_idx < mcobj_async->nodes_length; node_idx++) - { - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - node->cache_length = 0; - - if(node->chunk) { - node->chunk = mcobject_async_chunk_malloc(mcobj_async, mcobj_async->origin_size, NULL); - } - } -} - -mcobject_async_t * mcobject_async_destroy(mcobject_async_t *mcobj_async, int destroy_self) -{ - if(mcobj_async == NULL) - return NULL; - - if(mcobj_async->nodes) - { - for (size_t node_idx = 0; node_idx < mcobj_async->nodes_length; node_idx++) - { - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - - if(node->cache) - myhtml_free(node->cache); - } - - myhtml_free(mcobj_async->nodes); - } - - if(mcobj_async->nodes_cache) { - myhtml_free(mcobj_async->nodes_cache); - } - - if(mcobj_async->chunks) { - for (size_t pos_idx = 0; pos_idx < mcobj_async->chunks_pos_length; pos_idx++) { - if(mcobj_async->chunks[pos_idx]) - { - for (size_t idx = 0; idx < mcobj_async->chunks_size; idx++) { - if(mcobj_async->chunks[pos_idx][idx].begin) - myhtml_free(mcobj_async->chunks[pos_idx][idx].begin); - } - - myhtml_free(mcobj_async->chunks[pos_idx]); - } - } - - myhtml_free(mcobj_async->chunks); - } - - if(mcobj_async->chunk_cache) { - myhtml_free(mcobj_async->chunk_cache); - } - - mcobj_async->mcsync = mcsync_destroy(mcobj_async->mcsync, 1); - - memset(mcobj_async, 0, sizeof(mcobject_async_t)); - - if(destroy_self) - myhtml_free(mcobj_async); - else - return mcobj_async; - - return NULL; -} - -mcobject_async_status_t mcobject_async_mem_malloc(mcobject_async_t *mcobj_async, mcobject_async_chunk_t *chunk, size_t length) -{ - if(chunk->begin) { - if(length > chunk->size) { - myhtml_free(chunk->begin); - - chunk->size = length + mcobj_async->origin_size; - chunk->begin = (unsigned char*)myhtml_malloc(chunk->size * mcobj_async->struct_size_sn); - } - } - else { - chunk->size = mcobj_async->origin_size; - - if(length > chunk->size) - chunk->size += length; - - chunk->begin = (unsigned char*)myhtml_malloc(chunk->size * mcobj_async->struct_size_sn); - } - - chunk->length = 0; - - if(chunk->begin == NULL) - return MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION; - - return MCOBJECT_ASYNC_STATUS_OK; -} - -mcobject_async_chunk_t * mcobject_async_chunk_malloc_without_lock(mcobject_async_t *mcobj_async, size_t length, mcobject_async_status_t *status) -{ - if(status) - *status = MCOBJECT_ASYNC_STATUS_OK; - - if(mcobj_async->chunk_cache_length) - { - mcobj_async->chunk_cache_length--; - - mcobj_async->chunk_cache[ mcobj_async->chunk_cache_length ]->length = 0; - mcobj_async->chunk_cache[ mcobj_async->chunk_cache_length ]->next = NULL; - mcobj_async->chunk_cache[ mcobj_async->chunk_cache_length ]->prev = NULL; - - return mcobj_async->chunk_cache[ mcobj_async->chunk_cache_length ]; - } - - if(mcobj_async->chunks_length >= mcobj_async->chunks_size) - { - if(mcobj_async->chunks_pos_length >= mcobj_async->chunks_pos_size) - { - size_t tmp_pos_size = mcobj_async->chunks_pos_size << 1; - mcobject_async_chunk_t **tmp_pos = myhtml_realloc(mcobj_async->chunks, - sizeof(mcobject_async_chunk_t*) * tmp_pos_size); - - if(tmp_pos) - { - memset(&tmp_pos[mcobj_async->chunks_pos_length], 0, (tmp_pos_size - mcobj_async->chunks_pos_length) - * sizeof(mcobject_async_chunk_t*)); - - mcobj_async->chunks_pos_size = tmp_pos_size; - mcobj_async->chunks = tmp_pos; - } - else { - if(status) - *status = MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION; - - return NULL; - } - } - - if(mcobject_async_chunk_up(mcobj_async)) { - if(status) - *status = MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION; - - return NULL; - } - - mcobj_async->chunks_pos_length++; - } - - mcobject_async_chunk_t* chunk = &mcobj_async->chunks[mcobj_async->chunks_pos_length - 1][mcobj_async->chunks_length]; - mcobj_async->chunks_length++; - - chunk->next = NULL; - chunk->prev = NULL; - - if(status) - *status = mcobject_async_mem_malloc(mcobj_async, chunk, length); - else - mcobject_async_mem_malloc(mcobj_async, chunk, length); - - return chunk; -} - -mcobject_async_chunk_t * mcobject_async_chunk_malloc(mcobject_async_t *mcobj_async, size_t length, mcobject_async_status_t *status) -{ - mcsync_lock(mcobj_async->mcsync); - mcobject_async_chunk_t* chunk = mcobject_async_chunk_malloc_without_lock(mcobj_async, length, status); - mcsync_unlock(mcobj_async->mcsync); - - return chunk; -} - -size_t mcobject_async_node_add(mcobject_async_t *mcobj_async, mcobject_async_status_t *status) -{ - mcsync_lock(mcobj_async->mcsync); - - if(status) - *status = MCOBJECT_ASYNC_STATUS_OK; - - size_t node_idx; - - if(mcobj_async->nodes_cache_length) { - mcobj_async->nodes_cache_length--; - - node_idx = mcobj_async->nodes_cache[ mcobj_async->nodes_cache_length ]; - } - else { - if(mcobj_async->nodes_length >= mcobj_async->nodes_size) { - mcsync_unlock(mcobj_async->mcsync); - return 0; - } - - node_idx = mcobj_async->nodes_length; - mcobj_async->nodes_length++; - } - - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - - node->chunk = mcobject_async_chunk_malloc_without_lock(mcobj_async, mcobj_async->origin_size, status); - - if(status && *status) { - mcsync_unlock(mcobj_async->mcsync); - return 0; - } - - node->chunk->next = NULL; - node->chunk->prev = NULL; - - node->cache_length = 0; - node->cache_size = mcobj_async->origin_size; - node->cache = (void**)myhtml_malloc(sizeof(void*) * node->cache_size); - - if(node->cache == NULL) { - if(status) - *status = MCOBJECT_ASYNC_STATUS_CHUNK_CACHE_ERROR_MEMORY_ALLOCATION; - - mcsync_unlock(mcobj_async->mcsync); - return 0; - } - - mcsync_unlock(mcobj_async->mcsync); - - return node_idx; -} - -void mcobject_async_node_clean(mcobject_async_t *mcobj_async, size_t node_idx) -{ - if(mcobj_async->nodes_length <= node_idx) - return; - - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - node->cache_length = 0; - - if(node->chunk == NULL) - return; - - while (node->chunk->prev) - node->chunk = node->chunk->prev; - - node->chunk->length = 0; - node->cache_length = 0; -} - -void mcobject_async_node_all_clean(mcobject_async_t *mcobj_async) -{ - for (size_t node_idx = 0; node_idx < mcobj_async->nodes_length; node_idx++) { - mcobject_async_node_clean(mcobj_async, node_idx); - } -} - -void mcobject_async_node_delete(mcobject_async_t *mcobj_async, size_t node_idx) -{ - mcsync_lock(mcobj_async->mcsync); - - if(mcobj_async->nodes_length <= node_idx) { - mcsync_unlock(mcobj_async->mcsync); - return; - } - - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - mcobject_async_chunk_t *chunk = node->chunk; - - while (chunk->next) - chunk = chunk->next; - - while (chunk) - { - if(mcobj_async->chunk_cache_length >= mcobj_async->chunk_cache_size) { - size_t new_size = mcobj_async->chunk_cache_size << 1; - - mcobject_async_chunk_t **tmp = (mcobject_async_chunk_t**)myhtml_realloc(mcobj_async->chunk_cache, - sizeof(mcobject_async_chunk_t*) * new_size); - - if(tmp) { - mcobj_async->chunk_cache_size = new_size; - mcobj_async->chunk_cache = tmp; - } - else { - // TODO: add return status - mcsync_unlock(mcobj_async->mcsync); - return; - } - } - - mcobj_async->chunk_cache[ mcobj_async->chunk_cache_length ] = chunk; - mcobj_async->chunk_cache_length++; - - chunk = chunk->prev; - } - - if(node->cache) - myhtml_free(node->cache); - - memset(node, 0, sizeof(mcobject_async_node_t)); - - if(mcobj_async->nodes_cache_length >= mcobj_async->nodes_cache_size) { - size_t new_size = mcobj_async->nodes_cache_size << 1; - - size_t *tmp = (size_t*)myhtml_realloc(mcobj_async->nodes_cache, sizeof(size_t) * mcobj_async->nodes_cache_size); - - if(tmp) { - mcobj_async->nodes_cache = tmp; - mcobj_async->nodes_cache_size = new_size; - } - } - - mcobj_async->nodes_cache[ mcobj_async->nodes_cache_length ] = node_idx; - mcobj_async->nodes_cache_length++; - - mcsync_unlock(mcobj_async->mcsync); -} - -void * mcobject_async_malloc(mcobject_async_t *mcobj_async, size_t node_idx, mcobject_async_status_t *status) -{ - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - - if(node->cache_length) { - if(status) - *status = MCOBJECT_ASYNC_STATUS_OK; - - node->cache_length--; - return node->cache[ node->cache_length ]; - } - - if(node->chunk->length >= node->chunk->size) - { - if(node->chunk->next) { - node->chunk = node->chunk->next; - node->chunk->length = 0; - } - else { - mcobject_async_status_t mystatus; - mcobject_async_chunk_t *chunk = mcobject_async_chunk_malloc(mcobj_async, mcobj_async->origin_size, &mystatus); - - if(mystatus) { - if(status) - *status = mystatus; - - return NULL; - } - - chunk->prev = node->chunk; - node->chunk->next = chunk; - - node->chunk = chunk; - } - } - - if(status) - *status = MCOBJECT_ASYNC_STATUS_OK; - - size_t offset = node->chunk->length * mcobj_async->struct_size_sn; - *((size_t*)(&node->chunk->begin[offset])) = node_idx; - - node->chunk->length++; - return &node->chunk->begin[(offset + sizeof(size_t))]; -} - -mcobject_async_status_t mcobject_async_free(mcobject_async_t *mcobj_async, void *entry) -{ - size_t node_idx = *((size_t*)((unsigned char*)entry - sizeof(size_t))); - - if(node_idx >= mcobj_async->nodes_length) - return MCOBJECT_ASYNC_STATUS_NODES_ERROR_BAD_NODE_ID; - - mcobject_async_node_t *node = &mcobj_async->nodes[node_idx]; - - if(node->cache_length >= node->cache_size) { - size_t new_size = node->cache_size << 1; - - void **tmp = (void**)myhtml_realloc(node->cache, sizeof(void*) * new_size); - - if(tmp) { - node->cache = tmp; - node->cache_size = new_size; - } - else - return MCOBJECT_ASYNC_STATUS_CACHE_ERROR_MEMORY_REALLOC; - } - - node->cache[ node->cache_length ] = entry; - node->cache_length++; - - return MCOBJECT_ASYNC_STATUS_OK; -} - - diff --git a/source/myhtml/utils/mcobject_async.h b/source/myhtml/utils/mcobject_async.h deleted file mode 100644 index 8ee1c78..0000000 --- a/source/myhtml/utils/mcobject_async.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCOBJECT_ASYNC_H -#define MyHTML_UTILS_MCOBJECT_ASYNC_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" -#include "myhtml/utils/mcsync.h" - -enum mcobject_async_status { - MCOBJECT_ASYNC_STATUS_OK = 0, - MCOBJECT_ASYNC_STATUS_ERROR_MEMORY_ALLOCATION = 1, - MCOBJECT_ASYNC_STATUS_CHUNK_ERROR_MEMORY_ALLOCATION = 2, - MCOBJECT_ASYNC_STATUS_CHUNK_CACHE_ERROR_MEMORY_ALLOCATION = 3, - MCOBJECT_ASYNC_STATUS_NODES_ERROR_MEMORY_ALLOCATION = 4, - MCOBJECT_ASYNC_STATUS_NODES_ERROR_BAD_NODE_ID = 5, - MCOBJECT_ASYNC_STATUS_CACHE_ERROR_MEMORY_REALLOC = 6 -} -typedef mcobject_async_status_t; - -struct mcobject_async_chunk { - unsigned char *begin; - size_t length; - size_t size; - - struct mcobject_async_chunk *next; - struct mcobject_async_chunk *prev; -} -typedef mcobject_async_chunk_t; - -struct mcobject_async_node { - mcobject_async_chunk_t *chunk; - - void **cache; - size_t cache_size; - size_t cache_length; -} -typedef mcobject_async_node_t; - -struct mcobject_async { - size_t origin_size; - size_t struct_size; - size_t struct_size_sn; - - mcobject_async_chunk_t **chunk_cache; - size_t chunk_cache_size; - size_t chunk_cache_length; - - mcobject_async_chunk_t **chunks; - size_t chunks_pos_size; - size_t chunks_pos_length; - size_t chunks_size; - size_t chunks_length; - - mcobject_async_node_t *nodes; - size_t nodes_length; - size_t nodes_size; - - size_t *nodes_cache; - size_t nodes_cache_length; - size_t nodes_cache_size; - - mcsync_t *mcsync; -} -typedef mcobject_async_t; - -mcobject_async_t * mcobject_async_create(void); -mcobject_async_status_t mcobject_async_init(mcobject_async_t *mcobj_async, size_t chunk_len, size_t obj_size_by_one_chunk, size_t struct_size); - -void mcobject_async_clean(mcobject_async_t *mcobj_async); -mcobject_async_t * mcobject_async_destroy(mcobject_async_t *mcobj_async, int destroy_self); - -size_t mcobject_async_node_add(mcobject_async_t *mcobj_async, mcobject_async_status_t *status); -void mcobject_async_node_clean(mcobject_async_t *mcobj_async, size_t node_idx); -void mcobject_async_node_all_clean(mcobject_async_t *mcobj_async); -void mcobject_async_node_delete(mcobject_async_t *mcobj_async, size_t node_idx); - -void * mcobject_async_malloc(mcobject_async_t *mcobj_async, size_t node_idx, mcobject_async_status_t *status); -mcobject_async_status_t mcobject_async_free(mcobject_async_t *mcobj_async, void *entry); - -mcobject_async_chunk_t * mcobject_async_chunk_malloc(mcobject_async_t *mcobj_async, size_t length, mcobject_async_status_t *status); -mcobject_async_chunk_t * mcobject_async_chunk_malloc_without_lock(mcobject_async_t *mcobj_async, size_t length, mcobject_async_status_t *status); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* mcobject_async_h */ - - - - diff --git a/source/myhtml/utils/mcsimple.c b/source/myhtml/utils/mcsimple.c deleted file mode 100644 index 8103b21..0000000 --- a/source/myhtml/utils/mcsimple.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) - */ - -#include "myhtml/utils/mcsimple.h" - -mcsimple_t * mcsimple_create(void) -{ - return myhtml_calloc(1, sizeof(mcsimple_t)); -} - -void mcsimple_init(mcsimple_t *mcsimple, size_t pos_size, size_t list_size, size_t struct_size) -{ - mcsimple->struct_size = struct_size; - - mcsimple->list_pos_length_used = 0; - mcsimple->list_pos_length = 0; - mcsimple->list_pos_size = pos_size; - mcsimple->list = (uint8_t**)myhtml_calloc(pos_size, sizeof(uint8_t*)); - - if(mcsimple->list == NULL) { - return; - } - - mcsimple->list_size = list_size * struct_size; - - if((mcsimple_init_list_entries(mcsimple, mcsimple->list_pos_length) == NULL)) { - return; - } -} - -void mcsimple_clean(mcsimple_t *mcsimple) -{ - mcsimple->list_length = 0; - mcsimple->list_pos_length = 0; -} - -mcsimple_t * mcsimple_destroy(mcsimple_t *mcsimple, bool destroy_self) -{ - if(mcsimple == NULL) - return NULL; - - if(mcsimple->list) { - for(size_t i = 0; i < mcsimple->list_pos_length_used; i++) { - if(mcsimple->list[i]) - free(mcsimple->list[i]); - } - - free(mcsimple->list); - } - - if(destroy_self) { - free(mcsimple); - return NULL; - } - - return mcsimple; -} - -uint8_t * mcsimple_init_list_entries(mcsimple_t *mcsimple, size_t pos) -{ - if(mcsimple->list_pos_length >= mcsimple->list_pos_size) - { - size_t new_size = mcsimple->list_pos_size + 128; - uint8_t **list = (uint8_t**)myhtml_realloc(mcsimple->list, mcsimple->list_pos_size * sizeof(uint8_t*)); - - if(list) { - mcsimple->list = list; - memset(&mcsimple->list[pos], 0, (new_size - mcsimple->list_pos_size) * sizeof(uint8_t*)); - - mcsimple->list_pos_size = new_size; - } - else - return NULL; - } - - mcsimple->list_length = 0; - - if(mcsimple->list[pos] == NULL) { - mcsimple->list_pos_length_used++; - mcsimple->list[pos] = (uint8_t*)myhtml_malloc(mcsimple->list_size * sizeof(uint8_t)); - } - - return mcsimple->list[pos]; -} - -void * mcsimple_malloc(mcsimple_t *mcsimple) -{ - if(mcsimple->list_length >= mcsimple->list_size) - { - mcsimple->list_pos_length++; - if((mcsimple_init_list_entries(mcsimple, mcsimple->list_pos_length) == NULL)) { - return NULL; - } - } - - size_t current = mcsimple->list_length; - mcsimple->list_length += mcsimple->struct_size; - - return &mcsimple->list[mcsimple->list_pos_length][current]; -} - -void * mcsimple_get_by_absolute_position(mcsimple_t *mcsimple, size_t pos) -{ - pos *= mcsimple->struct_size; - return &mcsimple->list[ (pos / mcsimple->list_size) ][ (pos % mcsimple->list_size) ]; -} - - diff --git a/source/myhtml/utils/mcsimple.h b/source/myhtml/utils/mcsimple.h deleted file mode 100644 index be972d9..0000000 --- a/source/myhtml/utils/mcsimple.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCSIMPLE_H -#define MyHTML_UTILS_MCSIMPLE_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" - -struct mcsimple { - size_t struct_size; - - uint8_t **list; - - size_t list_pos_size; - size_t list_pos_length; - size_t list_pos_length_used; - size_t list_size; - size_t list_length; -} -typedef mcsimple_t; - - -mcsimple_t * mcsimple_create(void); -void mcsimple_init(mcsimple_t *mcsimple, size_t pos_size, size_t list_size, size_t struct_size); -void mcsimple_clean(mcsimple_t *mcsimple); -mcsimple_t * mcsimple_destroy(mcsimple_t *mcsimple, bool destroy_self); - -uint8_t * mcsimple_init_list_entries(mcsimple_t *mcsimple, size_t pos); - -void * mcsimple_malloc(mcsimple_t *mcsimple); -void * mcsimple_get_by_absolute_position(mcsimple_t *mcsimple, size_t pos); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* MyHTML_UTILS_MCSIMPLE_H */ - diff --git a/source/myhtml/utils/mcsync.c b/source/myhtml/utils/mcsync.c deleted file mode 100644 index 58b263b..0000000 --- a/source/myhtml/utils/mcsync.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/mcsync.h" - -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && ((defined(__GNUC__) && __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) || \ - defined(__ATOMIC_SEQ_CST)) -#define MyHTML_FORCE_SPINLOCK -#endif - -#if defined(MyHTML_FORCE_SPINLOCK) -static int mcsync_atomic_compare_exchange(int* ptr, int compare, int exchange) -{ - return __atomic_compare_exchange_n(ptr, &compare, exchange, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); -} - -static void mcsync_atomic_store(int* ptr, int value) -{ - __atomic_store_n(ptr, 0, __ATOMIC_SEQ_CST); -} -#endif - -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && defined(IS_OS_WINDOWS) -static int pthread_mutex_lock(pthread_mutex_t *mutex) -{ - EnterCriticalSection(mutex); - return 0; -} - -static int pthread_mutex_unlock(pthread_mutex_t *mutex) -{ - LeaveCriticalSection(mutex); - return 0; -} - -static int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) -{ - (void)attr; - InitializeCriticalSection(mutex); - return 0; -} - -static int pthread_mutex_destroy(pthread_mutex_t *mutex) -{ - DeleteCriticalSection(mutex); - return 0; -} -#endif - -mcsync_t * mcsync_create(void) -{ - return calloc(1, sizeof(mcsync_t)); -} - -mcsync_status_t mcsync_init(mcsync_t* mcsync) -{ - mcsync_clean(mcsync); - return MCSYNC_STATUS_OK; -} - -mcsync_t * mcsync_destroy(mcsync_t* mcsync, int destroy_self) -{ - if(mcsync == NULL) - return NULL; - -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && !defined(MyHTML_FORCE_SPINLOCK) - if(mcsync->mutex) { - pthread_mutex_destroy(mcsync->mutex); - myhtml_free(mcsync->mutex); - } -#endif - - if(destroy_self) - myhtml_free(mcsync); - - return NULL; -} - -void mcsync_clean(mcsync_t* mcsync) -{ - mcsync->spinlock = 0; -} - -mcsync_status_t mcsync_lock(mcsync_t* mcsync) -{ -#if defined(MyHTML_FORCE_SPINLOCK) - while (!mcsync_atomic_compare_exchange(&mcsync->spinlock, 0, 1)) {} -#elif !defined(MyHTML_BUILD_WITHOUT_THREADS) - mcsync_mutex_lock(mcsync); -#endif - - return MCSYNC_STATUS_OK; -} - -mcsync_status_t mcsync_unlock(mcsync_t* mcsync) -{ -#if defined(MyHTML_FORCE_SPINLOCK) - mcsync_atomic_store(&mcsync->spinlock, 0); -#elif !defined(MyHTML_BUILD_WITHOUT_THREADS) - mcsync_mutex_unlock(mcsync); -#endif - - return MCSYNC_STATUS_OK; -} - -mcsync_status_t mcsync_mutex_lock(mcsync_t* mcsync) -{ -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && !defined(MyHTML_FORCE_SPINLOCK) - if(mcsync->mutex == NULL) { - mcsync->mutex = (pthread_mutex_t*)myhtml_malloc(sizeof(pthread_mutex_t)); - - if(mcsync->mutex == NULL) - return MCSYNC_STATUS_ERROR_MEM_ALLOCATE; - - pthread_mutex_init(mcsync->mutex, NULL); - } - - if(pthread_mutex_lock(mcsync->mutex) == 0) - return MCSYNC_STATUS_OK; - else - return MCSYNC_STATUS_NOT_OK; -#else - return MCSYNC_STATUS_NOT_OK; -#endif -} - -mcsync_status_t mcsync_mutex_unlock(mcsync_t* mcsync) -{ -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && !defined(MyHTML_FORCE_SPINLOCK) - if(pthread_mutex_unlock(mcsync->mutex) == 0) - return MCSYNC_STATUS_OK; - else - return MCSYNC_STATUS_NOT_OK; -#else - return MCSYNC_STATUS_NOT_OK; -#endif -} - diff --git a/source/myhtml/utils/mcsync.h b/source/myhtml/utils/mcsync.h deleted file mode 100644 index 671aa1a..0000000 --- a/source/myhtml/utils/mcsync.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCSYNC_H -#define MyHTML_UTILS_MCSYNC_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#include "myhtml/myosi.h" - -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) -#if defined(IS_OS_WINDOWS) - typedef CRITICAL_SECTION pthread_mutex_t; - typedef unsigned long pthread_mutexattr_t; -#else -# include -#endif -#endif - -enum mcsync_status { - MCSYNC_STATUS_OK = 0, - MCSYNC_STATUS_NOT_OK = 1, - MCSYNC_STATUS_ERROR_MEM_ALLOCATE = 2 -} -typedef mcsync_status_t; - -struct mcsync { - int spinlock; -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) - pthread_mutex_t *mutex; -#endif -} -typedef mcsync_t; - -mcsync_t * mcsync_create(void); -mcsync_status_t mcsync_init(mcsync_t* mcsync); -void mcsync_clean(mcsync_t* mcsync); -mcsync_t * mcsync_destroy(mcsync_t* mcsync, int destroy_self); - -mcsync_status_t mcsync_lock(mcsync_t* mclock); -mcsync_status_t mcsync_unlock(mcsync_t* mclock); - -mcsync_status_t mcsync_mutex_lock(mcsync_t* mclock); -mcsync_status_t mcsync_mutex_unlock(mcsync_t* mclock); - -#if !defined(MyHTML_BUILD_WITHOUT_THREADS) && defined(IS_OS_WINDOWS) - static int pthread_mutex_lock(pthread_mutex_t *mutex); - static int pthread_mutex_unlock(pthread_mutex_t *mutex); - static int pthread_mutex_init(pthread_mutex_t *m, pthread_mutexattr_t *a); - static int pthread_mutex_destroy(pthread_mutex_t *m); -#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* mcsync_h */ diff --git a/source/myhtml/utils/mctree.c b/source/myhtml/utils/mctree.c deleted file mode 100644 index 526560c..0000000 --- a/source/myhtml/utils/mctree.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/resources.h" -#include "myhtml/utils/mctree.h" - - -mctree_t * mctree_create(size_t start_size) -{ - mctree_t* mctree = (mctree_t*)myhtml_malloc(sizeof(mctree_t)); - - mctree->nodes_size = start_size + 512; - mctree->nodes_length = start_size + 1; - mctree->nodes = (mctree_node_t*)myhtml_calloc(mctree->nodes_size, sizeof(mctree_node_t)); - - mctree->start_size = start_size; - - return mctree; -} - -void mctree_clean(mctree_t* mctree) -{ - mctree->nodes_length = mctree->start_size + 1; - memset(mctree->nodes, 0, sizeof(mctree_node_t) * mctree->nodes_size); -} - -mctree_t * mctree_destroy(mctree_t* mctree) -{ - if(mctree == NULL) - return NULL; - - if(mctree->nodes) - myhtml_free(mctree->nodes); - - myhtml_free(mctree); - - return NULL; -} - -mctree_index_t __mtree_search_lowercase_to_start(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size) -{ - mctree_node_t* nodes = mctree->nodes; - - while (idx) - { - if(key_size == nodes[idx].str_size) { - if(myhtml_strncasecmp(key, nodes[idx].str, key_size) == 0) { - return idx; - } - - idx = nodes[idx].child; - } - else if(key_size > nodes[idx].str_size) - { - if(key_size < nodes[ nodes[idx].next ].str_size) { - return 0; - } - - idx = nodes[idx].next; - } - else { - if(key_size > nodes[ nodes[idx].prev ].str_size) { - return 0; - } - - idx = nodes[idx].prev; - } - } - - return 0; -} - -mctree_index_t __mtree_search_to_start(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size) -{ - mctree_node_t* nodes = mctree->nodes; - - while (idx) - { - if(key_size == nodes[idx].str_size) { - if(memcmp((const void *)key, (const void *)(nodes[idx].str), key_size) == 0) { - return idx; - } - - idx = nodes[idx].child; - } - else if(key_size > nodes[idx].str_size) - { - if(key_size < nodes[ nodes[idx].next ].str_size) { - return 0; - } - - idx = nodes[idx].next; - } - else { - if(key_size > nodes[ nodes[idx].prev ].str_size) { - return 0; - } - - idx = nodes[idx].prev; - } - } - - return 0; -} - -mctree_index_t mctree_insert_child(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size, void* value) -{ - mctree_node_t* nodes = mctree->nodes; - mctree_index_t new_idx = mctree_node_get_free_id(mctree); - - nodes[idx].child = new_idx; - - nodes[new_idx].str = key; - nodes[new_idx].str_size = key_size; - nodes[new_idx].value = value; - - mctree_node_add(mctree); - - return new_idx; -} - -mctree_index_t mctree_insert_after(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size, void* value) -{ - mctree_node_t* nodes = mctree->nodes; - mctree_index_t new_idx = mctree_node_get_free_id(mctree); - - if(nodes[idx].next) { - nodes[ nodes[idx].next ].prev = new_idx; - nodes[new_idx].next = nodes[idx].next; - } - - nodes[idx].next = new_idx; - nodes[new_idx].prev = idx; - - nodes[new_idx].str = key; - nodes[new_idx].str_size = key_size; - nodes[new_idx].value = value; - - mctree_node_add(mctree); - - return new_idx; -} - -mctree_index_t mctree_insert_before(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size, void* value) -{ - mctree_node_t* nodes = mctree->nodes; - mctree_index_t new_idx = mctree_node_get_free_id(mctree); - - if(nodes[idx].prev) { - nodes[ nodes[idx].prev ].next = new_idx; - nodes[new_idx].prev = nodes[idx].prev; - } - - nodes[idx].prev = new_idx; - nodes[new_idx].next = idx; - - nodes[new_idx].str = key; - nodes[new_idx].str_size = key_size; - nodes[new_idx].value = value; - - mctree_node_add(mctree); - - return new_idx; -} - -mctree_index_t __mtree_insert_to_start(mctree_t* mctree, mctree_index_t idx, const char* key, size_t key_size, void* value, mctree_before_insert_f b_insert) -{ - mctree_node_t* nodes = mctree->nodes; - - while (idx) - { - if(key_size == nodes[idx].str_size) { - if(memcmp((const void *)key, (const void *)nodes[idx].str, key_size) == 0) - { - if(value) - nodes[idx].value = value; - - return idx; - } - - if(nodes[idx].child == 0) { - if(b_insert) - b_insert(key, key_size, &value); - - return mctree_insert_child(mctree, idx, key, key_size, value); - } - - idx = nodes[idx].child; - } - else if(key_size > nodes[idx].str_size) - { - if(nodes[idx].next == 0 || key_size < nodes[ nodes[idx].next ].str_size) { - if(b_insert) - b_insert(key, key_size, &value); - - return mctree_insert_after(mctree, idx, key, key_size, value); - } - - idx = nodes[idx].next; - } - else { - if(nodes[idx].prev == 0 || key_size > nodes[ nodes[idx].prev ].str_size) { - if(b_insert) - b_insert(key, key_size, &value); - - return mctree_insert_before(mctree, idx, key, key_size, value); - } - - idx = nodes[idx].prev; - } - } - - return 0; -} - -mctree_index_t mctree_insert(mctree_t* mctree, const char* key, size_t key_size, void* value, mctree_before_insert_f b_insert) -{ - mctree_node_t* start = mctree->nodes; - - if(key_size > 0) { - mctree_index_t idx = mctree_make_first_idx(mctree, key, key_size); - - if(start[idx].child) { - return __mtree_insert_to_start(mctree, start[idx].child, key, key_size, value, b_insert); - } - else { - if(b_insert) - b_insert(key, key_size, &value); - - return mctree_insert_child(mctree, idx, key, key_size, value); - } - } - - return 0; -} - -mctree_index_t mctree_search(mctree_t* mctree, const char* key, size_t key_size) -{ - mctree_node_t* start = mctree->nodes; - - if(key_size > 0) { - mctree_index_t idx = mctree_make_first_idx(mctree, key, key_size); - - if(start[idx].child) { - return __mtree_search_to_start(mctree, start[idx].child, key, key_size); - } - } - - return 0; -} - -mctree_index_t mctree_search_lowercase(mctree_t* mctree, const char* key, size_t key_size) -{ - mctree_node_t* start = mctree->nodes; - - if(key_size > 0) { - mctree_index_t idx = mctree_make_first_idx(mctree, key, key_size); - - if(start[idx].child) { - return __mtree_search_lowercase_to_start(mctree, start[idx].child, key, key_size); - } - } - - return 0; -} - - - diff --git a/source/myhtml/utils/mctree.h b/source/myhtml/utils/mctree.h deleted file mode 100644 index bd3054f..0000000 --- a/source/myhtml/utils/mctree.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_MCTREE_H -#define MyHTML_UTILS_MCTREE_H -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include "myhtml/myosi.h" -#include "myhtml/utils.h" - -#include -#include -#include - -#define mctree_node_get_free_id(mctree) mctree->nodes_length - -#define mctree_node_clean(mctree, idx) \ - mctree->nodes[idx].str = NULL; \ - mctree->nodes[idx].value = NULL; \ - mctree->nodes[idx].child_count = 0; \ - mctree->nodes[idx].prev = 0; \ - mctree->nodes[idx].next = 0; \ - mctree->nodes[idx].child = 0 - -#define mctree_node_add(mctree) \ - mctree->nodes_length++; \ - if(mctree->nodes_length >= mctree->nodes_size) { \ - mctree->nodes_size = mctree->nodes_length + 4096; \ - mctree->nodes = (mctree_node_t*)myhtml_realloc(mctree->nodes, \ - sizeof(mctree_node_t) * mctree->nodes_size); \ - } \ - mctree_node_clean(mctree, mctree->nodes_length) - -#define mctree_make_first_idx(mctree, key, size) \ - ((myhtml_string_chars_lowercase_map[ (const unsigned char)(key[0]) ] + myhtml_string_chars_lowercase_map[ (const unsigned char)(key[size - 1]) ]) % mctree->start_size) + 1 - - -typedef size_t mctree_index_t; - -struct mctree_node { - const char* str; - size_t str_size; - void* value; - - size_t child_count; - mctree_index_t prev; - mctree_index_t next; - mctree_index_t child; -} -typedef mctree_node_t; - -struct mctree_tree { - mctree_node_t* nodes; - size_t nodes_length; - size_t nodes_size; - - size_t start_size; -} -typedef mctree_t; - -typedef void (*mctree_before_insert_f)(const char* key, size_t key_size, void **value); - - -mctree_t * mctree_create(size_t start_size); -void mctree_clean(mctree_t* mctree); -mctree_t * mctree_destroy(mctree_t* mctree); - -mctree_index_t mctree_insert(mctree_t* mctree, const char* key, size_t key_size, void* value, mctree_before_insert_f b_insert); -mctree_index_t mctree_search(mctree_t* mctree, const char* key, size_t key_size); -mctree_index_t mctree_search_lowercase(mctree_t* mctree, const char* key, size_t key_size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* defined(__mhtml__mtree__) */ diff --git a/source/myhtml/utils/mhash.c b/source/myhtml/utils/mhash.c deleted file mode 100644 index 1929f26..0000000 --- a/source/myhtml/utils/mhash.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - Copyright (C) 2017 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin avl_treet, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#include "myhtml/utils/mhash.h" - -size_t myhtml_utils_mhash_hash(const char* key, size_t key_size, size_t table_size) -{ - size_t hash, i; - - for(hash = i = 0; i < key_size; i++) - { - hash += key[i]; - hash += (hash << 10); - hash ^= (hash >> 6); - } - - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - - return hash % table_size; -} - -myhtml_utils_mhash_t * myhtml_utils_mhash_create(void) -{ - return myhtml_calloc(1, sizeof(myhtml_utils_mhash_t)); -}; - -myhtml_status_t myhtml_utils_mhash_init(myhtml_utils_mhash_t* mhash, size_t table_size, size_t max_depth) -{ - mhash->mchar_obj = mchar_async_create(128, 4096); - if(mhash->mchar_obj == NULL) - return MyHTML_STATUS_ATTR_ERROR_ALLOCATION; - - mhash->mchar_node = mchar_async_node_add(mhash->mchar_obj); - - if(table_size < 128) - table_size = 128; - - mhash->table = myhtml_calloc(table_size, sizeof(myhtml_utils_mhash_entry_t*)); - if(mhash->table == NULL) - return MyHTML_STATUS_ATTR_ERROR_ALLOCATION; - - if(max_depth < 1) - max_depth = 1; - - mhash->table_max_depth = max_depth; - mhash->table_size = table_size; - - return MyHTML_STATUS_OK; -}; - -void myhtml_utils_mhash_clean(myhtml_utils_mhash_t* mhash) -{ - mchar_async_clean(mhash->mchar_obj); - memset(mhash->table, 0, (sizeof(myhtml_utils_mhash_entry_t*) * mhash->table_size)); -} - -myhtml_utils_mhash_t * myhtml_utils_mhash_destroy(myhtml_utils_mhash_t* mhash, bool self_destroy) -{ - if(mhash == NULL) - return NULL; - - if(mhash->table) { - myhtml_free(mhash->table); - mhash->table = NULL; - } - - if(self_destroy) { - myhtml_free(mhash->table); - return NULL; - } - - return mhash; -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_create_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value) -{ - myhtml_utils_mhash_entry_t *entry = (myhtml_utils_mhash_entry_t*) - mchar_async_malloc(mhash->mchar_obj, mhash->mchar_node, sizeof(myhtml_utils_mhash_entry_t)); - - entry->key = mchar_async_malloc(mhash->mchar_obj, mhash->mchar_node, (sizeof(char) * key_size) + 1); - - if(entry->key == NULL) { - mchar_async_free(mhash->mchar_obj, mhash->mchar_node, (char*)entry); - return NULL; - } - - memcpy(entry->key, key, (sizeof(char) * key_size)); - entry->key[key_size] = '\0'; - - entry->key_length = key_size; - entry->value = value; - entry->next = NULL; - - return entry; -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add_with_choice(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size) -{ - if(key == NULL || key_size == 0) - return NULL; - - size_t hash_id = myhtml_utils_mhash_hash(key, key_size, mhash->table_size); - - - myhtml_utils_mhash_entry_t *entry; - - if(mhash->table[hash_id] == NULL) { - /* rebuild table if need */ - if(mhash->table_length >= (mhash->table_size - (mhash->table_size / 4))) { - myhtml_utils_mhash_rebuld(mhash); - } - - mhash->table[hash_id] = myhtml_utils_mhash_create_entry(mhash, key, key_size, NULL); - return mhash->table[hash_id]; - } - - size_t depth = 0; - entry = mhash->table[hash_id]; - - do { - if(entry->key_length == key_size) { - if(strncmp(entry->key, key, key_size) == 0) - return entry; - } - - if(entry->next == NULL) { - entry->next = myhtml_utils_mhash_create_entry(mhash, key, key_size, NULL); - - if(depth > mhash->table_max_depth) { - myhtml_utils_mhash_entry_t *entry_new = entry->next; - myhtml_utils_mhash_rebuld(mhash); - - return entry_new; - } - - return entry->next; - } - - depth++; - entry = entry->next; - } - while(1); -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value) -{ - myhtml_utils_mhash_entry_t *entry = myhtml_utils_mhash_add_with_choice(mhash, key, key_size); - - if(entry) - entry->value = value; - - return entry; -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_search(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value) -{ - if(key == NULL || key_size == 0) - return NULL; - - size_t hash_id = myhtml_utils_mhash_hash(key, key_size, mhash->table_size); - - myhtml_utils_mhash_entry_t *entry = mhash->table[hash_id]; - - while(entry) { - if(entry->key_length == key_size) { - if(strncmp(entry->key, key, key_size) == 0) - return entry; - } - - entry = entry->next; - } - - return NULL; -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_entry_by_id(myhtml_utils_mhash_t* mhash, size_t id) -{ - if(mhash->table_size > id) - return mhash->table[id]; - - return NULL; -} - -size_t myhtml_utils_mhash_get_table_size(myhtml_utils_mhash_t* mhash) -{ - return mhash->table_size; -} - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_rebuild_add_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, myhtml_utils_mhash_entry_t *ext_entry) -{ - if(key == NULL || key_size == 0) - return NULL; - - ext_entry->next = NULL; - - size_t hash_id = myhtml_utils_mhash_hash(key, key_size, mhash->table_size); - - if(mhash->table[hash_id] == NULL) { - mhash->table[hash_id] = ext_entry; - return ext_entry; - } - - myhtml_utils_mhash_entry_t *entry = mhash->table[hash_id]; - - do { - if(entry->next == NULL) { - entry->next = ext_entry; - break; - } - - entry = entry->next; - } - while(1); - - return ext_entry; -} - -myhtml_utils_mhash_entry_t ** myhtml_utils_mhash_rebuld(myhtml_utils_mhash_t* mhash) -{ - myhtml_utils_mhash_entry_t **table = mhash->table; - size_t size = mhash->table_size; - - mhash->table_size = mhash->table_size << 1; - mhash->table = myhtml_calloc(mhash->table_size, sizeof(myhtml_utils_mhash_entry_t*)); - - if(mhash->table == NULL) { - mhash->table = table; - mhash->table_size = size; - - return NULL; - } - - for(size_t i = 0; i < mhash->table_size; i++) { - myhtml_utils_mhash_entry_t *entry = table[i]; - - while(entry) { - myhtml_utils_mhash_rebuild_add_entry(mhash, entry->key, entry->key_length, entry); - - entry = entry->next; - } - } - - myhtml_free(table); - - return mhash->table; -} - - diff --git a/source/myhtml/utils/mhash.h b/source/myhtml/utils/mhash.h deleted file mode 100644 index c3c8aec..0000000 --- a/source/myhtml/utils/mhash.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - Copyright (C) 2017 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin avl_treet, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef html_mhash_h -#define html_mhash_h - -#include "myhtml/myosi.h" -#include "myhtml/utils/mchar_async.h" - -typedef struct myhtml_utils_mhash_entry myhtml_utils_mhash_entry_t; - -struct myhtml_utils_mhash_entry { - char* key; - size_t key_length; - - void *value; - - myhtml_utils_mhash_entry_t* next; -}; - -struct myhtml_utils_mhash { - mchar_async_t* mchar_obj; - size_t mchar_node; - - myhtml_utils_mhash_entry_t** table; - size_t table_size; - size_t table_length; - - size_t table_max_depth; -} -typedef myhtml_utils_mhash_t; - -myhtml_utils_mhash_t * myhtml_utils_mhash_create(void); -myhtml_status_t myhtml_utils_mhash_init(myhtml_utils_mhash_t* mhash, size_t table_size, size_t depth); -void myhtml_utils_mhash_clean(myhtml_utils_mhash_t* mhash); -myhtml_utils_mhash_t * myhtml_utils_mhash_destroy(myhtml_utils_mhash_t* mhash, bool self_destroy); -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_create_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value); - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value); -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_search(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value); -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add_with_choice(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size); - -myhtml_utils_mhash_entry_t * myhtml_utils_mhash_entry_by_id(myhtml_utils_mhash_t* mhash, size_t id); -size_t myhtml_utils_mhash_get_table_size(myhtml_utils_mhash_t* mhash); - -myhtml_utils_mhash_entry_t ** myhtml_utils_mhash_rebuld(myhtml_utils_mhash_t* mhash); - -#endif diff --git a/source/myhtml/utils/resources.h b/source/myhtml/utils/resources.h deleted file mode 100644 index beee061..0000000 --- a/source/myhtml/utils/resources.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - Copyright (C) 2015-2016 Alexander Borisov - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - - Author: lex.borisov@gmail.com (Alexander Borisov) -*/ - -#ifndef MyHTML_UTILS_RESOURCES_H -#define MyHTML_UTILS_RESOURCES_H -#pragma once - -#include - -#define MyHTML_TOKENIZER_CHAR_OTHER '\000' -#define MyHTML_TOKENIZER_CHAR_A_Z_a_z '\001' -#define MyHTML_TOKENIZER_CHAR_WHITESPACE '\002' - -static const unsigned char myhtml_string_chars_num_map[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff -}; - -static const unsigned char myhtml_string_chars_hex_map[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff -}; - -static const unsigned char myhtml_string_chars_lowercase_map[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, - 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, - 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, - 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, - 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, - 0x3f, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, - 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, - 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, - 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, - 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, - 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, - 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, - 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, - 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, - 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, - 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, - 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, - 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, - 0xfc, 0xfd, 0xfe, 0xff -}; - -static const size_t replacement_character[] = { - 65533, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, 71, - 72, 73, 74, 75, 76, 77, 78, 79, 80, - 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 110, 111, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, 123, 124, 125, - 126, 127, 8364, 129, 8218, 402, 8222, 8230, 8224, - 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, - 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, - 8482, 353, 8250, 339, 157, 382, 376 -}; - -static const size_t myhtml_string_alphanumeric_character[] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0a, - 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0a, 0x0b, 0x0c, 0x0d, - 0x0e, 0x0f, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0a, - 0x0b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0c, 0x0d, - 0x0e, 0x0f, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0a, - 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0a, 0x0b, 0x0c, 0x0d, - 0x0e, 0x0f, 0x0a, 0x0b, 0x0c, 0x0d, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff -}; - -static const unsigned char myhtml_tokenizer_chars_map[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x02, 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 -}; - -static const unsigned char myhtml_string_hex_to_char_map[] = { - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x00 -}; - -#endif /* MyHTML_UTILS_RESOURCES_H */ -- cgit v1.2.3