Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/lexborisov/Modest.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/myhtml')
-rw-r--r--source/myhtml/Makefile53
-rwxr-xr-xsource/myhtml/api.h343
-rw-r--r--source/myhtml/encoding.c64
-rw-r--r--source/myhtml/encoding.h36
-rw-r--r--source/myhtml/encoding_detect.c563
-rw-r--r--source/myhtml/encoding_detect_resource.h90
-rw-r--r--source/myhtml/myhtml.c200
-rw-r--r--source/myhtml/myhtml.h24
-rw-r--r--source/myhtml/mynamespace.c18
-rw-r--r--source/myhtml/mynamespace.h1
-rw-r--r--source/myhtml/mynamespace_resource.h5
-rw-r--r--source/myhtml/myosi.h114
-rw-r--r--source/myhtml/mystring.h2
-rw-r--r--source/myhtml/rules.c723
-rwxr-xr-xsource/myhtml/serialization.c49
-rw-r--r--source/myhtml/serialization.h15
-rwxr-xr-xsource/myhtml/tag.c165
-rw-r--r--source/myhtml/tag.h37
-rwxr-xr-xsource/myhtml/tag_init.c2
-rw-r--r--source/myhtml/thread.c12
-rw-r--r--source/myhtml/thread.h2
-rw-r--r--source/myhtml/token.c38
-rw-r--r--source/myhtml/token.h12
-rw-r--r--source/myhtml/tokenizer.c313
-rw-r--r--source/myhtml/tokenizer.h4
-rw-r--r--source/myhtml/tokenizer_doctype.c114
-rw-r--r--source/myhtml/tokenizer_end.c186
-rw-r--r--source/myhtml/tokenizer_end.h2
-rw-r--r--source/myhtml/tokenizer_script.c36
-rw-r--r--source/myhtml/tree.c291
-rw-r--r--source/myhtml/tree.h57
-rw-r--r--source/myhtml/utils.c13
-rw-r--r--source/myhtml/utils.h1
-rw-r--r--source/myhtml/utils/mcobject.c9
-rw-r--r--source/myhtml/utils/mctree.c2
-rw-r--r--source/myhtml/utils/mhash.c267
-rw-r--r--source/myhtml/utils/mhash.h65
-rw-r--r--source/myhtml/utils/resources.h4
38 files changed, 2717 insertions, 1215 deletions
diff --git a/source/myhtml/Makefile b/source/myhtml/Makefile
deleted file mode 100644
index f24973c..0000000
--- a/source/myhtml/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-TARGET := source/myhtml
-SRCDIR := source/myhtml
-
-CC ?= gcc
-
-LIBPOSTFIX := .so
-LIBNAME := myhtml
-LIBSTATIC_POSTFIX := _static
-
-MyHTML_OPTIMIZATION_LEVEL ?= -O2
-
-CFLAGS ?= -Wall -Werror
-CFLAGS += $(MyHTML_OPTIMIZATION_LEVEL) -fPIC --std=c99 -I..
-
-MyHTML_BUILD_WITHOUT_THREADS ?= NO
-ifeq ($(MyHTML_BUILD_WITHOUT_THREADS),YES)
- $(info Build without POSIX Threads)
- CFLAGS += -DMyHTML_BUILD_WITHOUT_THREADS
-else
- $(info Build with POSIX Threads)
- CFLAGS += -pthread
-endif
-
-ifeq ($(OS),Windows_NT)
-else
- UNAM := $(shell uname -s)
- ifeq ($(UNAM),Darwin)
- LIBPOSTFIX := .dylib
- else
- CFLAGS += -D_POSIX_C_SOURCE=199309L
- endif
-endif
-
-SRCS := $(wildcard *.c)
-SRCS += $(wildcard utils/*.c)
-HDRS := $(wildcard *.h)
-HDRS += $(wildcard utils/*.h)
-OBJS := $(patsubst %.c, %.o, $(SRCS))
-
-all: shared static
-
-shared: $(OBJS) $(HDRS)
- $(CC) -shared $(LDFLAGS) $(OBJS) -o lib$(LIBNAME)$(LIBPOSTFIX)
-
-static: shared
- $(AR) crus lib$(LIBNAME)$(LIBSTATIC_POSTFIX).a $(OBJS)
-
-clean:
- rm -rf *.o
- rm -rf utils/*.o
- rm -rf *lib$(LIBNAME)*
-
-.PHONY: all clean
diff --git a/source/myhtml/api.h b/source/myhtml/api.h
index 794b9c7..e7eb4a6 100755
--- a/source/myhtml/api.h
+++ b/source/myhtml/api.h
@@ -34,20 +34,20 @@
*
*/
-#define MyHTML_VERSION_MAJOR 1
+#define MyHTML_VERSION_MAJOR 3
#define MyHTML_VERSION_MINOR 0
-#define MyHTML_VERSION_PATCH 5
+#define MyHTML_VERSION_PATCH 0
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#if defined(_MSC_VER)
-# define MyHTML_DEPRECATED(func, message) __declspec(deprecated(message)) func
+# define MyHTML_DEPRECATED(func) __declspec(deprecated) func
#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
-# define MyHTML_DEPRECATED(func, message) func __attribute__((deprecated(message)))
+# define MyHTML_DEPRECATED(func) func __attribute__((deprecated))
#else
-# define MyHTML_DEPRECATED(func, message) func
+# define MyHTML_DEPRECATED(func) func
#endif
#ifdef __cplusplus
@@ -58,49 +58,49 @@ extern "C" {
* encodings type
*/
enum myhtml_encoding_list {
- MyHTML_ENCODING_DEFAULT = 0x00,
-// MyHTML_ENCODING_AUTO = 0x01, // future
-// MyHTML_ENCODING_CUSTOM = 0x02, // future
- MyHTML_ENCODING_UTF_8 = 0x00, // default encoding
- MyHTML_ENCODING_UTF_16LE = 0x04,
- MyHTML_ENCODING_UTF_16BE = 0x05,
- MyHTML_ENCODING_X_USER_DEFINED = 0x06,
- MyHTML_ENCODING_BIG5 = 0x07,
- MyHTML_ENCODING_EUC_KR = 0x08,
- MyHTML_ENCODING_GB18030 = 0x09,
- MyHTML_ENCODING_IBM866 = 0x0a,
- MyHTML_ENCODING_ISO_8859_10 = 0x0b,
- MyHTML_ENCODING_ISO_8859_13 = 0x0c,
- MyHTML_ENCODING_ISO_8859_14 = 0x0d,
- MyHTML_ENCODING_ISO_8859_15 = 0x0e,
- MyHTML_ENCODING_ISO_8859_16 = 0x0f,
- MyHTML_ENCODING_ISO_8859_2 = 0x10,
- MyHTML_ENCODING_ISO_8859_3 = 0x11,
- MyHTML_ENCODING_ISO_8859_4 = 0x12,
- MyHTML_ENCODING_ISO_8859_5 = 0x13,
- MyHTML_ENCODING_ISO_8859_6 = 0x14,
- MyHTML_ENCODING_ISO_8859_7 = 0x15,
- MyHTML_ENCODING_ISO_8859_8 = 0x16,
- MyHTML_ENCODING_KOI8_R = 0x17,
- MyHTML_ENCODING_KOI8_U = 0x18,
- MyHTML_ENCODING_MACINTOSH = 0x19,
- MyHTML_ENCODING_WINDOWS_1250 = 0x1a,
- MyHTML_ENCODING_WINDOWS_1251 = 0x1b,
- MyHTML_ENCODING_WINDOWS_1252 = 0x1c,
- MyHTML_ENCODING_WINDOWS_1253 = 0x1d,
- MyHTML_ENCODING_WINDOWS_1254 = 0x1e,
- MyHTML_ENCODING_WINDOWS_1255 = 0x1f,
- MyHTML_ENCODING_WINDOWS_1256 = 0x20,
- MyHTML_ENCODING_WINDOWS_1257 = 0x21,
- MyHTML_ENCODING_WINDOWS_1258 = 0x22,
- MyHTML_ENCODING_WINDOWS_874 = 0x23,
- MyHTML_ENCODING_X_MAC_CYRILLIC = 0x24,
- MyHTML_ENCODING_ISO_2022_JP = 0x25,
- MyHTML_ENCODING_GBK = 0x26,
- MyHTML_ENCODING_SHIFT_JIS = 0x27,
- MyHTML_ENCODING_EUC_JP = 0x28,
- MyHTML_ENCODING_ISO_8859_8_I = 0x29,
- MyHTML_ENCODING_LAST_ENTRY = 0x2a
+ MyHTML_ENCODING_DEFAULT = 0x00,
+// MyHTML_ENCODING_AUTO = 0x01, // future
+ MyHTML_ENCODING_NOT_DETERMINED = 0x02,
+ MyHTML_ENCODING_UTF_8 = 0x00, // default encoding
+ MyHTML_ENCODING_UTF_16LE = 0x04,
+ MyHTML_ENCODING_UTF_16BE = 0x05,
+ MyHTML_ENCODING_X_USER_DEFINED = 0x06,
+ MyHTML_ENCODING_BIG5 = 0x07,
+ MyHTML_ENCODING_EUC_JP = 0x08,
+ MyHTML_ENCODING_EUC_KR = 0x09,
+ MyHTML_ENCODING_GB18030 = 0x0a,
+ MyHTML_ENCODING_GBK = 0x0b,
+ MyHTML_ENCODING_IBM866 = 0x0c,
+ MyHTML_ENCODING_ISO_2022_JP = 0x0d,
+ MyHTML_ENCODING_ISO_8859_10 = 0x0e,
+ MyHTML_ENCODING_ISO_8859_13 = 0x0f,
+ MyHTML_ENCODING_ISO_8859_14 = 0x10,
+ MyHTML_ENCODING_ISO_8859_15 = 0x11,
+ MyHTML_ENCODING_ISO_8859_16 = 0x12,
+ MyHTML_ENCODING_ISO_8859_2 = 0x13,
+ MyHTML_ENCODING_ISO_8859_3 = 0x14,
+ MyHTML_ENCODING_ISO_8859_4 = 0x15,
+ MyHTML_ENCODING_ISO_8859_5 = 0x16,
+ MyHTML_ENCODING_ISO_8859_6 = 0x17,
+ MyHTML_ENCODING_ISO_8859_7 = 0x18,
+ MyHTML_ENCODING_ISO_8859_8 = 0x19,
+ MyHTML_ENCODING_ISO_8859_8_I = 0x1a,
+ MyHTML_ENCODING_KOI8_R = 0x1b,
+ MyHTML_ENCODING_KOI8_U = 0x1c,
+ MyHTML_ENCODING_MACINTOSH = 0x1d,
+ MyHTML_ENCODING_SHIFT_JIS = 0x1e,
+ MyHTML_ENCODING_WINDOWS_1250 = 0x1f,
+ MyHTML_ENCODING_WINDOWS_1251 = 0x20,
+ MyHTML_ENCODING_WINDOWS_1252 = 0x21,
+ MyHTML_ENCODING_WINDOWS_1253 = 0x22,
+ MyHTML_ENCODING_WINDOWS_1254 = 0x23,
+ MyHTML_ENCODING_WINDOWS_1255 = 0x24,
+ MyHTML_ENCODING_WINDOWS_1256 = 0x25,
+ MyHTML_ENCODING_WINDOWS_1257 = 0x26,
+ MyHTML_ENCODING_WINDOWS_1258 = 0x27,
+ MyHTML_ENCODING_WINDOWS_874 = 0x28,
+ MyHTML_ENCODING_X_MAC_CYRILLIC = 0x29,
+ MyHTML_ENCODING_LAST_ENTRY = 0x2a
}
typedef myhtml_encoding_t;
@@ -474,7 +474,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
- MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
+ MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008
}
typedef myhtml_tree_parse_flags_t;
@@ -500,11 +500,6 @@ typedef struct myhtml_tree_node myhtml_tree_node_t;
*
*/
typedef size_t myhtml_tag_id_t;
-
-typedef struct myhtml_tag_index_node myhtml_tag_index_node_t;
-typedef struct myhtml_tag_index_entry myhtml_tag_index_entry_t;
-typedef struct myhtml_tag_index myhtml_tag_index_t;
-
typedef struct myhtml_tag myhtml_tag_t;
/**
@@ -826,32 +821,29 @@ myhtml_tree_clean(myhtml_tree_t* tree);
/**
* Add a child node to a node. If the node already has children, the new node is added after the last one
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t* The node to which we add child node
* @param[in] myhtml_tree_node_t* The node which adds
*/
void
-myhtml_tree_node_add_child(myhtml_tree_t* tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+myhtml_tree_node_add_child(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
/**
* Add a node immediately before the existing node
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t* add for this node
* @param[in] myhtml_tree_node_t* add this node
*/
void
-myhtml_tree_node_insert_before(myhtml_tree_t* myhtml_tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+myhtml_tree_node_insert_before(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
/**
* Add a node immediately after the existing node
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t* add for this node
* @param[in] myhtml_tree_node_t* add this node
*/
void
-myhtml_tree_node_insert_after(myhtml_tree_t* myhtml_tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+myhtml_tree_node_insert_after(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
/**
* Destroy of a MyHTML_TREE structure
@@ -884,16 +876,6 @@ myhtml_tag_t*
myhtml_tree_get_tag(myhtml_tree_t* tree);
/**
- * Get myhtml_tag_index_t* from a myhtml_tree_t*
- *
- * @param[in] myhtml_tree_t*
- *
- * @return myhtml_tag_index_t* if exists, otherwise a NULL value
- */
-myhtml_tag_index_t*
-myhtml_tree_get_tag_index(myhtml_tree_t* tree);
-
-/**
* Get Tree Document (Root of Tree)
*
* @param[in] myhtml_tree_t*
@@ -1312,16 +1294,14 @@ myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id,
/**
* Release allocated resources
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
*/
void
-myhtml_node_free(myhtml_tree_t* tree, myhtml_tree_node_t *node);
+myhtml_node_free(myhtml_tree_node_t *node);
/**
* Remove node of tree
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
*
* @return myhtml_tree_node_t* if successful, otherwise a NULL value
@@ -1332,78 +1312,67 @@ myhtml_node_remove(myhtml_tree_node_t *node);
/**
* Remove node of tree and release allocated resources
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
*/
void
-myhtml_node_delete(myhtml_tree_t* tree, myhtml_tree_node_t *node);
+myhtml_node_delete(myhtml_tree_node_t *node);
/**
* Remove nodes of tree recursively and release allocated resources
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
*/
void
-myhtml_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t *node);
+myhtml_node_delete_recursive(myhtml_tree_node_t *node);
/**
* The appropriate place for inserting a node. Insertion with validation.
* If try insert <a> node to <table> node, then <a> node inserted before <table> node
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] insertion node
*
* @return insertion node if successful, otherwise a NULL value
*/
myhtml_tree_node_t*
-myhtml_node_insert_to_appropriate_place(myhtml_tree_t* tree, myhtml_tree_node_t *target,
- myhtml_tree_node_t *node);
+myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
/**
* Append to target node as last child. Insertion without validation.
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] insertion node
*
* @return insertion node if successful, otherwise a NULL value
*/
myhtml_tree_node_t*
-myhtml_node_append_child(myhtml_tree_t* tree, myhtml_tree_node_t *target,
- myhtml_tree_node_t *node);
+myhtml_node_append_child(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
/**
* Append sibling node after target node. Insertion without validation.
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] insertion node
*
* @return insertion node if successful, otherwise a NULL value
*/
myhtml_tree_node_t*
-myhtml_node_insert_after(myhtml_tree_t* tree, myhtml_tree_node_t *target,
- myhtml_tree_node_t *node);
+myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
/**
* Append sibling node before target node. Insertion without validation.
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] insertion node
*
* @return insertion node if successful, otherwise a NULL value
*/
myhtml_tree_node_t*
-myhtml_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t *target,
- myhtml_tree_node_t *node);
+myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
/**
* Add text for a node with convert character encoding.
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] text
* @param[in] text length
@@ -1412,13 +1381,12 @@ myhtml_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t *target,
* @return myhtml_string_t* if successful, otherwise a NULL value
*/
myhtml_string_t*
-myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node,
- const char* text, size_t length, myhtml_encoding_t encoding);
+myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length,
+ myhtml_encoding_t encoding);
/**
* Add text for a node with convert character encoding.
*
- * @param[in] myhtml_tree_t*
* @param[in] target node
* @param[in] text
* @param[in] text length
@@ -1427,8 +1395,8 @@ myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node,
* @return myhtml_string_t* if successful, otherwise a NULL value
*/
myhtml_string_t*
-myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node,
- const char* text, size_t length, myhtml_encoding_t encoding);
+myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length,
+ myhtml_encoding_t encoding);
/**
* Get token node
@@ -1563,6 +1531,16 @@ myhtml_node_get_data(myhtml_tree_node_t *node);
void
myhtml_node_set_data(myhtml_tree_node_t *node, void* data);
+/**
+ * Get current tree (myhtml_tree_t*) from node
+ *
+ * @param[in] myhtml_tree_node_t*
+ *
+ * @return myhtml_tree_t*
+ */
+myhtml_tree_t*
+myhtml_node_tree(myhtml_tree_node_t *node);
+
/***********************************************************************************
*
* MyHTML_ATTRIBUTE
@@ -1666,7 +1644,6 @@ myhtml_attribute_by_key(myhtml_tree_node_t *node,
/**
* Add an attribute to a tree node
*
- * @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
* @param[in] attr key name
* @param[in] attr key name length
@@ -1677,7 +1654,7 @@ myhtml_attribute_by_key(myhtml_tree_node_t *node,
* @return created myhtml_tree_attr_t* if successful, otherwise a NULL value
*/
myhtml_tree_attr_t*
-myhtml_attribute_add(myhtml_tree_t *tree, myhtml_tree_node_t *node,
+myhtml_attribute_add(myhtml_tree_node_t *node,
const char *key, size_t key_len,
const char *value, size_t value_len,
myhtml_encoding_t encoding);
@@ -1891,138 +1868,6 @@ myhtml_tag_id_by_name(myhtml_tree_t* tree,
/***********************************************************************************
*
- * MyHTML_TAG_INDEX
- *
- ***********************************************************************************/
-
-/**
- * Create tag index structure
- *
- * @return myhtml_tag_index_t* if successful, otherwise a NULL value
- */
-myhtml_tag_index_t*
-myhtml_tag_index_create(void);
-
-/**
- * Allocating and Initialization resources for a tag index structure
- *
- * @param[in] myhtml_tag_t*
- * @param[in] myhtml_tag_index_t*
- *
- * @return MyHTML_STATUS_OK if successful, otherwise an error status.
- */
-myhtml_status_t
-myhtml_tag_index_init(myhtml_tag_t* tag, myhtml_tag_index_t* tag_index);
-
-/**
- * Clears tag index
- *
- * @param[in] myhtml_tag_t*
- * @param[in] myhtml_tag_index_t*
- *
- */
-void
-myhtml_tag_index_clean(myhtml_tag_t* tag, myhtml_tag_index_t* tag_index);
-
-/**
- * Free allocated resources
- *
- * @param[in] myhtml_tag_t*
- * @param[in] myhtml_tag_index_t*
- *
- * @return NULL if successful, otherwise an myhtml_tag_index_t* structure
- */
-myhtml_tag_index_t*
-myhtml_tag_index_destroy(myhtml_tag_t* tag, myhtml_tag_index_t* tag_index);
-
-/**
- * Adds myhtml_tree_node_t* to tag index
- *
- * @param[in] myhtml_tag_t*
- * @param[in] myhtml_tag_index_t*
- * @param[in] myhtml_tree_node_t*
- *
- * @return MyHTML_STATUS_OK if successful, otherwise an error status.
- */
-myhtml_status_t
-myhtml_tag_index_add(myhtml_tag_t* tag, myhtml_tag_index_t* tag_index, myhtml_tree_node_t* node);
-
-/**
- * Get root tag index. Is the initial entry for a tag. It contains statistics and other items by tag
- *
- * @param[in] myhtml_tag_index_t*
- * @param[in] myhtml_tag_id_t
- *
- * @return myhtml_tag_index_entry_t* if successful, otherwise a NULL value.
- */
-myhtml_tag_index_entry_t*
-myhtml_tag_index_entry(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-
-/**
- * Get first index node for tag
- *
- * @param[in] myhtml_tag_index_t*
- * @param[in] myhtml_tag_id_t
- *
- * @return myhtml_tag_index_node_t* if exists, otherwise a NULL value.
- */
-myhtml_tag_index_node_t*
-myhtml_tag_index_first(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-
-/**
- * Get last index node for tag
- *
- * @param[in] myhtml_tag_index_t*
- * @param[in] myhtml_tag_id_t
- *
- * @return myhtml_tag_index_node_t* if exists, otherwise a NULL value.
- */
-myhtml_tag_index_node_t*
-myhtml_tag_index_last(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-
-/**
- * Get next index node for tag, by index node
- *
- * @param[in] myhtml_tag_index_node_t*
- *
- * @return myhtml_tag_index_node_t* if exists, otherwise a NULL value.
- */
-myhtml_tag_index_node_t*
-myhtml_tag_index_next(myhtml_tag_index_node_t *index_node);
-
-/**
- * Get previous index node for tag, by index node
- *
- * @param[in] myhtml_tag_index_node_t*
- *
- * @return myhtml_tag_index_node_t* if exists, otherwise a NULL value.
- */
-myhtml_tag_index_node_t*
-myhtml_tag_index_prev(myhtml_tag_index_node_t *index_node);
-
-/**
- * Get myhtml_tree_node_t* by myhtml_tag_index_node_t*
- *
- * @param[in] myhtml_tag_index_node_t*
- *
- * @return myhtml_tree_node_t* if exists, otherwise a NULL value.
- */
-myhtml_tree_node_t*
-myhtml_tag_index_tree_node(myhtml_tag_index_node_t *index_node);
-
-/**
- * Get count of elements in index by tag id
- *
- * @param[in] myhtml_tag_index_t*
- * @param[in] tag id
- *
- * @return count of elements
- */
-size_t
-myhtml_tag_index_entry_count(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-
-/***********************************************************************************
- *
* MyHTML_COLLECTION
*
***********************************************************************************/
@@ -2212,6 +2057,30 @@ myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encod
bool
myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t *encoding);
+/**
+ * Get Encoding name by myhtml_encoding_t (by id)
+ *
+ * @param[in] myhtml_encoding_t, encoding id
+ * @param[out] return name length
+ *
+ * @return encoding name, otherwise NULL value
+ */
+const char*
+myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length);
+
+/**
+ * Detect encoding in meta tag (<meta ...>) before start parsing
+ *
+ * See https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
+ *
+ * @param[in] html data bytes
+ * @param[in] html data length
+ *
+ * @return detected encoding if encoding found, otherwise MyHTML_ENCODING_NOT_DETERMINED
+ */
+myhtml_encoding_t
+myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
+
/***********************************************************************************
*
* MyHTML_STRING
@@ -2719,16 +2588,14 @@ myhtml_strncasecmp(const char* str1, const char* str2, size_t size);
* The same as myhtml_serialization_tree_buffer function
*/
bool
-myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node,
- myhtml_string_raw_t* str);
+myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
/**
* Only one tree node serialization
* The same as myhtml_serialization_node_buffer function
*/
bool
-myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node,
- myhtml_string_raw_t* str);
+myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str);
/**
* Serialize tree to an output string
@@ -2740,8 +2607,7 @@ myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node,
* @return true if successful, otherwise false
*/
bool
-myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node,
- myhtml_string_raw_t* str);
+myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
/**
* Serialize node to an output string
@@ -2753,8 +2619,7 @@ myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* scope_
* @return true if successful, otherwise false
*/
bool
-myhtml_serialization_node_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* node,
- myhtml_string_raw_t* str);
+myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str);
/**
* The serialize function for an entire tree
@@ -2767,7 +2632,7 @@ myhtml_serialization_node_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* node,
* @return true if successful, otherwise false
*/
bool
-myhtml_serialization_tree_callback(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node,
+myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node,
myhtml_callback_serialize_f callback, void* ptr);
/**
@@ -2781,7 +2646,7 @@ myhtml_serialization_tree_callback(myhtml_tree_t* tree, myhtml_tree_node_t* scop
* @return true if successful, otherwise false
*/
bool
-myhtml_serialization_node_callback(myhtml_tree_t* tree, myhtml_tree_node_t* node,
+myhtml_serialization_node_callback(myhtml_tree_node_t* node,
myhtml_callback_serialize_f callback, void* ptr);
/***********************************************************************************
diff --git a/source/myhtml/encoding.c b/source/myhtml/encoding.c
index 078ac5c..d7a7807 100644
--- a/source/myhtml/encoding.c
+++ b/source/myhtml/encoding.c
@@ -970,6 +970,31 @@ void myhtml_encoding_result_clean(myhtml_encoding_result_t *res)
memset(res, 0, sizeof(myhtml_encoding_result_t));
}
+/*
+ * Number of bytes needed to store a code point, using the UTF-8 length boundaries.
+ *
+ * The 5- and 6-byte forms (values above 0x001FFFFF) are not used by Unicode
+ * but are still reported. Returns 0 for values above 0x7FFFFFFF.
+ */
+size_t myhtml_encoding_codepoint_ascii_length(size_t codepoint)
+{
+    /* upper_bound[n] is the largest value that fits into (n + 1) bytes */
+    static const size_t upper_bound[] = {
+        0x0000007F, 0x000007FF, 0x0000FFFF, 0x001FFFFF, 0x03FFFFFF, 0x7FFFFFFF
+    };
+    const size_t forms = sizeof(upper_bound) / sizeof(upper_bound[0]);
+
+    size_t idx;
+    for (idx = 0; idx < forms; idx++) {
+        if (codepoint <= upper_bound[idx])
+            return idx + 1;
+    }
+
+    return 0;
+}
+
size_t myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data)
{
/* 0x80 -- 10xxxxxx */
@@ -1155,6 +1180,43 @@ size_t myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data)
return 2;
}
-
+/*
+ * Decode a byte buffer from the given encoding and append the result to raw_str as UTF-8.
+ *
+ * raw_str:  destination; when raw_str->data is NULL a buffer of (length + 1) bytes is
+ *           allocated and raw_str->length is reset to 0, otherwise output is appended
+ *           at raw_str->length and the buffer is grown on demand
+ * buff:     input bytes
+ * length:   number of input bytes
+ * encoding: source encoding id, resolved through myhtml_encoding_get_function_by_id()
+ *
+ * Returns the number of input bytes consumed (length), or 0 if an allocation fails.
+ * This function itself does not append a terminating '\0'.
+ */
+size_t myhtml_encoding_convert_to_ascii_utf_8(myhtml_string_raw_t* raw_str, const char* buff, size_t length, myhtml_encoding_t encoding)
+{
+    /* lazily create the destination buffer */
+    if(raw_str->data == NULL) {
+        raw_str->size   = length + 1;
+        raw_str->length = 0;
+        raw_str->data   = myhtml_malloc(sizeof(char) * raw_str->size);
+
+        if(raw_str->data == NULL)
+            return 0;
+    }
+
+    myhtml_encoding_result_t res = {0};
+
+    const myhtml_encoding_custom_f decode = myhtml_encoding_get_function_by_id(encoding);
+    unsigned const char* bytes = (unsigned const char*)buff;
+
+    size_t consumed = 0;
+    while(consumed < length)
+    {
+        /* a code point is emitted only when the decoder returns MyHTML_ENCODING_STATUS_OK */
+        if(decode(bytes[consumed++], &res) != MyHTML_ENCODING_STATUS_OK)
+            continue;
+
+        /* keep room for one encoded code point (up to 6 bytes) before writing */
+        if(raw_str->size <= (raw_str->length + 6)) {
+            size_t grown_size = raw_str->length + 6 + (length / 2);
+            char *grown = myhtml_realloc(raw_str->data, sizeof(char) * grown_size);
+
+            if(grown == NULL) {
+                return 0;
+            }
+
+            raw_str->data = grown;
+            raw_str->size = grown_size;
+        }
+
+        char *out = &raw_str->data[raw_str->length];
+        raw_str->length += myhtml_encoding_codepoint_to_ascii_utf_8(res.result, out);
+    }
+
+    return consumed;
+}
diff --git a/source/myhtml/encoding.h b/source/myhtml/encoding.h
index 8c7b679..e575690 100644
--- a/source/myhtml/encoding.h
+++ b/source/myhtml/encoding.h
@@ -28,6 +28,7 @@ extern "C" {
#include "myhtml/myosi.h"
#include "myhtml/utils.h"
+#include "myhtml/mystring.h"
enum myhtml_encoding_status {
MyHTML_ENCODING_STATUS_OK = 0x00,
@@ -44,27 +45,23 @@ struct myhtml_encoding_result {
unsigned long result;
unsigned long result_aux;
unsigned long flag;
-}
-typedef myhtml_encoding_result_t;
+};
struct myhtml_encoding_trigram {
const unsigned char trigram[3];
size_t value;
-}
-typedef myhtml_encoding_trigram_t;
+};
struct myhtml_encoding_trigram_result {
size_t count;
size_t value;
-}
-typedef myhtml_encoding_trigram_result_t;
+};
struct myhtml_encoding_unicode_result {
size_t count_ascii;
size_t count_good;
size_t count_bad;
-}
-typedef myhtml_encoding_unicode_result_t;
+};
struct myhtml_encoding_detect_name_entry {
const char* name;
@@ -76,8 +73,21 @@ struct myhtml_encoding_detect_name_entry {
size_t next;
size_t curr;
-}
-typedef myhtml_encoding_detect_name_entry_t;
+};
+
+struct myhtml_encoding_detect_attr { /* one attribute located while prescanning bytes for an encoding */
+ size_t key_begin; /* position in the scanned buffer where the attribute name starts */
+ size_t key_length; /* length of the attribute name in bytes */
+ size_t value_begin; /* position in the scanned buffer where the attribute value starts */
+ size_t value_length; /* length of the attribute value in bytes */
+
+ myhtml_encoding_detect_attr_t *next; /* presumably a link to another attribute -- TODO confirm, not used in the visible code */
+};
+
+struct myhtml_encoding_entry_name_index { /* name record returned by myhtml_encoding_name_by_id() for an encoding id */
+ const char *name; /* encoding name */
+ size_t length; /* length of name */
+};
typedef myhtml_encoding_status_t (*myhtml_encoding_custom_f)(unsigned const char data, myhtml_encoding_result_t *res);
@@ -124,6 +134,7 @@ enum myhtml_encoding_status myhtml_encoding_decode_utf_16be(unsigned const char
enum myhtml_encoding_status myhtml_encoding_decode_utf_16le(unsigned const char data, myhtml_encoding_result_t *res);
enum myhtml_encoding_status myhtml_encoding_decode_x_user_defined(unsigned const char data, myhtml_encoding_result_t *res);
+size_t myhtml_encoding_codepoint_ascii_length(size_t codepoint);
size_t myhtml_encoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data);
size_t myhtml_encoding_codepoint_to_lowercase_ascii_utf_8(size_t codepoint, char *data);
size_t myhtml_encoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data);
@@ -137,8 +148,13 @@ bool myhtml_encoding_detect_unicode(const char *text, size_t length, myhtml_enco
bool myhtml_encoding_detect_bom(const char *text, size_t length, myhtml_encoding_t *encoding);
bool myhtml_encoding_detect_and_cut_bom(const char *text, size_t length, myhtml_encoding_t *encoding, const char **new_text, size_t *new_size);
+size_t myhtml_encoding_convert_to_ascii_utf_8(myhtml_string_raw_t* raw_str, const char* buff, size_t length, myhtml_encoding_t encoding);
+
const myhtml_encoding_detect_name_entry_t * myhtml_encoding_name_entry_by_name(const char* name, size_t length);
bool myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t *encoding);
+const char * myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length);
+
+myhtml_encoding_t myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/source/myhtml/encoding_detect.c b/source/myhtml/encoding_detect.c
index adceabd..4dc0842 100644
--- a/source/myhtml/encoding_detect.c
+++ b/source/myhtml/encoding_detect.c
@@ -379,7 +379,570 @@ bool myhtml_encoding_by_name(const char *name, size_t length, myhtml_encoding_t
return false;
}
+/*
+ * Look up the name recorded for an encoding id.
+ *
+ * encoding: id used as an index into myhtml_encoding_entry_name_index_static_list_index
+ * length:   optional (may be NULL); receives the name length, or 0 when the id is out of range
+ *
+ * Returns the name stored for the id, or NULL for ids >= MyHTML_ENCODING_LAST_ENTRY.
+ */
+const char * myhtml_encoding_name_by_id(myhtml_encoding_t encoding, size_t *length)
+{
+    const char *name = NULL;
+    size_t name_length = 0;
+
+    if(encoding < MyHTML_ENCODING_LAST_ENTRY) {
+        name        = myhtml_encoding_entry_name_index_static_list_index[encoding].name;
+        name_length = myhtml_encoding_entry_name_index_static_list_index[encoding].length;
+    }
+
+    if(length != NULL)
+        *length = name_length;
+
+    return name;
+}
+
+/*
+ When an algorithm requires a user agent to prescan a byte stream to determine its encoding,
+ given some defined end condition, then it must run the following steps.
+ These steps either abort unsuccessfully or return a character encoding.
+ If at any point during these steps (including during instances of the get an attribute algorithm invoked by this one)
+ the user agent either runs out of bytes (meaning the position pointer created in the first step below goes beyond the end of the byte stream obtained so far)
+ or reaches its end condition, then abort the prescan a byte stream to determine its encoding algorithm unsuccessfully.
+*/
+
+/* True for the bytes the HTML standard treats as ASCII whitespace: TAB, LF, FF, CR, SPACE. */
+static bool myhtml_encoding_meta_is_whitespace(unsigned char ch)
+{
+    return ch == 0x09 || ch == 0x0A || ch == 0x0C || ch == 0x0D || ch == 0x20;
+}
+
+/*
+ * Extract a character encoding from a meta element's "content" value
+ * (WHATWG "algorithm for extracting a character encoding from a meta element").
+ *
+ * data:      bytes to search; need not be NUL-terminated, nothing at or past
+ *            data[data_size] is read
+ * data_size: number of bytes in data
+ * encoding:  out; set to MyHTML_ENCODING_NOT_DETERMINED first, then filled in by
+ *            myhtml_encoding_by_name() when a candidate name is found
+ *
+ * Returns the result of myhtml_encoding_by_name() for the extracted name,
+ * or false when no usable "charset = value" pair is present.
+ */
+bool myhtml_encoding_algorithm_extracting_character_encoding_from_meta_element(const char *data, size_t data_size, myhtml_encoding_t *encoding)
+{
+    *encoding = MyHTML_ENCODING_NOT_DETERMINED;
+
+    /* 1 */
+    size_t length = 0;
+    size_t charset_length = strlen("charset");
+
+    bool is_get_pos = false;
+    const unsigned char *udata = (const unsigned char *)data;
+
+    /* 2: find the next case-insensitive occurrence of "charset" */
+    while((length + charset_length) < data_size) {
+        if(!myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"charset", &udata[length])) {
+            length++;
+            continue;
+        }
+
+        length += charset_length;
+
+        /* 3: skip whitespace that follows the keyword */
+        while(length < data_size && myhtml_encoding_meta_is_whitespace(udata[length]))
+            length++;
+
+        /* 4: the bounds check prevents reading one byte past the buffer
+              when the whitespace run reaches the end of the data */
+        if(length < data_size && udata[length] == 0x3D) { /* EQUALS SIGN (=) */
+            is_get_pos = true;
+
+            length++;
+            break;
+        }
+
+        /* not '=': resume the search at this very byte instead of skipping it */
+    }
+
+    if(is_get_pos == false || length >= data_size)
+        return false;
+
+    /* 5: skip whitespace after '=' */
+    while(length < data_size && myhtml_encoding_meta_is_whitespace(udata[length]))
+        length++;
+
+    if(length >= data_size)
+        return false;
+
+    /* 6: quoted value -- " or ' -- runs up to the matching quote */
+    if(udata[length] == 0x22 || udata[length] == 0x27)
+    {
+        const unsigned char quote = udata[length];
+
+        length++;
+        size_t begin = length;
+
+        while(length < data_size) {
+            if(udata[length] == quote)
+                return myhtml_encoding_by_name(&data[begin], (length - begin), encoding);
+
+            length++;
+        }
+
+        /* unmatched quote */
+        return false;
+    }
+
+    /* 6: unquoted value -- ends at the first whitespace, SEMICOLON (;) or end of data */
+    size_t begin = length;
+
+    while(length < data_size && udata[length] != 0x3B &&
+          myhtml_encoding_meta_is_whitespace(udata[length]) == false)
+    {
+        length++;
+    }
+
+    return myhtml_encoding_by_name(&data[begin], (length - begin), encoding);
+}
+
+/*
+ * Called once an attribute name has ended: records the name length, skips the
+ * separator bytes that follow it and checks whether an '=' comes next.
+ *
+ * udata/data_size: buffer being scanned
+ * data_length:     in: position where the name ended; out: position to continue from
+ *                  (just past the '=' when one was found)
+ * attr:            attr->key_begin must already be set; attr->key_length is written
+ *
+ * Returns true when an '=' was found, false otherwise (including end of data).
+ */
+bool myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_spaces(const unsigned char *udata, size_t *data_length, size_t data_size, myhtml_encoding_detect_attr_t *attr)
+{
+    size_t pos = *data_length;
+    bool found_equals = false;
+
+    /* the attribute name ends where the caller stopped */
+    attr->key_length = pos - attr->key_begin;
+
+    /* 6: skip TAB, LF, FF, CR, SPACE and '/' */
+    for(; pos < data_size; pos++) {
+        const unsigned char ch = udata[pos];
+
+        if(ch != 0x09 && ch != 0x0A && ch != 0x0C &&
+           ch != 0x0D && ch != 0x20 && ch != 0x2F)
+        {
+            break;
+        }
+    }
+
+    /* 7, 8: only an '=' lets the caller go on to read a value */
+    if(pos < data_size && udata[pos] == 0x3D) {
+        found_equals = true;
+        pos++;
+    }
+
+    *data_length = pos;
+    return found_equals;
+}
+
+/*
+ * Read the value part of an attribute (the bytes that follow '=') during the encoding prescan.
+ *
+ * udata/data_size: buffer being scanned
+ * length:          position of the first byte after '='
+ * attr:            receives value_begin / value_length
+ * it_last:         set to true when the data ends before a value starts, or when
+ *                  '>' follows the '=' directly
+ *
+ * Returns the position at which scanning should continue.
+ *
+ * NOTE(review): an unquoted value terminated by '>' consumes the '>' without setting
+ * *it_last, unlike the "'>' right after '='" case -- confirm the caller expects this.
+ */
+size_t myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(const unsigned char *udata, size_t length, size_t data_size, myhtml_encoding_detect_attr_t *attr, bool *it_last)
+{
+    size_t pos = length;
+
+    /* 9: skip TAB, LF, FF, CR, SPACE */
+    for(; pos < data_size; pos++) {
+        const unsigned char ch = udata[pos];
+
+        if(ch != 0x09 && ch != 0x0A && ch != 0x0C && ch != 0x0D && ch != 0x20)
+            break;
+    }
+
+    if(pos >= data_size) {
+        *it_last = true;
+        return pos;
+    }
+
+    /* 10 */
+    const unsigned char first = udata[pos];
+
+    if(first == 0x3E) { /* (ASCII >) */
+        *it_last = true;
+        return (pos + 1);
+    }
+
+    if(first == 0x22 || first == 0x27) /* (ASCII ") or (ASCII ') */
+    {
+        /* quoted value: everything up to the matching quote */
+        pos++;
+        attr->value_begin = pos;
+
+        for(; pos < data_size; pos++) {
+            if(udata[pos] == first) {
+                attr->value_length = pos - attr->value_begin;
+                return (pos + 1);
+            }
+        }
+    }
+    else {
+        /* unquoted value: ends at whitespace or '>' */
+        attr->value_begin = pos;
+
+        for(; pos < data_size; pos++) {
+            const unsigned char ch = udata[pos];
+
+            if(ch == 0x09 || ch == 0x0A || ch == 0x0C ||
+               ch == 0x0D || ch == 0x20 || ch == 0x3E)
+            {
+                attr->value_length = pos - attr->value_begin;
+                return (pos + 1);
+            }
+        }
+    }
+
+    /* data ended inside the value: keep what was collected so far */
+    attr->value_length = pos - attr->value_begin;
+    return pos;
+}
+
+/*
+ * "Get an attribute" step of the encoding prescan: reads one attribute
+ * (name and optional value) starting at `length`.
+ *
+ * udata     - input bytes
+ * length    - position to start from
+ * data_size - number of bytes available in udata
+ * attr      - cleared, then filled with key_begin/key_length and, when a value
+ *             is present, value_begin/value_length (offsets into udata)
+ * it_last   - set to true when '>' or the end of the data was reached, i.e.
+ *             there is no further attribute in this tag; never reset to false
+ *
+ * Returns the position at which scanning should continue. Unless *it_last is
+ * set, the returned position is always greater than the one passed in.
+ */
+size_t myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(const unsigned char *udata, size_t length, size_t data_size, myhtml_encoding_detect_attr_t *attr, bool *it_last)
+{
+    memset(attr, 0, sizeof(myhtml_encoding_detect_attr_t));
+
+    /*
+     If the byte at position is one of 0x09 (ASCII TAB), 0x0A (ASCII LF), 0x0C (ASCII FF), 0x0D (ASCII CR),
+     0x20 (ASCII space), or 0x2F (ASCII /) then advance position to the next byte and redo this step.
+     */
+    /* 1 */
+    while(length < data_size) {
+        if(udata[length] != 0x09 && udata[length] != 0x0A && udata[length] != 0x0C &&
+           udata[length] != 0x0D && udata[length] != 0x20 && udata[length] != 0x2F)
+        {
+            break;
+        }
+
+        length++;
+    }
+
+    if(length >= data_size) {
+        *it_last = true;
+        return length;
+    }
+
+    /* 2 */
+    if(udata[length] == 0x3E) { /* (ASCII >) */
+        *it_last = true;
+        return (length + 1);
+    }
+
+    attr->key_begin = length;
+
+    /* 3, 4: collect the name */
+    while(length < data_size) {
+        const unsigned char ch = udata[length];
+
+        /* '=' ends the name only when the name is not empty; a leading '=' is a name byte */
+        if(ch == 0x3D && length != attr->key_begin) {
+            attr->key_length = length - attr->key_begin;
+
+            return myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(udata, (length + 1), data_size, attr, it_last);
+        }
+
+        /* '/' ends this attribute (empty value) but not the tag */
+        if(ch == 0x2F) {
+            attr->key_length = length - attr->key_begin;
+            return (length + 1);
+        }
+
+        /* '>' ends both the attribute and the tag */
+        if(ch == 0x3E) {
+            *it_last = true;
+            attr->key_length = length - attr->key_begin;
+
+            return (length + 1);
+        }
+
+        /* whitespace ends the name; what follows decides whether there is a value */
+        if(ch == 0x09 || ch == 0x0A || ch == 0x0C || ch == 0x0D || ch == 0x20)
+            break;
+
+        length++;
+    }
+
+    /*
+     6, 7: `length` is left ON the terminating byte (or at the end of the data)
+     so that the key length computed by the helper does not include it.
+     */
+    if(myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_spaces(udata, &length, data_size, attr) == false) {
+        /* no '=': the attribute has an empty value; only the end of the data ends the tag here */
+        if(length >= data_size)
+            *it_last = true;
+
+        return length;
+    }
+
+    return myhtml_encoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last);
+}
+
+/*
+ * Walks the attributes of a <meta> tag and tries to obtain an encoding from
+ * its "charset" attribute or from "http-equiv" + "content".
+ *
+ * udata     - input bytes
+ * length    - in: position just past "<meta" and its delimiter;
+ *             out: position reached after the attributes were read
+ * data_size - number of bytes available in udata
+ * encoding  - receives the encoding; reset to MyHTML_ENCODING_NOT_DETERMINED
+ *             when the function returns false
+ *
+ * Returns true when an encoding declaration was accepted.
+ */
+bool myhtml_encoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myhtml_encoding_t *encoding)
+{
+    /* one bit per attribute name already handled; a repeated name is ignored */
+    enum { SEEN_HTTP_EQUIV = 1, SEEN_CONTENT = 2, SEEN_CHARSET = 4 };
+    /* tri-state "need pragma": not set yet / false / true */
+    enum { PRAGMA_UNSET = 0, PRAGMA_NOT_NEEDED = 1, PRAGMA_NEEDED = 2 };
+
+    myhtml_encoding_detect_attr_t cur;
+
+    size_t seen = 0;
+    unsigned int pragma_state = PRAGMA_UNSET;
+    bool pragma_found = false;
+    bool tag_done = false;
+
+    while(tag_done == false && *length < data_size) {
+        *length = myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(udata, *length, data_size, &cur, &tag_done);
+
+        const unsigned char *name  = &udata[ cur.key_begin ];
+        const unsigned char *value = &udata[ cur.value_begin ];
+
+        /* 9 */
+        if(cur.key_length == strlen("http-equiv") &&
+           myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"http-equiv", name))
+        {
+            if(seen & SEEN_HTTP_EQUIV)
+                continue;
+
+            seen |= SEEN_HTTP_EQUIV;
+
+            if(cur.value_length == strlen("content-type") &&
+               myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"content-type", value))
+            {
+                pragma_found = true;
+            }
+        }
+        else if(cur.key_length == strlen("content") &&
+                myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"content", name))
+        {
+            if(seen & SEEN_CONTENT)
+                continue;
+
+            seen |= SEEN_CONTENT;
+
+            if(myhtml_encoding_algorithm_extracting_character_encoding_from_meta_element((const char*)value, cur.value_length, encoding))
+                pragma_state = PRAGMA_NEEDED;
+        }
+        else if(cur.key_length == strlen("charset") &&
+                myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"charset", name))
+        {
+            if(seen & SEEN_CHARSET)
+                continue;
+
+            seen |= SEEN_CHARSET;
+
+            /* the lookup result is not inspected here */
+            myhtml_encoding_by_name((const char*)value, cur.value_length, encoding);
+            pragma_state = PRAGMA_NOT_NEEDED;
+        }
+    }
+
+    /* 11, 12, 13: nothing declared, or "content" without http-equiv="content-type" */
+    bool rejected = (pragma_state == PRAGMA_UNSET) ||
+                    (pragma_state == PRAGMA_NEEDED && pragma_found == false);
+
+    if(rejected) {
+        *encoding = MyHTML_ENCODING_NOT_DETERMINED;
+        return false;
+    }
+
+    /* 14: a UTF-16 declaration is replaced by UTF-8 */
+    if(*encoding == MyHTML_ENCODING_UTF_16BE || *encoding == MyHTML_ENCODING_UTF_16LE)
+        *encoding = MyHTML_ENCODING_UTF_8;
+
+    /* 15: x-user-defined is replaced by windows-1252 */
+    if(*encoding == MyHTML_ENCODING_X_USER_DEFINED)
+        *encoding = MyHTML_ENCODING_WINDOWS_1252;
+
+    /* 16 */
+    return true;
+}
+
+/*
+ * Skips the remainder of a tag: optional leading whitespace, then attributes
+ * are read (and discarded) until the attribute reader reports the last one.
+ *
+ * udata     - input bytes
+ * length    - position to start from
+ * data_size - number of bytes available in udata
+ *
+ * Returns the position at which scanning should continue.
+ */
+size_t myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(const unsigned char *udata, size_t length, size_t data_size)
+{
+    /* leading TAB, LF, FF, CR and space */
+    for(; length < data_size; length++) {
+        const unsigned char ch = udata[length];
+
+        if((ch == 0x09 || ch == 0x0A || ch == 0x0C || ch == 0x0D || ch == 0x20) == false)
+            break;
+    }
+
+    if(length >= data_size)
+        return length;
+
+    /* '>' right away: step over it */
+    if(udata[length] == 0x3E)
+        return (length + 1);
+
+    myhtml_encoding_detect_attr_t discarded;
+    bool tag_closed = false;
+
+    /* length < data_size holds here, so at least one attribute read is made */
+    do {
+        length = myhtml_encoding_prescan_stream_to_determine_encoding_get_attr(udata, length, data_size, &discarded, &tag_closed);
+    }
+    while(tag_closed == false && length < data_size);
+
+    return length;
+}
+/*
+ * Skips markup that is not a <meta> tag. `length` is the position of the byte
+ * that follows '<'.
+ *
+ *   "<!--"              -> skipped up to and including the first '>' that is
+ *                          preceded by two '-' (the dashes of "<!--" count)
+ *   "</" + ASCII letter -> handled as a tag (name and attributes)
+ *   "</", "<!", "<?"    -> skipped up to and including the first '>'
+ *   anything else       -> handled as a tag (name and attributes)
+ *
+ * udata     - input bytes
+ * length    - position of the byte after '<'
+ * data_size - number of bytes available in udata
+ *
+ * Returns the position at which scanning should continue (data_size at most).
+ */
+size_t myhtml_encoding_prescan_stream_to_determine_encoding_skip_other(const unsigned char *udata, size_t length, size_t data_size)
+{
+    if(length >= data_size)
+        return length;
+
+    switch (udata[length]) {
+        case 0x2F: /* / */
+            length++;
+
+            if(length >= data_size)
+                return length;
+
+            if(myhtml_tokenizer_chars_map[ udata[length] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z) {
+                return myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(udata, length, data_size);
+            }
+
+            break;
+
+        case 0x21: /* ! */
+            length++;
+
+            if((length + 2) < data_size && udata[length] == 0x2D && udata[(length + 1)] == 0x2D) {
+                /*
+                 * Comment: look for '>' preceded by "--". The position advances on
+                 * every iteration, so the loop always terminates. The two look-behind
+                 * reads happen only on a '>', which cannot occur before the two
+                 * dashes checked above, so they stay inside the buffer.
+                 */
+                while(length < data_size) {
+                    if(udata[length] == 0x3E && udata[(length - 1)] == 0x2D && udata[(length - 2)] == 0x2D)
+                        return (length + 1);
+
+                    length++;
+                }
+
+                return length;
+            }
+
+            break;
+
+        case 0x3F: /* ? */
+            length++;
+            break;
+
+        default:
+            return myhtml_encoding_prescan_stream_to_determine_encoding_skip_name(udata, length, data_size);
+    }
+
+    /* "</", "<!" and "<?" constructs end at the first '>' */
+    while(length < data_size) {
+        if(udata[length] == 0x3E)
+            return (length + 1);
+
+        length++;
+    }
+
+    return length;
+}
+/*
+ * Prescans a byte stream for an encoding declared in a <meta> tag.
+ *
+ * data      - input bytes
+ * data_size - number of bytes available in data
+ *
+ * Returns the encoding accepted from the first suitable <meta> tag, or
+ * MyHTML_ENCODING_NOT_DETERMINED when none is found.
+ */
+myhtml_encoding_t myhtml_encoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size)
+{
+    const unsigned char* udata = (const unsigned char*)data;
+    myhtml_encoding_t encoding = MyHTML_ENCODING_NOT_DETERMINED;
+
+    size_t i = 0;
+    while(i < data_size) {
+        /* 0x3C = '<' */
+        if(udata[i] != 0x3C) {
+            i++;
+            continue;
+        }
+
+        /* "<meta" plus its delimiter needs six bytes; less than that cannot hold a declaration */
+        if((i + 5) >= data_size)
+            return encoding;
+
+        i++;
+
+        /*
+         A sequence of bytes starting with:
+         0x3C, 0x4D or 0x6D, 0x45 or 0x65, 0x54 or 0x74, 0x41 or 0x61,
+         and one of 0x09, 0x0A, 0x0C, 0x0D, 0x20, 0x2F
+         (case-insensitive ASCII '<meta' followed by a space or slash)
+         */
+        bool is_meta = false;
+
+        if((udata[i] == 0x4D || udata[i] == 0x6D) &&
+           myhtml_ustrcasecmp_without_checks_by_secondary((const unsigned char*)"meta", &udata[i]))
+        {
+            /* in bounds: the size check above covers the byte after "meta" */
+            const unsigned char after = udata[(i + 4)];
+
+            is_meta = (after == 0x09 || after == 0x0A || after == 0x0C ||
+                       after == 0x0D || after == 0x20 || after == 0x2F);
+        }
+
+        if(is_meta) {
+            /* step over "meta" and its delimiter */
+            i += 5;
+
+            if(myhtml_encoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding))
+                return encoding;
+        }
+        else {
+            /*
+             Every other construct, including tags that merely begin with 'm'
+             ("<main", "<metadata", ...), is skipped as a whole so that its
+             attribute text is not rescanned as markup.
+             */
+            i = myhtml_encoding_prescan_stream_to_determine_encoding_skip_other(udata, i, data_size);
+        }
+    }
+
+    return encoding;
+}
diff --git a/source/myhtml/encoding_detect_resource.h b/source/myhtml/encoding_detect_resource.h
index a30b6ba..361f6ae 100644
--- a/source/myhtml/encoding_detect_resource.h
+++ b/source/myhtml/encoding_detect_resource.h
@@ -23,7 +23,7 @@
#pragma once
#ifdef __cplusplus
-extern "C" {
+//extern "C" {
#endif
#define MyHTML_ENCODING_DETECT_NAME_STATIC_SIZE 419
@@ -49,10 +49,10 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
{"shift_jis", 9, "ms932", 5, MyHTML_ENCODING_SHIFT_JIS, 0, 16},
- {"windows-1256", 12, "windows-1256", 12, MyHTML_ENCODING_WINDOWS_1256, 421, 17},
+ {"gbk", 3, "gb_2312", 7, MyHTML_ENCODING_GBK, 421, 17},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-5", 10, "csisolatincyrillic", 18, MyHTML_ENCODING_ISO_8859_5, 422, 20},
+ {"iso-8859-7", 10, "iso8859-7", 9, MyHTML_ENCODING_ISO_8859_7, 422, 20},
{"windows-1250", 12, "cp1250", 6, MyHTML_ENCODING_WINDOWS_1250, 423, 21},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -126,7 +126,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-8", 10, "hebrew", 6, MyHTML_ENCODING_ISO_8859_8, 428, 94},
+ {"gbk", 3, "x-gbk", 5, MyHTML_ENCODING_GBK, 428, 94},
{"iso-8859-7", 10, "csisolatingreek", 15, MyHTML_ENCODING_ISO_8859_7, 0, 95},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -139,11 +139,11 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-4", 10, "iso88594", 8, MyHTML_ENCODING_ISO_8859_4, 0, 105},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-5", 10, "iso88595", 8, MyHTML_ENCODING_ISO_8859_5, 430, 107},
+ {"shift_jis", 9, "sjis", 4, MyHTML_ENCODING_SHIFT_JIS, 430, 107},
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-6", 10, "iso88596", 8, MyHTML_ENCODING_ISO_8859_6, 431, 109},
{"windows-1252", 12, "l1", 2, MyHTML_ENCODING_WINDOWS_1252, 432, 110},
- {"iso-8859-6", 10, "csisolatinarabic", 16, MyHTML_ENCODING_ISO_8859_6, 433, 111},
+ {"macintosh", 9, "mac", 3, MyHTML_ENCODING_MACINTOSH, 433, 111},
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-8", 10, "iso88598", 8, MyHTML_ENCODING_ISO_8859_8, 435, 113},
{"koi8-u", 6, "koi8-u", 6, MyHTML_ENCODING_KOI8_U, 0, 114},
@@ -159,7 +159,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-2", 10, "iso-8859-2", 10, MyHTML_ENCODING_ISO_8859_2, 439, 126},
- {"iso-8859-8", 10, "iso8859-8", 9, MyHTML_ENCODING_ISO_8859_8, 440, 127},
+ {"windows-1253", 12, "cp1253", 6, MyHTML_ENCODING_WINDOWS_1253, 440, 127},
{NULL, 0, NULL, 0, 0, 0, 0},
{"windows-1258", 12, "x-cp1258", 8, MyHTML_ENCODING_WINDOWS_1258, 0, 129},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -168,7 +168,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{"iso-8859-10", 11, "iso-8859-10", 11, MyHTML_ENCODING_ISO_8859_10, 0, 133},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-6", 10, "iso-8859-6", 10, MyHTML_ENCODING_ISO_8859_6, 446, 136},
+ {"windows-1254", 12, "l5", 2, MyHTML_ENCODING_WINDOWS_1254, 446, 136},
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-2", 10, "latin2", 6, MyHTML_ENCODING_ISO_8859_2, 0, 138},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -259,7 +259,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{"iso-8859-5", 10, "iso8859-5", 9, MyHTML_ENCODING_ISO_8859_5, 460, 225},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"macintosh", 9, "x-mac-roman", 11, MyHTML_ENCODING_MACINTOSH, 461, 227},
+ {"windows-1252", 12, "ascii", 5, MyHTML_ENCODING_WINDOWS_1252, 461, 227},
{NULL, 0, NULL, 0, 0, 0, 0},
{"euc-kr", 6, "korean", 6, MyHTML_ENCODING_EUC_KR, 0, 229},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -278,7 +278,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-13", 11, "iso-8859-13", 11, MyHTML_ENCODING_ISO_8859_13, 466, 246},
+ {"big5", 4, "big5", 4, MyHTML_ENCODING_BIG5, 466, 246},
{NULL, 0, NULL, 0, 0, 0, 0},
{"windows-1250", 12, "windows-1250", 12, MyHTML_ENCODING_WINDOWS_1250, 0, 248},
{"euc-kr", 6, "ksc5601", 7, MyHTML_ENCODING_EUC_KR, 0, 249},
@@ -336,7 +336,7 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{NULL, 0, NULL, 0, 0, 0, 0},
{"windows-1254", 12, "cp1254", 6, MyHTML_ENCODING_WINDOWS_1254, 470, 302},
{NULL, 0, NULL, 0, 0, 0, 0},
- {"iso-8859-6", 10, "iso-8859-6-e", 12, MyHTML_ENCODING_ISO_8859_6, 471, 304},
+ {"windows-1256", 12, "x-cp1256", 8, MyHTML_ENCODING_WINDOWS_1256, 471, 304},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
{NULL, 0, NULL, 0, 0, 0, 0},
@@ -453,32 +453,32 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{"gbk", 3, "gb_2312-80", 10, MyHTML_ENCODING_GBK, 0, 418},
{"windows-1251", 12, "windows-1251", 12, MyHTML_ENCODING_WINDOWS_1251, 0, 419},
{"iso-8859-13", 11, "iso885913", 9, MyHTML_ENCODING_ISO_8859_13, 0, 420},
- {"gbk", 3, "gb_2312", 7, MyHTML_ENCODING_GBK, 0, 421},
- {"iso-8859-7", 10, "iso8859-7", 9, MyHTML_ENCODING_ISO_8859_7, 0, 422},
+ {"windows-1256", 12, "windows-1256", 12, MyHTML_ENCODING_WINDOWS_1256, 0, 421},
+ {"iso-8859-5", 10, "csisolatincyrillic", 18, MyHTML_ENCODING_ISO_8859_5, 0, 422},
{"gbk", 3, "chinese", 7, MyHTML_ENCODING_GBK, 0, 423},
{"shift_jis", 9, "shift_jis", 9, MyHTML_ENCODING_SHIFT_JIS, 0, 424},
{"windows-874", 11, "iso-8859-11", 11, MyHTML_ENCODING_WINDOWS_874, 0, 425},
{"iso-8859-15", 11, "iso_8859-15", 11, MyHTML_ENCODING_ISO_8859_15, 0, 426},
{"big5", 4, "csbig5", 6, MyHTML_ENCODING_BIG5, 0, 427},
- {"gbk", 3, "x-gbk", 5, MyHTML_ENCODING_GBK, 429, 428},
+ {"iso-8859-8", 10, "hebrew", 6, MyHTML_ENCODING_ISO_8859_8, 429, 428},
{"windows-1254", 12, "windows-1254", 12, MyHTML_ENCODING_WINDOWS_1254, 0, 429},
- {"shift_jis", 9, "sjis", 4, MyHTML_ENCODING_SHIFT_JIS, 0, 430},
+ {"iso-8859-5", 10, "iso88595", 8, MyHTML_ENCODING_ISO_8859_5, 0, 430},
{"iso-8859-10", 11, "iso885910", 9, MyHTML_ENCODING_ISO_8859_10, 0, 431},
{"windows-1254", 12, "iso_8859-9:1989", 15, MyHTML_ENCODING_WINDOWS_1254, 0, 432},
- {"macintosh", 9, "mac", 3, MyHTML_ENCODING_MACINTOSH, 434, 433},
- {"iso-8859-7", 10, "iso88597", 8, MyHTML_ENCODING_ISO_8859_7, 0, 434},
+ {"iso-8859-7", 10, "iso88597", 8, MyHTML_ENCODING_ISO_8859_7, 434, 433},
+ {"iso-8859-6", 10, "csisolatinarabic", 16, MyHTML_ENCODING_ISO_8859_6, 0, 434},
{"windows-1251", 12, "x-cp1251", 8, MyHTML_ENCODING_WINDOWS_1251, 0, 435},
{"iso-8859-14", 11, "iso885914", 9, MyHTML_ENCODING_ISO_8859_14, 0, 436},
{"iso-8859-10", 11, "iso8859-10", 10, MyHTML_ENCODING_ISO_8859_10, 438, 437},
{"windows-1252", 12, "iso-ir-100", 10, MyHTML_ENCODING_WINDOWS_1252, 0, 438},
{"iso-8859-2", 10, "iso_8859-2", 10, MyHTML_ENCODING_ISO_8859_2, 0, 439},
- {"windows-1253", 12, "cp1253", 6, MyHTML_ENCODING_WINDOWS_1253, 441, 440},
+ {"iso-8859-8", 10, "iso8859-8", 9, MyHTML_ENCODING_ISO_8859_8, 441, 440},
{"gbk", 3, "iso-ir-58", 9, MyHTML_ENCODING_GBK, 442, 441},
{"macintosh", 9, "csmacintosh", 11, MyHTML_ENCODING_MACINTOSH, 0, 442},
{"iso-8859-4", 10, "iso_8859-4", 10, MyHTML_ENCODING_ISO_8859_4, 444, 443},
{"iso-8859-5", 10, "iso-ir-144", 10, MyHTML_ENCODING_ISO_8859_5, 445, 444},
{"iso-8859-14", 11, "iso8859-14", 10, MyHTML_ENCODING_ISO_8859_14, 0, 445},
- {"windows-1254", 12, "l5", 2, MyHTML_ENCODING_WINDOWS_1254, 447, 446},
+ {"iso-8859-6", 10, "iso-8859-6", 10, MyHTML_ENCODING_ISO_8859_6, 447, 446},
{"iso-8859-6", 10, "iso_8859-6", 10, MyHTML_ENCODING_ISO_8859_6, 448, 447},
{"iso-8859-7", 10, "iso-ir-126", 10, MyHTML_ENCODING_ISO_8859_7, 0, 448},
{"iso-8859-8", 10, "iso-ir-138", 10, MyHTML_ENCODING_ISO_8859_8, 450, 449},
@@ -493,17 +493,17 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{"windows-874", 11, "iso885911", 9, MyHTML_ENCODING_WINDOWS_874, 459, 458},
{"windows-1252", 12, "iso8859-1", 9, MyHTML_ENCODING_WINDOWS_1252, 0, 459},
{"iso-8859-15", 11, "iso885915", 9, MyHTML_ENCODING_ISO_8859_15, 0, 460},
- {"windows-1252", 12, "ascii", 5, MyHTML_ENCODING_WINDOWS_1252, 0, 461},
+ {"macintosh", 9, "x-mac-roman", 11, MyHTML_ENCODING_MACINTOSH, 0, 461},
{"koi8-r", 6, "cskoi8r", 7, MyHTML_ENCODING_KOI8_R, 463, 462},
{"euc-kr", 6, "cseuckr", 7, MyHTML_ENCODING_EUC_KR, 0, 463},
{"x-mac-cyrillic", 14, "x-mac-ukrainian", 15, MyHTML_ENCODING_X_MAC_CYRILLIC, 0, 464},
{"windows-1252", 12, "x-cp1252", 8, MyHTML_ENCODING_WINDOWS_1252, 0, 465},
- {"big5", 4, "big5", 4, MyHTML_ENCODING_BIG5, 0, 466},
+ {"iso-8859-13", 11, "iso-8859-13", 11, MyHTML_ENCODING_ISO_8859_13, 0, 466},
{"gb18030", 7, "gb18030", 7, MyHTML_ENCODING_GB18030, 0, 467},
{"utf-16le", 8, "utf-16le", 8, MyHTML_ENCODING_UTF_16LE, 0, 468},
{"koi8-r", 6, "koi8_r", 6, MyHTML_ENCODING_KOI8_R, 0, 469},
{"shift_jis", 9, "csshiftjis", 10, MyHTML_ENCODING_SHIFT_JIS, 0, 470},
- {"windows-1256", 12, "x-cp1256", 8, MyHTML_ENCODING_WINDOWS_1256, 472, 471},
+ {"iso-8859-6", 10, "iso-8859-6-e", 12, MyHTML_ENCODING_ISO_8859_6, 472, 471},
{"iso-8859-8", 10, "iso-8859-8-e", 12, MyHTML_ENCODING_ISO_8859_8, 0, 472},
{"iso-8859-6", 10, "iso_8859-6:1987", 15, MyHTML_ENCODING_ISO_8859_6, 474, 473},
{"iso-8859-7", 10, "iso_8859-7:1987", 15, MyHTML_ENCODING_ISO_8859_7, 475, 474},
@@ -528,6 +528,52 @@ static const myhtml_encoding_detect_name_entry_t myhtml_encoding_detect_name_ent
{"iso-8859-7", 10, "elot_928", 8, MyHTML_ENCODING_ISO_8859_7, 0, 493},
};
+static const myhtml_encoding_entry_name_index_t myhtml_encoding_entry_name_index_static_list_index[(MyHTML_ENCODING_LAST_ENTRY + 1)] =
+{ /* Upper-case encoding names, each paired with its length in bytes; presumably indexed by myhtml_encoding_t value (TODO confirm against the enum order). */
+ {"UTF-8", 5},
+ {"AUTO", 4},
+ {"NOT-DETERMINED", 14},
+ {"UTF-16LE", 8},
+ {"UTF-16BE", 8},
+ {"X-USER-DEFINED", 14},
+ {"BIG5", 4},
+ {"EUC-JP", 6},
+ {"EUC-KR", 6},
+ {"GB18030", 7},
+ {"GBK", 3},
+ {"IBM866", 6},
+ {"ISO-2022-JP", 11},
+ {"ISO-8859-10", 11},
+ {"ISO-8859-13", 11},
+ {"ISO-8859-14", 11},
+ {"ISO-8859-15", 11},
+ {"ISO-8859-16", 11},
+ {"ISO-8859-2", 10},
+ {"ISO-8859-3", 10},
+ {"ISO-8859-4", 10},
+ {"ISO-8859-5", 10},
+ {"ISO-8859-6", 10},
+ {"ISO-8859-7", 10},
+ {"ISO-8859-8", 10},
+ {"ISO-8859-8-I", 12},
+ {"KOI8-R", 6},
+ {"KOI8-U", 6},
+ {"MACINTOSH", 9},
+ {"SHIFT_JIS", 9},
+ {"WINDOWS-1250", 12},
+ {"WINDOWS-1251", 12},
+ {"WINDOWS-1252", 12},
+ {"WINDOWS-1253", 12},
+ {"WINDOWS-1254", 12},
+ {"WINDOWS-1255", 12},
+ {"WINDOWS-1256", 12},
+ {"WINDOWS-1257", 12},
+ {"WINDOWS-1258", 12},
+ {"WINDOWS-874", 11},
+ {"X-MAC-CYRILLIC", 14},
+ {"", 0}, /* empty final entry */
+};
+
static const myhtml_encoding_trigram_t myhtml_encoding_detect_trigrams_index_ibm866[] = {
{{0xA5, 0xAD, 0xA8}, 991}, /* ени */
{{0xAE, 0xA3, 0xAE}, 806}, /* ого */
diff --git a/source/myhtml/myhtml.c b/source/myhtml/myhtml.c
index 6b3bbd2..767603e 100644
--- a/source/myhtml/myhtml.c
+++ b/source/myhtml/myhtml.c
@@ -369,48 +369,50 @@ myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myh
myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, myhtml_status_t *status)
{
- myhtml_tag_index_entry_t *index_tag = myhtml_tag_index_entry(tree->indexes->tags, tag_id);
- myhtml_tag_index_node_t *index_node = myhtml_tag_index_first(tree->indexes->tags, tag_id);
-
- if(index_tag->count == 0) {
- if(status)
- *status = MyHTML_STATUS_OK;
-
- return collection;
- }
-
- myhtml_status_t mystatus = MyHTML_STATUS_OK;
- size_t idx = 0;
-
if(collection == NULL) {
- collection = myhtml_collection_create((index_tag->count + 128), &mystatus);
+ collection = myhtml_collection_create(1024, NULL);
- collection->length += index_tag->count;
- }
- else {
- idx = collection->length;
- mystatus = myhtml_collection_check_size(collection, index_tag->count, 128);
+ if(collection == NULL)
+ return NULL;
}
- if(mystatus) {
- if(status)
- *status = mystatus;
-
- return collection;
- }
+ myhtml_tree_node_t *node = tree->node_html;
- while (index_node)
+ while(node)
{
- collection->list[idx] = index_node->node;
- idx++;
+ if(node->tag_id == tag_id)
+ {
+ if(myhtml_collection_check_size(collection, 1, 1024) == MyHTML_STATUS_OK) {
+ collection->list[ collection->length ] = node;
+ collection->length++;
+ }
+ else {
+ if(status)
+ *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
+
+ return collection;
+ }
+ }
- index_node = index_node->next;
+ if(node->child)
+ node = node->child;
+ else {
+ while(node != tree->node_html && node->next == NULL)
+ node = node->parent;
+
+ if(node == tree->node_html)
+ break;
+
+ node = node->next;
+ }
}
- collection->list[idx] = NULL;
-
- if(status)
- *status = mystatus;
+ if(myhtml_collection_check_size(collection, 1, 1024) == MyHTML_STATUS_OK) {
+ collection->list[ collection->length ] = NULL;
+ }
+ else if(status) {
+ *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
+ }
return collection;
}
@@ -478,73 +480,73 @@ myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag
return node;
}
-myhtml_tree_node_t * myhtml_node_remove(myhtml_tree_t* tree, myhtml_tree_node_t *node)
+myhtml_tree_node_t * myhtml_node_remove(myhtml_tree_node_t *node)
{
- return myhtml_tree_node_remove(tree, node);
+ return myhtml_tree_node_remove(node);
}
-void myhtml_node_delete(myhtml_tree_t* tree, myhtml_tree_node_t *node)
+void myhtml_node_delete(myhtml_tree_node_t *node)
{
- myhtml_tree_node_delete(tree, node);
+ myhtml_tree_node_delete(node);
}
-void myhtml_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t *node)
+void myhtml_node_delete_recursive(myhtml_tree_node_t *node)
{
- myhtml_tree_node_delete_recursive(tree, node);
+ myhtml_tree_node_delete_recursive(node);
}
-void myhtml_node_free(myhtml_tree_t* tree, myhtml_tree_node_t *node)
+void myhtml_node_free(myhtml_tree_node_t *node)
{
- myhtml_tree_node_free(tree, node);
+ myhtml_tree_node_free(node);
}
-myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node)
+myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
{
if(target == NULL || node == NULL)
return NULL;
- myhtml_tree_node_insert_before(tree, target, node);
+ myhtml_tree_node_insert_before(target, node);
return node;
}
-myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node)
+myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
{
if(target == NULL || node == NULL)
return NULL;
- myhtml_tree_node_insert_after(tree, target, node);
+ myhtml_tree_node_insert_after(target, node);
return node;
}
-myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node)
+myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
{
if(target == NULL || node == NULL)
return NULL;
- myhtml_tree_node_add_child(tree, target, node);
+ myhtml_tree_node_add_child(target, node);
return node;
}
-myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node)
+myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t *target, myhtml_tree_node_t *node)
{
if(target == NULL || node == NULL)
return NULL;
enum myhtml_tree_insertion_mode mode;
- tree->foster_parenting = true;
- target = myhtml_tree_appropriate_place_inserting_in_tree(tree, target, &mode);
- tree->foster_parenting = false;
+ target->tree->foster_parenting = true;
+ target = myhtml_tree_appropriate_place_inserting_in_tree(target, &mode);
+ target->tree->foster_parenting = false;
- myhtml_tree_node_insert_by_mode(tree, target, node, mode);
+ myhtml_tree_node_insert_by_mode(target, node, mode);
return node;
}
-myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
+myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
if(node == NULL)
return NULL;
@@ -552,14 +554,13 @@ myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *
if(encoding >= MyHTML_ENCODING_LAST_ENTRY)
return NULL;
+ myhtml_tree_t* tree = node->tree;
+
if(node->token == NULL) {
- mcobject_async_status_t mcstatus;
- node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
+ node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
- if(mcstatus)
+ if(node->token == NULL)
return NULL;
-
- myhtml_token_node_clean(node->token);
}
if(node->token->str.data == NULL) {
@@ -587,7 +588,7 @@ myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *
return &node->token->str;
}
-myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
+myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding)
{
if(node == NULL)
return NULL;
@@ -595,14 +596,13 @@ myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_t
if(encoding >= MyHTML_ENCODING_LAST_ENTRY)
return NULL;
+ myhtml_tree_t* tree = node->tree;
+
if(node->token == NULL) {
- mcobject_async_status_t mcstatus;
- node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
+ node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
- if(mcstatus)
+ if(node->token == NULL)
return NULL;
-
- myhtml_token_node_clean(node->token);
}
if(node->token->str.data == NULL) {
@@ -660,15 +660,15 @@ const char * myhtml_tag_name_by_id(myhtml_tree_t* tree, myhtml_tag_id_t tag_id,
if(tree == NULL || tree->tags == NULL)
return NULL;
- const myhtml_tag_context_t *ctx = myhtml_tag_get_by_id(tree->tags, tag_id);
+ const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, tag_id);
- if(ctx == NULL)
+ if(tag_ctx == NULL)
return NULL;
if(length)
- *length = ctx->name_length;
+ *length = tag_ctx->name_length;
- return ctx->name;
+ return tag_ctx->name;
}
myhtml_tag_id_t myhtml_tag_id_by_name(myhtml_tree_t* tree, const char *tag_name, size_t length)
@@ -758,7 +758,12 @@ void * myhtml_node_get_data(myhtml_tree_node_t *node)
return node->data;
}
-myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_t *tree, myhtml_tree_node_t* node, myhtml_collection_t* collection, const char* key, size_t key_len)
+myhtml_tree_t * myhtml_node_tree(myhtml_tree_node_t *node)
+{
+ return node->tree;
+}
+
+myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection, const char* key, size_t key_len)
{
while(node)
{
@@ -785,7 +790,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_t *tree,
}
if(node->child) {
- myhtml_status_t status = myhtml_get_nodes_by_attribute_key_recursion(tree, node->child, collection, key, key_len);
+ myhtml_status_t status = myhtml_get_nodes_by_attribute_key_recursion(node->child, collection, key, key_len);
if(status)
return status;
@@ -809,7 +814,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myh
if(scope_node == NULL)
scope_node = tree->node_html;
- myhtml_status_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(tree, scope_node, collection, key, key_len);
+ myhtml_status_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(scope_node, collection, key, key_len);
if(rec_status && status)
*status = rec_status;
@@ -978,8 +983,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(myhtml_str
}
/* find by attribute value; basic functions */
-myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_t *tree, myhtml_tree_node_t* node,
- myhtml_collection_t* collection,
+myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t* node, myhtml_collection_t* collection,
myhtml_attribute_value_find_f func_eq,
const char* value, size_t value_len)
{
@@ -1008,7 +1012,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_t *tre
}
if(node->child) {
- myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion(tree, node->child, collection, func_eq, value, value_len);
+ myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion(node->child, collection, func_eq, value, value_len);
if(status)
return status;
@@ -1020,8 +1024,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_t *tre
return MyHTML_STATUS_OK;
}
-myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_t *tree, myhtml_tree_node_t* node,
- myhtml_collection_t* collection,
+myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_node_t* node, myhtml_collection_t* collection,
myhtml_attribute_value_find_f func_eq,
const char* key, size_t key_len,
const char* value, size_t value_len)
@@ -1055,7 +1058,7 @@ myhtml_status_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree
}
if(node->child) {
- myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(tree, node->child, collection, func_eq,
+ myhtml_status_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node->child, collection, func_eq,
key, key_len, value, value_len);
if(status)
@@ -1087,9 +1090,9 @@ myhtml_collection_t * _myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree,
myhtml_status_t rec_status;
if(key && key_len)
- rec_status = myhtml_get_nodes_by_attribute_value_recursion_by_key(tree, node, collection, func_eq, key, key_len, value, value_len);
+ rec_status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node, collection, func_eq, key, key_len, value, value_len);
else
- rec_status = myhtml_get_nodes_by_attribute_value_recursion(tree, node, collection, func_eq, value, value_len);
+ rec_status = myhtml_get_nodes_by_attribute_value_recursion(node, collection, func_eq, value, value_len);
if(rec_status && status)
*status = rec_status;
@@ -1278,23 +1281,22 @@ myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const cha
return myhtml_token_attr_by_name(node->token, key, key_len);
}
-myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_t *tree, myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding)
+myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding)
{
if(node == NULL)
return NULL;
+ myhtml_tree_t *tree = node->tree;
+
if(node->token == NULL) {
- mcobject_async_status_t mcstatus;
- node->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
+ node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
- if(mcstatus)
+ if(node->token == NULL)
return NULL;
-
- myhtml_token_node_clean(node->token);
}
return myhtml_token_node_attr_append_with_convert_encoding(tree->token, node->token, key, key_len,
- value, value_len, tree->mcasync_token_id, encoding);
+ value, value_len, tree->mcasync_rules_token_id, encoding);
}
myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tree_attr_t *attr)
@@ -1389,9 +1391,8 @@ myhtml_status_t myhtml_collection_check_size(myhtml_collection_t *collection, si
myhtml_tree_node_t **tmp = (myhtml_tree_node_t **)myhtml_realloc(collection->list, sizeof(myhtml_tree_node_t*) * tmp_size);
if(tmp) {
- collection->length = upto_length;
- collection->size = tmp_size;
- collection->list = tmp;
+ collection->size = tmp_size;
+ collection->list = tmp;
}
else
return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
@@ -1420,8 +1421,10 @@ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection)
}
/* queue */
-void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token)
+myhtml_status_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token)
{
+ // TODO: need refactoring this code
+ // too many conditions
mythread_queue_node_t *qnode = tree->current_qnode;
if(tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN) {
@@ -1430,14 +1433,14 @@ void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* to
myhtml_token_node_clean(token);
token->raw_begin = token->element_begin = (tree->global_offset + begin);
- return;
+ return MyHTML_STATUS_OK;
}
}
#ifndef MyHTML_BUILD_WITHOUT_THREADS
if(tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) {
- if(qnode) {
+ if(qnode && token) {
qnode->token = token;
myhtml_parser_worker(0, qnode);
@@ -1455,7 +1458,7 @@ void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* to
#else
- f(qnode) {
+ if(qnode && token) {
qnode->token = token;
myhtml_parser_worker(0, qnode);
@@ -1467,8 +1470,7 @@ void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* to
#endif /* MyHTML_BUILD_WITHOUT_THREADS */
if(tree->current_qnode == NULL) {
- // TODO: add return status
- return;
+ return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
}
tree->current_qnode->tree = tree;
@@ -1477,10 +1479,14 @@ void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* to
if(qnode && token)
myhtml_tokenizer_calc_current_namespace(tree, token);
- // TODO: add check created node
- myhtml_token_node_malloc(tree->token, tree->current_token_node, tree->token->mcasync_token_id);
+ tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id);
+
+ if(tree->current_token_node == NULL)
+ return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
tree->current_token_node->raw_begin = tree->current_token_node->element_begin = (tree->global_offset + begin);
+
+ return MyHTML_STATUS_OK;
}
bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size)
diff --git a/source/myhtml/myhtml.h b/source/myhtml/myhtml.h
index 1cc2721..9a049e2 100644
--- a/source/myhtml/myhtml.h
+++ b/source/myhtml/myhtml.h
@@ -153,16 +153,16 @@ myhtml_tree_node_t * myhtml_node_parent(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_child(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_last_child(myhtml_tree_node_t *node);
-myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node);
-myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node);
-myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node);
-myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t *target, myhtml_tree_node_t *node);
+myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
+myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
+myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
+myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
-myhtml_tree_node_t * myhtml_node_remove(myhtml_tree_t* tree, myhtml_tree_node_t *node);
-void myhtml_node_delete(myhtml_tree_t* tree, myhtml_tree_node_t *node);
-void myhtml_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t *node);
-void myhtml_node_free(myhtml_tree_t* tree, myhtml_tree_node_t *node);
+myhtml_tree_node_t * myhtml_node_remove(myhtml_tree_node_t *node);
+void myhtml_node_delete(myhtml_tree_node_t *node);
+void myhtml_node_delete_recursive(myhtml_tree_node_t *node);
+void myhtml_node_free(myhtml_tree_node_t *node);
myhtml_token_node_t* myhtml_node_token(myhtml_tree_node_t *node);
myhtml_namespace_t myhtml_node_namespace(myhtml_tree_node_t *node);
@@ -191,7 +191,7 @@ const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length);
myhtml_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr);
myhtml_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr);
myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len);
-myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_t *tree, myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding);
+myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myhtml_encoding_t encoding);
myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tree_attr_t *attr);
myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len);
void myhtml_attribute_delete(myhtml_tree_t *tree, myhtml_tree_node_t *node, myhtml_tree_attr_t *attr);
@@ -206,14 +206,14 @@ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection)
myhtml_status_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length);
// strings
-myhtml_string_t * myhtml_node_text_set(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding);
-myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_t* tree, myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding);
+myhtml_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding);
+myhtml_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myhtml_encoding_t encoding);
bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size);
bool myhtml_is_html_node(myhtml_tree_node_t *node, myhtml_tag_id_t tag_id);
// queue
-void myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token);
+myhtml_status_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token);
/**
* Platform-specific hdef performance clock queries.
diff --git a/source/myhtml/mynamespace.c b/source/myhtml/mynamespace.c
index 082919f..596b518 100644
--- a/source/myhtml/mynamespace.c
+++ b/source/myhtml/mynamespace.c
@@ -82,6 +82,24 @@ const char * myhtml_namespace_name_by_id(myhtml_namespace_t ns, size_t *length)
return name_ns;
}
+const char * myhtml_namespace_url_by_id(myhtml_namespace_t ns, size_t *length)
+{ /* Returns the `url` member of the static URL-table entry indexed by `ns`. `length` is optional: when non-NULL it receives the entry's `url_length`, or 0 when `ns` is rejected. Returns NULL for a rejected `ns`. */
+ if(ns > MyHTML_NAMESPACE_LAST_ENTRY) { /* ids above the last entry are rejected before the table is indexed */
+ if(length)
+ *length = 0;
+ /* rejected id: there is no URL to hand back */
+ return NULL;
+ }
+ /* `ns` is used directly as the index into the static table */
+ const myhtml_namespace_detect_url_entry_t *url_entry = &myhtml_namespace_detect_url_entry_static_list_index[ns];
+ /* `length` may be NULL; it is only written when the caller supplied it */
+ if(length) {
+ *length = url_entry->url_length;
+ }
+ /* the returned pointer is the table entry's own `url` member, not a copy */
+ return url_entry->url;
+}
+
myhtml_namespace_t myhtml_namespace_id_by_url(const char *url, size_t length)
{
size_t i = 0;
diff --git a/source/myhtml/mynamespace.h b/source/myhtml/mynamespace.h
index c0daa5e..0d8d0e9 100644
--- a/source/myhtml/mynamespace.h
+++ b/source/myhtml/mynamespace.h
@@ -54,6 +54,7 @@ typedef myhtml_namespace_detect_url_entry_t;
const char * myhtml_namespace_name_by_id(myhtml_namespace_t ns, size_t *length);
bool myhtml_namespace_id_by_name(const char *name, size_t length, myhtml_namespace_t *ns);
+const char * myhtml_namespace_url_by_id(myhtml_namespace_t ns, size_t *length);
myhtml_namespace_t myhtml_namespace_id_by_url(const char *url, size_t length);
const myhtml_namespace_detect_name_entry_t * myhtml_namespace_name_entry_by_name(const char* name, size_t length);
diff --git a/source/myhtml/mynamespace_resource.h b/source/myhtml/mynamespace_resource.h
index 8dfdad2..e584cb7 100644
--- a/source/myhtml/mynamespace_resource.h
+++ b/source/myhtml/mynamespace_resource.h
@@ -46,11 +46,12 @@ static const myhtml_namespace_detect_name_entry_t myhtml_namespace_detect_name_e
{"XLINK", 5, MyHTML_NAMESPACE_XLINK, 0, 19},
};
-static const char myhtml_namespace_resource_names_map[][8] = {
+static const char myhtml_namespace_resource_names_map[(MyHTML_NAMESPACE_LAST_ENTRY + 1)][8] = {
"", "HTML", "MathML", "SVG", "XLink", "XML", "XMLNS", "*"
};
-static const myhtml_namespace_detect_url_entry_t myhtml_namespace_detect_url_entry_static_list_index[] = {
+static const myhtml_namespace_detect_url_entry_t myhtml_namespace_detect_url_entry_static_list_index[(MyHTML_NAMESPACE_LAST_ENTRY + 1)] = {
+ {"undef", 5, MyHTML_NAMESPACE_UNDEF},
{"http://www.w3.org/1999/xhtml", 28, MyHTML_NAMESPACE_HTML},
{"http://www.w3.org/1998/Math/MathML", 34, MyHTML_NAMESPACE_MATHML},
{"http://www.w3.org/2000/svg", 26, MyHTML_NAMESPACE_SVG},
diff --git a/source/myhtml/myosi.h b/source/myhtml/myosi.h
index 4c516a8..3901fae 100644
--- a/source/myhtml/myosi.h
+++ b/source/myhtml/myosi.h
@@ -29,9 +29,9 @@
#include <stdint.h>
#include <stdarg.h>
-#define MyHTML_VERSION_MAJOR 1
+#define MyHTML_VERSION_MAJOR 3
#define MyHTML_VERSION_MINOR 0
-#define MyHTML_VERSION_PATCH 5
+#define MyHTML_VERSION_PATCH 0
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__WINPTHREADS_VERSION)
#define IS_OS_WINDOWS
@@ -39,11 +39,11 @@
#endif
#if defined(_MSC_VER)
-# define MyHTML_DEPRECATED(func, message) __declspec(deprecated(message)) func
+# define MyHTML_DEPRECATED(func) __declspec(deprecated) func
#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
-# define MyHTML_DEPRECATED(func, message) func __attribute__((deprecated(message)))
+# define MyHTML_DEPRECATED(func) func __attribute__((deprecated))
#else
-# define MyHTML_DEPRECATED(func, message) func
+# define MyHTML_DEPRECATED(func) func
#endif
#ifdef __cplusplus
@@ -114,52 +114,60 @@ extern "C" {
// https://encoding.spec.whatwg.org/#legacy-miscellaneous-encodings
enum myhtml_encoding_list {
- MyHTML_ENCODING_DEFAULT = 0x00,
-// MyHTML_ENCODING_AUTO = 0x01, // future
-// MyHTML_ENCODING_CUSTOM = 0x02, // future
- MyHTML_ENCODING_UTF_8 = 0x00, // default encoding
- MyHTML_ENCODING_UTF_16LE = 0x04,
- MyHTML_ENCODING_UTF_16BE = 0x05,
- MyHTML_ENCODING_X_USER_DEFINED = 0x06,
- MyHTML_ENCODING_BIG5 = 0x07,
- MyHTML_ENCODING_EUC_KR = 0x08,
- MyHTML_ENCODING_GB18030 = 0x09,
- MyHTML_ENCODING_IBM866 = 0x0a,
- MyHTML_ENCODING_ISO_8859_10 = 0x0b,
- MyHTML_ENCODING_ISO_8859_13 = 0x0c,
- MyHTML_ENCODING_ISO_8859_14 = 0x0d,
- MyHTML_ENCODING_ISO_8859_15 = 0x0e,
- MyHTML_ENCODING_ISO_8859_16 = 0x0f,
- MyHTML_ENCODING_ISO_8859_2 = 0x10,
- MyHTML_ENCODING_ISO_8859_3 = 0x11,
- MyHTML_ENCODING_ISO_8859_4 = 0x12,
- MyHTML_ENCODING_ISO_8859_5 = 0x13,
- MyHTML_ENCODING_ISO_8859_6 = 0x14,
- MyHTML_ENCODING_ISO_8859_7 = 0x15,
- MyHTML_ENCODING_ISO_8859_8 = 0x16,
- MyHTML_ENCODING_KOI8_R = 0x17,
- MyHTML_ENCODING_KOI8_U = 0x18,
- MyHTML_ENCODING_MACINTOSH = 0x19,
- MyHTML_ENCODING_WINDOWS_1250 = 0x1a,
- MyHTML_ENCODING_WINDOWS_1251 = 0x1b,
- MyHTML_ENCODING_WINDOWS_1252 = 0x1c,
- MyHTML_ENCODING_WINDOWS_1253 = 0x1d,
- MyHTML_ENCODING_WINDOWS_1254 = 0x1e,
- MyHTML_ENCODING_WINDOWS_1255 = 0x1f,
- MyHTML_ENCODING_WINDOWS_1256 = 0x20,
- MyHTML_ENCODING_WINDOWS_1257 = 0x21,
- MyHTML_ENCODING_WINDOWS_1258 = 0x22,
- MyHTML_ENCODING_WINDOWS_874 = 0x23,
- MyHTML_ENCODING_X_MAC_CYRILLIC = 0x24,
- MyHTML_ENCODING_ISO_2022_JP = 0x25,
- MyHTML_ENCODING_GBK = 0x26,
- MyHTML_ENCODING_SHIFT_JIS = 0x27,
- MyHTML_ENCODING_EUC_JP = 0x28,
- MyHTML_ENCODING_ISO_8859_8_I = 0x29,
- MyHTML_ENCODING_LAST_ENTRY = 0x2a
+ MyHTML_ENCODING_DEFAULT = 0x00,
+// MyHTML_ENCODING_AUTO = 0x01, // future
+ MyHTML_ENCODING_NOT_DETERMINED = 0x02,
+ MyHTML_ENCODING_UTF_8 = 0x00, // default encoding
+ MyHTML_ENCODING_UTF_16LE = 0x04,
+ MyHTML_ENCODING_UTF_16BE = 0x05,
+ MyHTML_ENCODING_X_USER_DEFINED = 0x06,
+ MyHTML_ENCODING_BIG5 = 0x07,
+ MyHTML_ENCODING_EUC_JP = 0x08,
+ MyHTML_ENCODING_EUC_KR = 0x09,
+ MyHTML_ENCODING_GB18030 = 0x0a,
+ MyHTML_ENCODING_GBK = 0x0b,
+ MyHTML_ENCODING_IBM866 = 0x0c,
+ MyHTML_ENCODING_ISO_2022_JP = 0x0d,
+ MyHTML_ENCODING_ISO_8859_10 = 0x0e,
+ MyHTML_ENCODING_ISO_8859_13 = 0x0f,
+ MyHTML_ENCODING_ISO_8859_14 = 0x10,
+ MyHTML_ENCODING_ISO_8859_15 = 0x11,
+ MyHTML_ENCODING_ISO_8859_16 = 0x12,
+ MyHTML_ENCODING_ISO_8859_2 = 0x13,
+ MyHTML_ENCODING_ISO_8859_3 = 0x14,
+ MyHTML_ENCODING_ISO_8859_4 = 0x15,
+ MyHTML_ENCODING_ISO_8859_5 = 0x16,
+ MyHTML_ENCODING_ISO_8859_6 = 0x17,
+ MyHTML_ENCODING_ISO_8859_7 = 0x18,
+ MyHTML_ENCODING_ISO_8859_8 = 0x19,
+ MyHTML_ENCODING_ISO_8859_8_I = 0x1a,
+ MyHTML_ENCODING_KOI8_R = 0x1b,
+ MyHTML_ENCODING_KOI8_U = 0x1c,
+ MyHTML_ENCODING_MACINTOSH = 0x1d,
+ MyHTML_ENCODING_SHIFT_JIS = 0x1e,
+ MyHTML_ENCODING_WINDOWS_1250 = 0x1f,
+ MyHTML_ENCODING_WINDOWS_1251 = 0x20,
+ MyHTML_ENCODING_WINDOWS_1252 = 0x21,
+ MyHTML_ENCODING_WINDOWS_1253 = 0x22,
+ MyHTML_ENCODING_WINDOWS_1254 = 0x23,
+ MyHTML_ENCODING_WINDOWS_1255 = 0x24,
+ MyHTML_ENCODING_WINDOWS_1256 = 0x25,
+ MyHTML_ENCODING_WINDOWS_1257 = 0x26,
+ MyHTML_ENCODING_WINDOWS_1258 = 0x27,
+ MyHTML_ENCODING_WINDOWS_874 = 0x28,
+ MyHTML_ENCODING_X_MAC_CYRILLIC = 0x29,
+ MyHTML_ENCODING_LAST_ENTRY = 0x2a
}
typedef myhtml_encoding_t;
+typedef struct myhtml_encoding_result myhtml_encoding_result_t;
+typedef struct myhtml_encoding_trigram myhtml_encoding_trigram_t;
+typedef struct myhtml_encoding_trigram_result myhtml_encoding_trigram_result_t;
+typedef struct myhtml_encoding_unicode_result myhtml_encoding_unicode_result_t;
+typedef struct myhtml_encoding_detect_name_entry myhtml_encoding_detect_name_entry_t;
+typedef struct myhtml_encoding_detect_attr myhtml_encoding_detect_attr_t;
+typedef struct myhtml_encoding_entry_name_index myhtml_encoding_entry_name_index_t;
+
// char references
typedef struct myhtml_data_process_entry myhtml_data_process_entry_t;
@@ -207,7 +215,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
- MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
+ MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008
}
typedef myhtml_tree_parse_flags_t;
@@ -215,7 +223,6 @@ typedef struct myhtml_tree_temp_tag_name myhtml_tree_temp_tag_name_t;
typedef struct myhtml_tree_insertion_list myhtml_tree_insertion_list_t;
typedef struct myhtml_tree_token_list myhtml_tree_token_list_t;
typedef struct myhtml_tree_list myhtml_tree_list_t;
-typedef struct myhtml_tree_indexes myhtml_tree_indexes_t;
typedef struct myhtml_tree_doctype myhtml_tree_doctype_t;
typedef struct myhtml_async_args myhtml_async_args_t;
@@ -316,7 +323,7 @@ enum myhtml_tokenizer_state {
MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED = 0x026,
MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED = 0x027,
MyHTML_TOKENIZER_STATE_CHARACTER_REFERENCE_IN_ATTRIBUTE_VALUE = 0x028,
- MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE__QUOTED = 0x029,
+ MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED = 0x029,
MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG = 0x02a,
MyHTML_TOKENIZER_STATE_BOGUS_COMMENT = 0x02b,
MyHTML_TOKENIZER_STATE_MARKUP_DECLARATION_OPEN = 0x02c,
@@ -344,8 +351,9 @@ enum myhtml_tokenizer_state {
MyHTML_TOKENIZER_STATE_BOGUS_DOCTYPE = 0x042,
MyHTML_TOKENIZER_STATE_CDATA_SECTION = 0x043,
MyHTML_TOKENIZER_STATE_CUSTOM_AFTER_DOCTYPE_NAME_A_Z = 0x044,
+ MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP = 0x045,
MyHTML_TOKENIZER_STATE_FIRST_ENTRY = MyHTML_TOKENIZER_STATE_DATA,
- MyHTML_TOKENIZER_STATE_LAST_ENTRY = 0x045
+ MyHTML_TOKENIZER_STATE_LAST_ENTRY = 0x046
};
enum myhtml_insertion_mode {
@@ -499,6 +507,8 @@ typedef void (*myhtml_callback_tree_node_f)(myhtml_tree_t* tree, myhtml_tree_nod
// find attribute value functions
typedef bool (*myhtml_attribute_value_find_f)(myhtml_string_t* str_key, const char* value, size_t value_len);
+// callback to be implemented by the user
+typedef void (*myhtml_callback_serialize_f)(const char* buffer, size_t size, void* ctx);
void * myhtml_mem_malloc(size_t size);
void * myhtml_mem_realloc(void* dst, size_t size);
diff --git a/source/myhtml/mystring.h b/source/myhtml/mystring.h
index cf14992..0a0c02c 100644
--- a/source/myhtml/mystring.h
+++ b/source/myhtml/mystring.h
@@ -97,7 +97,7 @@ myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj);
void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw);
void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw);
myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj);
-
+
/* basic api */
char * myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size);
char * myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size);
diff --git a/source/myhtml/rules.c b/source/myhtml/rules.c
index e95136f..3921952 100644
--- a/source/myhtml/rules.c
+++ b/source/myhtml/rules.c
@@ -35,14 +35,11 @@ myhtml_token_node_t * myhtml_insertion_fix_split_for_text_begin_ws(myhtml_tree_t
return NULL;
// create new ws token and insert
- mcobject_async_status_t mcstatus;
- myhtml_token_node_t* new_token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
+ myhtml_token_node_t* new_token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
- if(mcstatus)
+ if(new_token == NULL)
return NULL;
- myhtml_token_node_clean(new_token);
-
myhtml_string_init(tree->mchar, tree->mchar_node_id, &new_token->str, (len + 2));
myhtml_string_append(&new_token->str, token->str.data, len);
@@ -92,6 +89,7 @@ bool myhtml_insertion_mode_initial(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG__TEXT:
{
if(token->type & MyHTML_TOKEN_TYPE_WHITESPACE) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:INFO */
return false;
}
@@ -150,19 +148,27 @@ bool myhtml_insertion_mode_before_html(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG_BODY:
{
myhtml_tree_node_insert_root(tree, NULL, MyHTML_NAMESPACE_HTML);
+
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_HTML NS:MyHTML_NAMESPACE_HTML */
+
tree->insert_mode = MyHTML_INSERTION_MODE_BEFORE_HEAD;
return true;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:WARNING */
break;
+ }
}
}
else {
switch (token->tag_id)
{
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:WARNING */
break;
+ }
case MyHTML_TAG__COMMENT:
{
@@ -173,6 +179,7 @@ bool myhtml_insertion_mode_before_html(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG__TEXT:
{
if(token->type & MyHTML_TOKEN_TYPE_WHITESPACE) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:INFO */
break;
}
@@ -194,6 +201,8 @@ bool myhtml_insertion_mode_before_html(myhtml_tree_t* tree, myhtml_token_node_t*
default:
{
myhtml_tree_node_insert_root(tree, NULL, MyHTML_NAMESPACE_HTML);
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_HTML NS:MyHTML_NAMESPACE_HTML */
+
tree->insert_mode = MyHTML_INSERTION_MODE_BEFORE_HEAD;
return true;
}
@@ -214,12 +223,17 @@ bool myhtml_insertion_mode_before_head(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG_BODY:
{
tree->node_head = myhtml_tree_node_insert(tree, MyHTML_TAG_HEAD, MyHTML_NAMESPACE_HTML);
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_HEAD NS:MyHTML_NAMESPACE_HTML */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_HEAD;
return true;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
}
}
else {
@@ -228,6 +242,7 @@ bool myhtml_insertion_mode_before_head(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG__TEXT:
{
if(token->type & MyHTML_TOKEN_TYPE_WHITESPACE) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:INFO */
break;
}
@@ -245,8 +260,10 @@ bool myhtml_insertion_mode_before_head(myhtml_tree_t* tree, myhtml_token_node_t*
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
+ }
case MyHTML_TAG_HTML:
{
@@ -263,6 +280,8 @@ bool myhtml_insertion_mode_before_head(myhtml_tree_t* tree, myhtml_token_node_t*
default:
{
tree->node_head = myhtml_tree_node_insert(tree, MyHTML_TAG_HEAD, MyHTML_NAMESPACE_HTML);
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_HEAD NS:MyHTML_NAMESPACE_HTML */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_HEAD;
return true;
}
@@ -296,10 +315,22 @@ bool myhtml_insertion_mode_in_head(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_TEMPLATE:
{
if(myhtml_tree_open_elements_find_by_tag_idx_reverse(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL) == NULL)
+ {
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:WARNING */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_ANY NEED_TAG_ID:MyHTML_TAG_TEMPLATE NEED_NS:MyHTML_NAMESPACE_HTML */
+
break;
+ }
// oh God...
myhtml_tree_generate_all_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
+
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node && current_node->tag_id != MyHTML_TAG_TEMPLATE) {
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED_CLOSE_BEFORE LEVEL:WARNING */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_TEMPLATE NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
+
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_active_formatting_up_to_last_marker(tree);
myhtml_tree_template_insertion_pop(tree);
@@ -308,8 +339,11 @@ bool myhtml_insertion_mode_in_head(myhtml_tree_t* tree, myhtml_token_node_t* tok
break;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:WARNING */
break;
+ }
}
}
else {
@@ -339,8 +373,11 @@ bool myhtml_insertion_mode_in_head(myhtml_tree_t* tree, myhtml_token_node_t* tok
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
+ }
case MyHTML_TAG_HTML:
{
@@ -420,7 +457,7 @@ bool myhtml_insertion_mode_in_head(myhtml_tree_t* tree, myhtml_token_node_t* tok
node->ns = MyHTML_NAMESPACE_HTML;
node->flags = MyHTML_TREE_NODE_PARSER_INSERTED|MyHTML_TREE_NODE_BLOCKING;
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, insert_mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, insert_mode);
myhtml_tree_open_elements_append(tree, node);
tree->orig_insert_mode = tree->insert_mode;
@@ -443,8 +480,11 @@ bool myhtml_insertion_mode_in_head(myhtml_tree_t* tree, myhtml_token_node_t* tok
break;
}
- case MyHTML_TAG_HEAD:
+ case MyHTML_TAG_HEAD: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:WARNING */
break;
+ }
default:
{
@@ -478,15 +518,21 @@ bool myhtml_insertion_mode_in_head_noscript(myhtml_tree_t* tree, myhtml_token_no
return true;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
}
}
else {
switch (token->tag_id)
{
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
case MyHTML_TAG_HTML:
{
@@ -514,11 +560,17 @@ bool myhtml_insertion_mode_in_head_noscript(myhtml_tree_t* tree, myhtml_token_no
return myhtml_insertion_mode_in_head(tree, token);
case MyHTML_TAG_HEAD:
- case MyHTML_TAG_NOSCRIPT:
+ case MyHTML_TAG_NOSCRIPT: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:WARNING */
break;
+ }
default:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_open_elements_pop(tree);
tree->insert_mode = MyHTML_INSERTION_MODE_IN_HEAD;
return true;
@@ -540,6 +592,8 @@ bool myhtml_insertion_mode_after_head(myhtml_tree_t* tree, myhtml_token_node_t*
{
tree->node_body = myhtml_tree_node_insert(tree, MyHTML_TAG_BODY, MyHTML_NAMESPACE_HTML);
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
+
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_BODY NS:MyHTML_NAMESPACE_HTML */
return true;
}
@@ -548,8 +602,11 @@ bool myhtml_insertion_mode_after_head(myhtml_tree_t* tree, myhtml_token_node_t*
return myhtml_insertion_mode_in_head(tree, token);
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
}
}
else {
@@ -577,8 +634,11 @@ bool myhtml_insertion_mode_after_head(myhtml_tree_t* tree, myhtml_token_node_t*
myhtml_tree_node_insert_comment(tree, token, 0);
break;
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
+ }
case MyHTML_TAG_HTML:
return myhtml_insertion_mode_in_body(tree, token);
@@ -608,19 +668,25 @@ bool myhtml_insertion_mode_after_head(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG_TEMPLATE:
case MyHTML_TAG_TITLE:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_open_elements_append(tree, tree->node_head);
myhtml_insertion_mode_in_head(tree, token);
myhtml_tree_open_elements_remove(tree, tree->node_head);
}
- case MyHTML_TAG_HEAD:
- {
+ case MyHTML_TAG_HEAD: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:WARNING */
break;
}
default:
{
tree->node_body = myhtml_tree_node_insert(tree, MyHTML_TAG_BODY, MyHTML_NAMESPACE_HTML);
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_MISSING_NEED LEVEL:INFO TAG_ID:MyHTML_TAG_BODY NS:MyHTML_NAMESPACE_HTML */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
return true;
}
@@ -642,6 +708,14 @@ bool myhtml_insertion_mode_in_body_other_end_tag(myhtml_tree_t* tree, myhtml_tok
// step 2
if(node->tag_id == token->tag_id && node->ns == MyHTML_NAMESPACE_HTML) {
myhtml_tree_generate_implied_end_tags(tree, token->tag_id, MyHTML_NAMESPACE_HTML);
+
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node->tag_id != node->tag_id) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:node->tag_id NEED_NS:node->ns */
+ }
+
myhtml_tree_open_elements_pop_until_by_node(tree, node, false);
return false;
@@ -649,6 +723,8 @@ bool myhtml_insertion_mode_in_body_other_end_tag(myhtml_tree_t* tree, myhtml_tok
const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, node->tag_id);
if(tag_ctx->cats[ node->ns ] & MyHTML_TAG_CATEGORIES_SPECIAL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
}
@@ -670,8 +746,11 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
myhtml_tree_node_t* body_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_BODY, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE);
- if(body_node == NULL)
+ if(body_node == NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
for (size_t i = 0; i < tree->open_elements->length; i++) {
switch (tree->open_elements->list[i]->tag_id) {
@@ -710,8 +789,11 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
myhtml_tree_node_t* body_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_BODY, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE);
- if(body_node == NULL)
+ if(body_node == NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
for (size_t i = 0; i < tree->open_elements->length; i++) {
switch (tree->open_elements->list[i]->tag_id) {
@@ -773,16 +855,23 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_SUMMARY:
case MyHTML_TAG_UL:
{
- if(myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE) == NULL)
+ if(myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE) == NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
// step 1
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
// step 2
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->ns != MyHTML_NAMESPACE_HTML)
- // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, token->tag_id) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 3
myhtml_tree_open_elements_pop_until(tree, token->tag_id, MyHTML_NAMESPACE_HTML, false);
@@ -802,8 +891,10 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
tree->node_form = NULL;
// step 3
- if(node == NULL || myhtml_tree_element_in_scope_by_node(tree, node, MyHTML_TAG_CATEGORIES_SCOPE) == false) {
+ if(node == NULL || myhtml_tree_element_in_scope_by_node(node, MyHTML_TAG_CATEGORIES_SCOPE) == false) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
}
@@ -811,9 +902,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
// step 5
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node != node)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node != node) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:node->tag_id NEED_NS:node->ns */
+ }
// step 6
myhtml_tree_open_elements_remove(tree, node);
@@ -822,17 +916,23 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
// step 1
myhtml_tree_node_t* form_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_FORM, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE);
- if(form_node)
+ if(form_node == NULL) { /* no HTML form element in scope: report it and ignore this end tag */
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
// step 2
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
// step 3
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node != node)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_FORM) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_FORM NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 4
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_FORM, MyHTML_NAMESPACE_HTML, false);
@@ -848,7 +948,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_node_insert(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML);
}
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
break;
}
@@ -856,6 +956,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_LI, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_LIST_ITEM) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -863,9 +964,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_LI, MyHTML_NAMESPACE_HTML);
// step 2
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != MyHTML_TAG_LI)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_LI) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_LI NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 3
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_LI, MyHTML_NAMESPACE_HTML, false);
@@ -878,6 +982,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
if(myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -885,9 +990,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, token->tag_id, MyHTML_NAMESPACE_HTML);
// step 2
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != token->tag_id)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, token->tag_id) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 3
myhtml_tree_open_elements_pop_until(tree, token->tag_id, MyHTML_NAMESPACE_HTML, false);
@@ -927,6 +1035,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if(node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -934,9 +1043,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
// step 2
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != token->tag_id)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, token->tag_id) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 3
while(tree->open_elements->length) {
@@ -972,7 +1084,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_TT:
case MyHTML_TAG_U:
{
- myhtml_tree_adoption_agency_algorithm(tree, token->tag_id);
+ myhtml_tree_adoption_agency_algorithm(tree, token, token->tag_id);
//myhtml_insertion_mode_in_body_other_end_tag(tree, token);
break;
@@ -984,6 +1096,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
if(myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -991,9 +1104,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
// step 2
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != token->tag_id)
- // // parse error
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, token->tag_id) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
// step 3
myhtml_tree_open_elements_pop_until(tree, token->tag_id, MyHTML_NAMESPACE_HTML, false);
@@ -1006,6 +1122,9 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_BR:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES CONVERT STATUS:ELEMENT_BAD LEVEL:ERROR FROM_TAG_ID:MyHTML_TAG_BR FROM_NS:MyHTML_NAMESPACE_HTML FROM_TYPE:MyHTML_TOKEN_TYPE_CLOSE TO_TAG_ID:MyHTML_TAG_BR TO_NS:MyHTML_NAMESPACE_HTML TO_TYPE:MyHTML_TOKEN_TYPE_OPEN */
+
if(token->attr_first) {
token->attr_first = NULL;
}
@@ -1022,6 +1141,8 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_open_elements_pop(tree);
tree->flags ^= (tree->flags & MyHTML_TREE_FLAGS_FRAMESET_OK);
+
+ break;
}
default:
@@ -1037,6 +1158,9 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG__TEXT:
{
if(token->type & MyHTML_TOKEN_TYPE_NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:NULL_CHAR ACTION:IGNORE LEVEL:ERROR */
+
myhtml_insertion_fix_for_null_char_drop_all(tree, token);
if(token->str.length) {
@@ -1062,13 +1186,22 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_node_insert_comment(tree, token, 0);
break;
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:WARNING */
break;
+ }
case MyHTML_TAG_HTML:
{
- if(myhtml_tree_open_elements_find_by_tag_idx(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL))
+ if(myhtml_tree_open_elements_find_by_tag_idx(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL)) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:WARNING */
break;
+ }
+
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:WARNING */
if(tree->open_elements->length > 0) {
myhtml_tree_node_t* top_node = tree->open_elements->list[0];
@@ -1076,7 +1209,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if(top_node->token) {
myhtml_token_node_wait_for_done(token);
myhtml_token_node_wait_for_done(top_node->token);
- myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_attr_id);
+ myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_rules_attr_id);
}
else {
top_node->token = token;
@@ -1102,12 +1235,20 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_BODY:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:WARNING */
+
if(tree->open_elements->length > 1)
{
if(!(tree->open_elements->list[1]->tag_id == MyHTML_TAG_BODY &&
tree->open_elements->list[1]->ns == MyHTML_NAMESPACE_HTML) ||
myhtml_tree_open_elements_find_by_tag_idx(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL))
+ {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:WARNING */
+
break;
+ }
}
else
break;
@@ -1120,7 +1261,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if(top_node->token) {
myhtml_token_node_wait_for_done(token);
myhtml_token_node_wait_for_done(top_node->token);
- myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_attr_id);
+ myhtml_token_node_attr_copy_with_check(tree->token, token, top_node->token, tree->mcasync_rules_attr_id);
}
else {
top_node->token = token;
@@ -1132,21 +1273,33 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_FRAMESET:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
if(tree->open_elements->length > 1)
{
if(!(tree->open_elements->list[1]->tag_id == MyHTML_TAG_BODY &&
tree->open_elements->list[1]->ns == MyHTML_NAMESPACE_HTML))
+ {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
else
break;
- if((tree->flags & MyHTML_TREE_FLAGS_FRAMESET_OK) == 0)
+ if((tree->flags & MyHTML_TREE_FLAGS_FRAMESET_OK) == 0) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_node_t* node = tree->open_elements->list[1];
- myhtml_tree_node_remove(tree, node);
+ myhtml_tree_node_remove(node);
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_HTML, MyHTML_NAMESPACE_HTML, true);
myhtml_tree_node_insert_html_element(tree, token);
@@ -1206,7 +1359,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_UL:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1216,7 +1369,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_MENU:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
@@ -1236,7 +1389,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_H6:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
@@ -1249,8 +1402,11 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_H5:
case MyHTML_TAG_H6:
- if(current_node->ns == MyHTML_NAMESPACE_HTML)
+ if(current_node->ns == MyHTML_NAMESPACE_HTML) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:WARNING */
myhtml_tree_open_elements_pop(tree);
+ }
break;
@@ -1266,7 +1422,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_LISTING:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1283,11 +1439,14 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_FORM:
{
myhtml_tree_node_t* is_in_node = myhtml_tree_open_elements_find_by_tag_idx(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL);
- if(tree->node_form && is_in_node == NULL)
+ if(tree->node_form && is_in_node == NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
+ }
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_t* current = myhtml_tree_node_insert_html_element(tree, token);
@@ -1310,8 +1469,20 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_node_t* node = tree->open_elements->list[oel_index];
const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, node->tag_id);
+ /* 3 */
if(myhtml_is_html_node(node, MyHTML_TAG_LI)) {
+ /* 3.1 */
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_LI, MyHTML_NAMESPACE_HTML);
+
+ /* 3.2 */
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_LI) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_LI NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
+
+ /* 3.3 */
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_LI, MyHTML_NAMESPACE_HTML, false);
break;
}
@@ -1324,7 +1495,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
}
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1347,11 +1518,29 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if(myhtml_is_html_node(node, MyHTML_TAG_DD)) {
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_DD, MyHTML_NAMESPACE_HTML);
+
+ /* 3.2 */
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_DD) == false) {
+ // parse error: after generating implied end tags the current node is not the expected HTML dd element
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_DD NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
+
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_DD, MyHTML_NAMESPACE_HTML, false);
break;
}
else if(myhtml_is_html_node(node, MyHTML_TAG_DT)) {
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_DT, MyHTML_NAMESPACE_HTML);
+
+ /* 3.2 */
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_DT) == false) {
+ // parse error: after generating implied end tags the current node is not the expected HTML dt element
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_DT NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
+
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_DT, MyHTML_NAMESPACE_HTML, false);
break;
}
@@ -1364,7 +1553,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
}
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1374,7 +1563,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_PLAINTEXT:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1386,6 +1575,9 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_BUTTON:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_BUTTON, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE)) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_BUTTON, MyHTML_NAMESPACE_HTML, false);
}
@@ -1402,7 +1594,10 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_node_t* node = myhtml_tree_active_formatting_between_last_marker(tree, MyHTML_TAG_A, NULL);
if(node) {
- myhtml_tree_adoption_agency_algorithm(tree, MyHTML_TAG_A);
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
+ myhtml_tree_adoption_agency_algorithm(tree, token, MyHTML_TAG_A);
node = myhtml_tree_active_formatting_between_last_marker(tree, MyHTML_TAG_A, NULL);
if(node) {
@@ -1443,7 +1638,10 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_active_formatting_reconstruction(tree);
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_NOBR, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE)) {
- myhtml_tree_adoption_agency_algorithm(tree, MyHTML_TAG_NOBR);
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
+ myhtml_tree_adoption_agency_algorithm(tree, token, MyHTML_TAG_NOBR);
myhtml_tree_active_formatting_reconstruction(tree);
}
@@ -1470,7 +1668,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if((tree->compat_mode & MyHTML_TREE_COMPAT_MODE_QUIRKS) == 0 &&
myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON))
{
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_insert_html_element(tree, token);
@@ -1523,7 +1721,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_HR:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
@@ -1540,6 +1738,9 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_IMAGE:
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES CONVERT STATUS:ELEMENT_CONVERT LEVEL:ERROR FROM_TAG_ID:MyHTML_TAG_IMAGE FROM_NS:MyHTML_NAMESPACE_ANY FROM_TYPE:MyHTML_TOKEN_TYPE_OPEN TO_TAG_ID:MyHTML_TAG_IMG TO_NS:MyHTML_NAMESPACE_ANY TO_TYPE:MyHTML_TOKEN_TYPE_OPEN */
+
token->tag_id = MyHTML_TAG_IMG;
return true;
}
@@ -1565,7 +1766,7 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_XMP:
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_BUTTON)) {
- myhtml_tree_tags_close_p(tree);
+ myhtml_tree_tags_close_p(tree, token);
}
myhtml_tree_active_formatting_reconstruction(tree);
@@ -1654,6 +1855,8 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
if(myhtml_is_html_node(current_node, MyHTML_TAG_MENUITEM))
myhtml_tree_open_elements_pop(tree);
+ myhtml_tree_active_formatting_reconstruction(tree);
+
myhtml_tree_node_insert_html_element(tree, token);
break;
}
@@ -1665,8 +1868,12 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
}
- // myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- // if(current_node->tag_id != MyHTML_TAG_RUBY) PARSE_ERROR
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node->tag_id != MyHTML_TAG_RUBY) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_RUBY NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
myhtml_tree_node_insert_html_element(tree, token);
break;
@@ -1679,9 +1886,13 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_RTC, MyHTML_NAMESPACE_HTML);
}
- // myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- // if(current_node->tag_id != MyHTML_TAG_RTC ||
- // current_node->tag_id != MyHTML_TAG_RUBY) PARSE_ERROR
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node->tag_id != MyHTML_TAG_RTC && current_node->tag_id != MyHTML_TAG_RUBY) {
+ // parse error: the current node is neither an rtc nor a ruby element ("||" here would make the test always true)
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_RTC NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_RUBY NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
myhtml_tree_node_insert_html_element(tree, token);
break;
@@ -1735,7 +1946,8 @@ bool myhtml_insertion_mode_in_body(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_THEAD:
case MyHTML_TAG_TR:
{
- // Ignore this token.
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -1776,6 +1988,9 @@ bool myhtml_insertion_mode_text(myhtml_tree_t* tree, myhtml_token_node_t* token)
else {
if(token->tag_id == MyHTML_TAG__END_OF_FILE)
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:PREMATURE_TERMINATION LEVEL:ERROR */
+
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
if(current_node->tag_id == MyHTML_TAG_SCRIPT)
@@ -1802,9 +2017,11 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
{
myhtml_tree_node_t* table_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TABLE, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(table_node == NULL)
+ if(table_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_open_elements_pop_until_by_node(tree, table_node, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -1825,6 +2042,7 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
case MyHTML_TAG_TR:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -1877,8 +2095,10 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
myhtml_tree_node_insert_comment(tree, token, 0);
break;
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:WARNING */
break;
+ }
case MyHTML_TAG_CAPTION:
{
@@ -1936,10 +2156,16 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
case MyHTML_TAG_TABLE:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_node_t* table_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TABLE, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(table_node == NULL)
+ if(table_node == NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_TABLE, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -1966,6 +2192,8 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_node_insert_html_element(tree, token);
myhtml_tree_open_elements_pop(tree);
@@ -1976,6 +2204,7 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
case MyHTML_TAG_FORM:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
myhtml_tree_node_t* template = myhtml_tree_open_elements_find_by_tag_idx(tree, MyHTML_TAG_TEMPLATE, MyHTML_NAMESPACE_HTML, NULL);
if(tree->node_form || template)
@@ -1992,6 +2221,8 @@ bool myhtml_insertion_mode_in_table(myhtml_tree_t* tree, myhtml_token_node_t* to
default:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
tree->foster_parenting = true;
myhtml_insertion_mode_in_body(tree, token);
tree->foster_parenting = false;
@@ -2010,6 +2241,9 @@ bool myhtml_insertion_mode_in_table_text(myhtml_tree_t* tree, myhtml_token_node_
if(token->tag_id == MyHTML_TAG__TEXT)
{
if(token->type & MyHTML_TOKEN_TYPE_NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:NULL_CHAR ACTION:IGNORE LEVEL:ERROR */
+
myhtml_insertion_fix_for_null_char_drop_all(tree, token);
if(token->str.length)
@@ -2032,6 +2266,8 @@ bool myhtml_insertion_mode_in_table_text(myhtml_tree_t* tree, myhtml_token_node_
if(is_not_ws)
{
for(size_t i = 0; i < token_list->length; i++) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR TOKEN:token_list->list[i] */
+
tree->foster_parenting = true;
myhtml_insertion_mode_in_body(tree, token_list->list[i]);
tree->foster_parenting = false;
@@ -2059,15 +2295,18 @@ bool myhtml_insertion_mode_in_caption(myhtml_tree_t* tree, myhtml_token_node_t*
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != MyHTML_TAG_CAPTION) {
- // // parse error
- //}
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_CAPTION) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_CAPTION NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_active_formatting_up_to_last_marker(tree);
@@ -2080,15 +2319,18 @@ bool myhtml_insertion_mode_in_caption(myhtml_tree_t* tree, myhtml_token_node_t*
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != MyHTML_TAG_CAPTION) {
- // // parse error
- //}
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_CAPTION) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_CAPTION NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_active_formatting_up_to_last_marker(tree);
@@ -2108,6 +2350,7 @@ bool myhtml_insertion_mode_in_caption(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG_THEAD:
case MyHTML_TAG_TR:
{
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -2130,15 +2373,18 @@ bool myhtml_insertion_mode_in_caption(myhtml_tree_t* tree, myhtml_token_node_t*
{
if(myhtml_tree_element_in_scope(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE) == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
- //myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- //if(current_node->tag_id != MyHTML_TAG_CAPTION) {
- // // parse error
- //}
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_CAPTION) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_CAPTION NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_CAPTION, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_active_formatting_up_to_last_marker(tree);
@@ -2164,20 +2410,21 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
{
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node == NULL || !(current_node->tag_id == MyHTML_TAG_COLGROUP &&
- current_node->ns == MyHTML_NAMESPACE_HTML))
- {
- break;
+ if(current_node && myhtml_is_html_node(current_node, MyHTML_TAG_COLGROUP)) {
+ myhtml_tree_open_elements_pop(tree);
+
+ tree->insert_mode = MyHTML_INSERTION_MODE_IN_TABLE;
+ return false;
}
- myhtml_tree_open_elements_pop(tree);
-
- tree->insert_mode = MyHTML_INSERTION_MODE_IN_TABLE;
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
case MyHTML_TAG_COL:
{
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
}
@@ -2189,15 +2436,15 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
default: {
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node && current_node->tag_id == MyHTML_TAG_COLGROUP &&
- current_node->ns == MyHTML_NAMESPACE_HTML)
- {
+ if(current_node && myhtml_is_html_node(current_node, MyHTML_TAG_COLGROUP)) {
myhtml_tree_open_elements_pop(tree);
tree->insert_mode = MyHTML_INSERTION_MODE_IN_TABLE;
return true;
}
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
}
@@ -2216,10 +2463,10 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
if(new_token)
myhtml_tree_node_insert_text(tree, new_token);
+ /* default: */
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node && current_node->tag_id == MyHTML_TAG_COLGROUP)
- {
+ if(current_node && myhtml_is_html_node(current_node, MyHTML_TAG_COLGROUP)) {
myhtml_tree_open_elements_pop(tree);
tree->insert_mode = MyHTML_INSERTION_MODE_IN_TABLE;
@@ -2227,6 +2474,7 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -2236,9 +2484,10 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
-
+ }
case MyHTML_TAG_HTML:
{
return myhtml_insertion_mode_in_body(tree, token);
@@ -2263,9 +2512,7 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
{
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node && current_node->tag_id == MyHTML_TAG_COLGROUP &&
- current_node->ns == MyHTML_NAMESPACE_HTML)
- {
+ if(current_node && myhtml_is_html_node(current_node, MyHTML_TAG_COLGROUP)) {
myhtml_tree_open_elements_pop(tree);
tree->insert_mode = MyHTML_INSERTION_MODE_IN_TABLE;
@@ -2273,6 +2520,7 @@ bool myhtml_insertion_mode_in_column_group(myhtml_tree_t* tree, myhtml_token_nod
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
}
}
@@ -2292,9 +2540,11 @@ bool myhtml_insertion_mode_in_table_body(myhtml_tree_t* tree, myhtml_token_node_
{
myhtml_tree_node_t* node = myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(node == NULL)
+ if(node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_clear_stack_back_table_body_context(tree);
myhtml_tree_open_elements_pop(tree);
@@ -2309,9 +2559,14 @@ bool myhtml_insertion_mode_in_table_body(myhtml_tree_t* tree, myhtml_token_node_
myhtml_tree_node_t* tfoot_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TFOOT, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
myhtml_tree_node_t* thead_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_THEAD, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(tbody_node == NULL && tfoot_node == NULL && thead_node == NULL)
+ if(tbody_node == NULL && tfoot_node == NULL && thead_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_THEAD NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TBODY NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TFOOT NEED_NS:MyHTML_NAMESPACE_HTML */
break;
+ }
myhtml_tree_clear_stack_back_table_body_context(tree);
myhtml_tree_open_elements_pop(tree);
@@ -2330,6 +2585,7 @@ bool myhtml_insertion_mode_in_table_body(myhtml_tree_t* tree, myhtml_token_node_
case MyHTML_TAG_TR:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
}
@@ -2354,6 +2610,8 @@ bool myhtml_insertion_mode_in_table_body(myhtml_tree_t* tree, myhtml_token_node_
case MyHTML_TAG_TD:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_clear_stack_back_table_body_context(tree);
myhtml_tree_node_insert(tree, MyHTML_TAG_TR, MyHTML_NAMESPACE_HTML);
@@ -2373,9 +2631,14 @@ bool myhtml_insertion_mode_in_table_body(myhtml_tree_t* tree, myhtml_token_node_
myhtml_tree_node_t* tfoot_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TFOOT, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
myhtml_tree_node_t* thead_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_THEAD, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(tbody_node == NULL && tfoot_node == NULL && thead_node == NULL)
+ if(tbody_node == NULL && tfoot_node == NULL && thead_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_THEAD NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TBODY NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TFOOT NEED_NS:MyHTML_NAMESPACE_HTML */
break;
+ }
myhtml_tree_clear_stack_back_table_body_context(tree);
myhtml_tree_open_elements_pop(tree);
@@ -2401,9 +2664,11 @@ bool myhtml_insertion_mode_in_row(myhtml_tree_t* tree, myhtml_token_node_t* toke
{
myhtml_tree_node_t* tr_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TR, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(tr_node == NULL)
+ if(tr_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_clear_stack_back_table_row_context(tree);
@@ -2417,9 +2682,12 @@ bool myhtml_insertion_mode_in_row(myhtml_tree_t* tree, myhtml_token_node_t* toke
{
myhtml_tree_node_t* tr_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TR, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(tr_node == NULL)
+ if(tr_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TR NEED_NS:MyHTML_NAMESPACE_HTML */
break;
+ }
myhtml_tree_clear_stack_back_table_row_context(tree);
myhtml_tree_open_elements_pop(tree);
@@ -2433,9 +2701,11 @@ bool myhtml_insertion_mode_in_row(myhtml_tree_t* tree, myhtml_token_node_t* toke
case MyHTML_TAG_THEAD:
{
myhtml_tree_node_t* node = myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(node == NULL)
+ if(node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_node_t* tr_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TR, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
if(tr_node == NULL)
@@ -2448,6 +2718,19 @@ bool myhtml_insertion_mode_in_row(myhtml_tree_t* tree, myhtml_token_node_t* toke
return true;
}
+ case MyHTML_TAG_BODY:
+ case MyHTML_TAG_CAPTION:
+ case MyHTML_TAG_COL:
+ case MyHTML_TAG_COLGROUP:
+ case MyHTML_TAG_HTML:
+ case MyHTML_TAG_TD:
+ case MyHTML_TAG_TH:
+ {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+ break;
+ }
+
default:
return myhtml_insertion_mode_in_table(tree, token);
}
@@ -2476,9 +2759,12 @@ bool myhtml_insertion_mode_in_row(myhtml_tree_t* tree, myhtml_token_node_t* toke
{
myhtml_tree_node_t* tr_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TR, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(tr_node == NULL)
+ if(tr_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TR NEED_NS:MyHTML_NAMESPACE_HTML */
break;
+ }
myhtml_tree_clear_stack_back_table_row_context(tree);
myhtml_tree_open_elements_pop(tree);
@@ -2505,18 +2791,21 @@ bool myhtml_insertion_mode_in_cell(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
myhtml_tree_node_t* node = myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(node == NULL)
+ if(node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(!(current_node->ns = MyHTML_NAMESPACE_HTML &&
- current_node->tag_id == token->tag_id))
+ if(myhtml_is_html_node(current_node, token->tag_id) == false)
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
}
myhtml_tree_open_elements_pop_until(tree, token->tag_id, MyHTML_NAMESPACE_HTML, false);
@@ -2532,7 +2821,12 @@ bool myhtml_insertion_mode_in_cell(myhtml_tree_t* tree, myhtml_token_node_t* tok
case MyHTML_TAG_COL:
case MyHTML_TAG_COLGROUP:
case MyHTML_TAG_HTML:
+ {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
+
case MyHTML_TAG_TABLE:
case MyHTML_TAG_TBODY:
@@ -2542,18 +2836,20 @@ bool myhtml_insertion_mode_in_cell(myhtml_tree_t* tree, myhtml_token_node_t* tok
{
myhtml_tree_node_t* node = myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(node == NULL)
+ if(node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TD, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
if(node) {
- myhtml_tree_close_cell(tree, node);
+ myhtml_tree_close_cell(tree, node, token);
}
else {
node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TH, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
if(node)
- myhtml_tree_close_cell(tree, node);
+ myhtml_tree_close_cell(tree, node, token);
}
return true;
@@ -2579,11 +2875,16 @@ bool myhtml_insertion_mode_in_cell(myhtml_tree_t* tree, myhtml_token_node_t* tok
myhtml_tree_node_t* td_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TD, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
myhtml_tree_node_t* th_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_TH, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(td_node == NULL && th_node == NULL)
+ if(td_node == NULL && th_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TD NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TH NEED_NS:MyHTML_NAMESPACE_HTML */
+
break;
+ }
- myhtml_tree_close_cell(tree, (td_node == NULL ? th_node : td_node));
+ myhtml_tree_close_cell(tree, (td_node == NULL ? th_node : td_node), token);
return true;
}
@@ -2605,31 +2906,28 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
{
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node->tag_id == MyHTML_TAG_OPTION &&
- current_node->ns == MyHTML_NAMESPACE_HTML)
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_OPTION))
{
if(tree->open_elements->length > 1) {
myhtml_tree_node_t *optgrp_node = tree->open_elements->list[ tree->open_elements->length - 2 ];
- if(optgrp_node->tag_id == MyHTML_TAG_OPTGROUP &&
- optgrp_node->ns == MyHTML_NAMESPACE_HTML)
+ if(myhtml_is_html_node(optgrp_node, MyHTML_TAG_OPTGROUP))
{
myhtml_tree_open_elements_pop(tree);
}
}
- else {
- MyHTML_DEBUG_ERROR("in select state; open elements length < 2");
- }
}
current_node = myhtml_tree_current_node(tree);
- if(current_node->tag_id == MyHTML_TAG_OPTGROUP &&
- current_node->ns == MyHTML_NAMESPACE_HTML)
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_OPTGROUP))
myhtml_tree_open_elements_pop(tree);
- else
+ else {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
break;
}
@@ -2638,24 +2936,27 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
{
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node->tag_id == MyHTML_TAG_OPTION &&
- current_node->ns == MyHTML_NAMESPACE_HTML)
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_OPTION))
myhtml_tree_open_elements_pop(tree);
- else
+ else {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
break;
}
case MyHTML_TAG_SELECT:
{
- // parse error
myhtml_tree_node_t* select_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_SELECT, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_SELECT);
- if(select_node == NULL)
+ if(select_node == NULL) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
break;
+ }
myhtml_tree_open_elements_pop_until_by_node(tree, select_node, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -2666,9 +2967,12 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
case MyHTML_TAG_TEMPLATE:
return myhtml_insertion_mode_in_head(tree, token);
- default:
+ default: {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
else {
@@ -2676,6 +2980,9 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
{
case MyHTML_TAG__TEXT: {
if(token->type & MyHTML_TOKEN_TYPE_NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:NULL_CHAR ACTION:IGNORE LEVEL:ERROR */
+
myhtml_insertion_fix_for_null_char_drop_all(tree, token);
if(token->str.length)
@@ -2691,8 +2998,11 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
myhtml_tree_node_insert_comment(tree, token, NULL);
break;
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
case MyHTML_TAG_HTML:
return myhtml_insertion_mode_in_body(tree, token);
@@ -2729,10 +3039,15 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
case MyHTML_TAG_SELECT:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_node_t* select_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_SELECT, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_SELECT);
- if(select_node == NULL)
+ if(select_node == NULL) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_open_elements_pop_until_by_node(tree, select_node, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -2745,10 +3060,15 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
case MyHTML_TAG_TEXTAREA:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_node_t* select_node = myhtml_tree_element_in_scope(tree, MyHTML_TAG_SELECT, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_SELECT);
- if(select_node == NULL)
+ if(select_node == NULL) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_open_elements_pop_until_by_node(tree, select_node, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -2763,9 +3083,11 @@ bool myhtml_insertion_mode_in_select(myhtml_tree_t* tree, myhtml_token_node_t* t
case MyHTML_TAG__END_OF_FILE:
return myhtml_insertion_mode_in_body(tree, token);
- default:
+ default: {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
break;
+ }
}
}
@@ -2787,10 +3109,15 @@ bool myhtml_insertion_mode_in_select_in_table(myhtml_tree_t* tree, myhtml_token_
case MyHTML_TAG_TH:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_node_t* some_node = myhtml_tree_element_in_scope(tree, token->tag_id, MyHTML_NAMESPACE_HTML, MyHTML_TAG_CATEGORIES_SCOPE_TABLE);
- if(some_node == NULL)
+ if(some_node == NULL) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_OPEN_NOT_FOUND ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_SELECT, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -2815,6 +3142,8 @@ bool myhtml_insertion_mode_in_select_in_table(myhtml_tree_t* tree, myhtml_token_
case MyHTML_TAG_TH:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION LEVEL:ERROR */
+
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_SELECT, MyHTML_NAMESPACE_HTML, false);
myhtml_tree_reset_insertion_mode_appropriately(tree);
@@ -2837,8 +3166,12 @@ bool myhtml_insertion_mode_in_template(myhtml_tree_t* tree, myhtml_token_node_t*
case MyHTML_TAG_TEMPLATE:
return myhtml_insertion_mode_in_body(tree, token);
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
else {
@@ -2904,6 +3237,8 @@ bool myhtml_insertion_mode_in_template(myhtml_tree_t* tree, myhtml_token_node_t*
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TAG STATUS:ELEMENT_NOT_CLOSED LEVEL:ERROR TAG_ID:MyHTML_TAG_TEMPLATE NS:MyHTML_NAMESPACE_HTML */
+
myhtml_tree_open_elements_pop_until_by_node(tree, node, false);
myhtml_tree_active_formatting_up_to_last_marker(tree);
myhtml_tree_template_insertion_pop(tree);
@@ -2933,6 +3268,8 @@ bool myhtml_insertion_mode_after_body(myhtml_tree_t* tree, myhtml_token_node_t*
{
if(tree->fragment) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED ACTION:IGNORE LEVEL:ERROR */
+
break;
}
@@ -2940,9 +3277,13 @@ bool myhtml_insertion_mode_after_body(myhtml_tree_t* tree, myhtml_token_node_t*
break;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:ERROR */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
return true;
+ }
}
}
else {
@@ -2973,15 +3314,17 @@ bool myhtml_insertion_mode_after_body(myhtml_tree_t* tree, myhtml_token_node_t*
node->token = token;
node->ns = adjusted_location->ns;
- myhtml_tree_node_add_child(tree, adjusted_location, node);
+ myhtml_tree_node_add_child(adjusted_location, node);
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
// parse error
- break;
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+ break;
+ }
case MyHTML_TAG_HTML:
return myhtml_insertion_mode_in_body(tree, token);
@@ -2989,9 +3332,13 @@ bool myhtml_insertion_mode_after_body(myhtml_tree_t* tree, myhtml_token_node_t*
myhtml_rules_stop_parsing(tree);
break;
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:ERROR */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
return true;
+ }
}
}
@@ -3007,9 +3354,12 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t*
{
myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
- if(current_node == tree->document->child)
+ if(current_node == tree->document->child) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
myhtml_tree_open_elements_pop(tree);
@@ -3025,8 +3375,12 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t*
break;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
else {
@@ -3040,6 +3394,8 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t*
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
myhtml_token_node_wait_for_done(token);
myhtml_string_stay_only_whitespace(&token->str);
@@ -3055,9 +3411,12 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t*
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
case MyHTML_TAG_HTML:
return myhtml_insertion_mode_in_body(tree, token);
@@ -3080,14 +3439,19 @@ bool myhtml_insertion_mode_in_frameset(myhtml_tree_t* tree, myhtml_token_node_t*
if(current_node == tree->document->child) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
}
myhtml_rules_stop_parsing(tree);
break;
}
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
@@ -3103,8 +3467,12 @@ bool myhtml_insertion_mode_after_frameset(myhtml_tree_t* tree, myhtml_token_node
tree->insert_mode = MyHTML_INSERTION_MODE_AFTER_AFTER_FRAMESET;
break;
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
else {
@@ -3118,6 +3486,8 @@ bool myhtml_insertion_mode_after_frameset(myhtml_tree_t* tree, myhtml_token_node
}
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
myhtml_token_node_wait_for_done(token);
myhtml_string_stay_only_whitespace(&token->str);
@@ -3133,9 +3503,11 @@ bool myhtml_insertion_mode_after_frameset(myhtml_tree_t* tree, myhtml_token_node
break;
}
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
break;
+ }
case MyHTML_TAG_HTML:
return myhtml_insertion_mode_in_body(tree, token);
@@ -3147,8 +3519,12 @@ bool myhtml_insertion_mode_after_frameset(myhtml_tree_t* tree, myhtml_token_node
myhtml_rules_stop_parsing(tree);
break;
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
}
}
@@ -3174,7 +3550,7 @@ bool myhtml_insertion_mode_after_after_body(myhtml_tree_t* tree, myhtml_token_no
node->token = token;
node->ns = adjusted_location->ns;
- myhtml_tree_node_add_child(tree, adjusted_location, node);
+ myhtml_tree_node_add_child(adjusted_location, node);
break;
}
@@ -3183,6 +3559,9 @@ bool myhtml_insertion_mode_after_after_body(myhtml_tree_t* tree, myhtml_token_no
if(token->type & MyHTML_TOKEN_TYPE_WHITESPACE)
return myhtml_insertion_mode_in_body(tree, token);
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
return true;
}
@@ -3195,9 +3574,13 @@ bool myhtml_insertion_mode_after_after_body(myhtml_tree_t* tree, myhtml_token_no
myhtml_rules_stop_parsing(tree);
break;
- default:
+ default: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+
tree->insert_mode = MyHTML_INSERTION_MODE_IN_BODY;
return true;
+ }
}
}
@@ -3207,6 +3590,9 @@ bool myhtml_insertion_mode_after_after_body(myhtml_tree_t* tree, myhtml_token_no
bool myhtml_insertion_mode_after_after_frameset(myhtml_tree_t* tree, myhtml_token_node_t* token)
{
if(token->type & MyHTML_TOKEN_TYPE_CLOSE) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:ERROR */
+
return false;
}
else {
@@ -3221,7 +3607,7 @@ bool myhtml_insertion_mode_after_after_frameset(myhtml_tree_t* tree, myhtml_toke
node->token = token;
node->ns = adjusted_location->ns;
- myhtml_tree_node_add_child(tree, adjusted_location, node);
+ myhtml_tree_node_add_child(adjusted_location, node);
break;
}
@@ -3235,6 +3621,8 @@ bool myhtml_insertion_mode_after_after_frameset(myhtml_tree_t* tree, myhtml_toke
return myhtml_insertion_mode_in_body(tree, new_token);
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:ERROR */
+
break;
}
@@ -3249,9 +3637,11 @@ bool myhtml_insertion_mode_after_after_frameset(myhtml_tree_t* tree, myhtml_toke
case MyHTML_TAG_NOFRAMES:
return myhtml_insertion_mode_in_head(tree, token);
- default:
+ default: {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_UNNECESSARY LEVEL:ERROR */
break;
+ }
}
}
@@ -3262,6 +3652,8 @@ bool myhtml_insertion_mode_in_foreign_content_end_other(myhtml_tree_t* tree, myh
{
if(current_node->tag_id != token->tag_id) {
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:token HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:token->tag_id NEED_NS:MyHTML_NAMESPACE_HTML */
}
if(tree->open_elements->length)
@@ -3343,6 +3735,9 @@ bool myhtml_insertion_mode_in_foreign_content(myhtml_tree_t* tree, myhtml_token_
case MyHTML_TAG__TEXT:
{
if(token->type & MyHTML_TOKEN_TYPE_NULL) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:NULL_CHAR LEVEL:ERROR */
+
myhtml_token_node_wait_for_done(token);
myhtml_token_set_replacement_character_for_null_token(tree, token);
}
@@ -3359,8 +3754,12 @@ bool myhtml_insertion_mode_in_foreign_content(myhtml_tree_t* tree, myhtml_token_
myhtml_tree_node_insert_comment(tree, token, NULL);
break;
- case MyHTML_TAG__DOCTYPE:
+ case MyHTML_TAG__DOCTYPE: {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_WRONG_LOCATION ACTION:IGNORE LEVEL:ERROR */
+
break;
+ }
case MyHTML_TAG_B:
case MyHTML_TAG_BIG:
@@ -3409,6 +3808,8 @@ bool myhtml_insertion_mode_in_foreign_content(myhtml_tree_t* tree, myhtml_token_
case MyHTML_TAG_FONT:
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+
if(token->tag_id == MyHTML_TAG_FONT)
{
myhtml_token_node_wait_for_done(token);
diff --git a/source/myhtml/serialization.c b/source/myhtml/serialization.c
index 9685cd0..75003f3 100755
--- a/source/myhtml/serialization.c
+++ b/source/myhtml/serialization.c
@@ -28,23 +28,23 @@
static void myhtml_serialization_append(const char* str, size_t size, myhtml_callback_serialize_f callback, void *ptr);
static void myhtml_serialization_append_attr(const char* str, size_t length, myhtml_callback_serialize_f callback, void *ptr);
static void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, myhtml_callback_serialize_f callback, void *ptr);
-static void myhtml_serialization_node_append_text_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr);
-static void myhtml_serialization_node_append_close(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr);
+static void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr);
+static void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr);
/**
* See the function myhtml_serialization_tree_buffer
*/
-bool myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str)
+bool myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str)
{
- return myhtml_serialization_tree_buffer(tree, scope_node, str);
+ return myhtml_serialization_tree_buffer(scope_node, str);
}
/**
* See the function myhtml_serialization_node_buffer
*/
-bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str)
+bool myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str)
{
- return myhtml_serialization_node_buffer(tree, node, str);
+ return myhtml_serialization_node_buffer(node, str);
}
/**
@@ -55,32 +55,27 @@ bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, my
* @param ptr user-supplied pointer
* @return bool
*/
-bool myhtml_serialization_tree_callback(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void *ptr)
+bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void *ptr)
{
myhtml_tree_node_t* node = scope_node;
- if(node == tree->document) {
- if (!tree->document) return false;
- node = tree->document->child;
- }
-
while(node) {
- if(!myhtml_serialization_node_callback(tree, node, callback, ptr)) return false;
+ if(!myhtml_serialization_node_callback(node, callback, ptr)) return false;
if(node->child)
node = node->child;
else {
while(node != scope_node && node->next == NULL) {
- myhtml_serialization_node_append_close(tree, node, callback, ptr);
+ myhtml_serialization_node_append_close(node, callback, ptr);
node = node->parent;
}
if(node == scope_node) {
- if(node != tree->document) myhtml_serialization_node_append_close(tree, node, callback, ptr);
+ if(node != node->tree->document) myhtml_serialization_node_append_close(node, callback, ptr);
break;
}
- myhtml_serialization_node_append_close(tree, node, callback, ptr);
+ myhtml_serialization_node_append_close(node, callback, ptr);
node = node->next;
}
}
@@ -96,11 +91,11 @@ bool myhtml_serialization_tree_callback(myhtml_tree_t* tree, myhtml_tree_node_t*
* @param ptr user-supplied pointer
* @return bool
*/
-bool myhtml_serialization_node_callback(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr)
+bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void *ptr)
{
switch (node->tag_id) {
case MyHTML_TAG__TEXT: {
- myhtml_serialization_node_append_text_node(tree, node, callback, ptr);
+ myhtml_serialization_node_append_text_node(node, callback, ptr);
break;
}
case MyHTML_TAG__COMMENT: {
@@ -125,11 +120,11 @@ bool myhtml_serialization_node_callback(myhtml_tree_t* tree, myhtml_tree_node_t*
}
default: {
size_t length;
- const char *tag = myhtml_tag_name_by_id(tree, node->tag_id, &length);
+ const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length);
callback("<", 1, ptr);
callback(tag, length, ptr);
- if(node->token) myhtml_serialization_attributes(tree, node->token->attr_first, callback, ptr);
+ if(node->token) myhtml_serialization_attributes(node->tree, node->token->attr_first, callback, ptr);
callback(">", 1, ptr);
break;
}
@@ -194,14 +189,14 @@ void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* at
* @param callback
* @param ptr
*/
-void myhtml_serialization_node_append_close(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr)
+void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr)
{
if(node->tag_id != MyHTML_TAG__TEXT &&
node->tag_id != MyHTML_TAG__COMMENT &&
node->tag_id != MyHTML_TAG__DOCTYPE)
{
size_t length;
- const char *tag = myhtml_tag_name_by_id(tree, node->tag_id, &length);
+ const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length);
callback("</", 2, ptr);
callback(tag, length, ptr);
@@ -216,7 +211,7 @@ void myhtml_serialization_node_append_close(myhtml_tree_t* tree, myhtml_tree_nod
* @param callback
* @param ptr
*/
-void myhtml_serialization_node_append_text_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr)
+void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr)
{
if(node->token == NULL || node->token->str.data == NULL) return;
@@ -397,7 +392,7 @@ void myhtml_serialization_concatenate(const char* data, size_t length, void *ptr
* @param str
* @return bool
*/
-bool myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str) {
+bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str) {
// we need an output variable
if(str == NULL) return false;
@@ -418,7 +413,7 @@ bool myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* s
if (setjmp(leap) == 0)
{
// serialize the entire tree
- return myhtml_serialization_tree_callback(tree, scope_node, myhtml_serialization_concatenate, str);
+ return myhtml_serialization_tree_callback(scope_node, myhtml_serialization_concatenate, str);
}
else
{
@@ -434,7 +429,7 @@ bool myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* s
* @param str
* @return bool
*/
-bool myhtml_serialization_node_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str) {
+bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str) {
// we need an output variable
if(str == NULL) return false;
@@ -455,7 +450,7 @@ bool myhtml_serialization_node_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* n
if (setjmp(leap) == 0)
{
// pass on
- return myhtml_serialization_node_callback(tree, node, myhtml_serialization_concatenate, str);
+ return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str);
}
else
{
diff --git a/source/myhtml/serialization.h b/source/myhtml/serialization.h
index 0582ffe..e651769 100644
--- a/source/myhtml/serialization.h
+++ b/source/myhtml/serialization.h
@@ -30,19 +30,16 @@
extern "C" {
#endif
-// callback to be implemented by the user
-typedef void (*myhtml_callback_serialize_f)(const char* buffer, size_t size, void* ctx);
-
// the serialization functions
-bool myhtml_serialization_tree_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
-bool myhtml_serialization_node_buffer(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
-bool myhtml_serialization_tree_callback(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void* ptr);
-bool myhtml_serialization_node_callback(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr);
+bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
+bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, myhtml_string_raw_t* str);
+bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, myhtml_callback_serialize_f callback, void* ptr);
+bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, myhtml_callback_serialize_f callback, void* ptr);
// in versuon 1.0.3 this is in public api.
// Need to set deprecated?
-bool myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
-bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
+bool myhtml_serialization(myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
+bool myhtml_serialization_node(myhtml_tree_node_t* node, myhtml_string_raw_t* str);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/source/myhtml/tag.c b/source/myhtml/tag.c
index de5daab..65a8f56 100755
--- a/source/myhtml/tag.c
+++ b/source/myhtml/tag.c
@@ -36,10 +36,9 @@ myhtml_status_t myhtml_tag_init(myhtml_tree_t *tree, myhtml_tag_t *tags)
mcsimple_init(tags->mcsimple_context, 128, 1024, sizeof(myhtml_tag_context_t));
tags->mchar_node = mchar_async_node_add(tree->mchar);
- tags->tree = mctree_create(32);
+ tags->tree = mctree_create(2);
tags->mchar = tree->mchar;
tags->tags_count = MyHTML_TAG_LAST_ENTRY;
- tags->mcobject_tag_index = NULL;
myhtml_tag_clean(tags);
@@ -70,168 +69,6 @@ myhtml_tag_t * myhtml_tag_destroy(myhtml_tag_t* tags)
return NULL;
}
-myhtml_tag_index_t * myhtml_tag_index_create(void)
-{
- return (myhtml_tag_index_t*)myhtml_calloc(1, sizeof(myhtml_tag_index_t));
-}
-
-myhtml_status_t myhtml_tag_index_init(myhtml_tag_t* tags, myhtml_tag_index_t* idx_tags)
-{
- /* Tags Index */
- tags->mcobject_tag_index = mcobject_create();
- if(tags->mcobject_tag_index == NULL)
- return MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION;
-
- myhtml_status_t status = mcobject_init(tags->mcobject_tag_index, 4096, sizeof(myhtml_incoming_buffer_t));
- if(status)
- return status;
-
- idx_tags->tags_size = tags->tags_count + 128;
- idx_tags->tags_length = 0;
- idx_tags->tags = (myhtml_tag_index_entry_t*)myhtml_calloc(idx_tags->tags_size, sizeof(myhtml_tag_index_entry_t));
-
- if(idx_tags->tags == NULL)
- return MyHTML_STATUS_TAGS_ERROR_INDEX_MEMORY_ALLOCATION;
-
- return MyHTML_STATUS_OK;
-}
-
-void myhtml_tag_index_clean(myhtml_tag_t* tags, myhtml_tag_index_t* index_tags)
-{
- mcobject_clean(tags->mcobject_tag_index);
- memset(index_tags->tags, 0, sizeof(myhtml_tag_index_entry_t) * index_tags->tags_size);
-}
-
-myhtml_tag_index_t * myhtml_tag_index_destroy(myhtml_tag_t* tags, myhtml_tag_index_t* index_tags)
-{
- mcobject_destroy(tags->mcobject_tag_index, true);
-
- if(index_tags == NULL)
- return NULL;
-
- if(index_tags->tags) {
- myhtml_free(index_tags->tags);
- index_tags->tags = NULL;
- }
-
- myhtml_free(index_tags);
-
- return NULL;
-}
-
-void myhtml_tag_index_check_size(myhtml_tag_t* tags, myhtml_tag_index_t* index_tags, myhtml_tag_id_t tag_id)
-{
- if(tag_id >= index_tags->tags_size) {
- size_t new_size = tag_id + 128;
-
- myhtml_tag_index_entry_t *index_entries = (myhtml_tag_index_entry_t*)myhtml_realloc(index_tags->tags,
- sizeof(myhtml_tag_index_entry_t) *
- new_size);
-
- if(index_entries) {
- index_tags->tags = index_entries;
-
- memset(&index_tags->tags[index_tags->tags_size], 0, sizeof(myhtml_tag_index_entry_t)
- * (new_size - index_tags->tags_size));
-
- index_tags->tags_size = new_size;
- }
- else {
- // TODO: error
- }
- }
-}
-
-myhtml_status_t myhtml_tag_index_add(myhtml_tag_t* tags, myhtml_tag_index_t* idx_tags, myhtml_tree_node_t* node)
-{
- myhtml_tag_index_check_size(tags, idx_tags, node->tag_id);
-
- myhtml_tag_index_entry_t* tag = &idx_tags->tags[node->tag_id];
-
- myhtml_status_t status;
- myhtml_tag_index_node_t* new_node = mcobject_malloc(tags->mcobject_tag_index, &status);
-
- if(status)
- return status;
-
- myhtml_tag_index_clean_node(new_node);
-
- if(tag->first == NULL) {
- tag->first = new_node;
- new_node->prev = NULL;
- }
- else {
- tag->last->next = new_node;
- new_node->prev = tag->last;
- }
-
- new_node->next = NULL;
- new_node->node = node;
-
- tag->last = new_node;
-
- tag->count++;
-
- return MyHTML_STATUS_OK;
-}
-
-myhtml_tag_index_entry_t * myhtml_tag_index_entry(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id)
-{
- if(tag_index->tags_size > tag_id)
- return &tag_index->tags[tag_id];
-
- return NULL;
-}
-
-size_t myhtml_tag_index_entry_count(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id)
-{
- if(tag_index->tags_size > tag_id)
- return tag_index->tags[tag_id].count;
-
- return 0;
-}
-
-myhtml_tag_index_node_t * myhtml_tag_index_first(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id)
-{
- if(tag_index->tags_size > tag_id)
- return tag_index->tags[tag_id].first;
-
- return NULL;
-}
-
-myhtml_tag_index_node_t * myhtml_tag_index_last(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id)
-{
- if(tag_index->tags_size > tag_id)
- return tag_index->tags[tag_id].last;
-
- return NULL;
-}
-
-myhtml_tag_index_node_t * myhtml_tag_index_next(myhtml_tag_index_node_t *index_node)
-{
- if(index_node)
- return index_node->next;
-
- return NULL;
-}
-
-myhtml_tag_index_node_t * myhtml_tag_index_prev(myhtml_tag_index_node_t *index_node)
-{
- if(index_node)
- return index_node->prev;
-
- return NULL;
-}
-
-
-myhtml_tree_node_t * myhtml_tag_index_tree_node(myhtml_tag_index_node_t *index_node)
-{
- if(index_node)
- return index_node->node;
-
- return NULL;
-}
-
myhtml_tag_id_t myhtml_tag_add(myhtml_tag_t* tags, const char* key, size_t key_size,
enum myhtml_tokenizer_state data_parser, bool to_lcase)
{
diff --git a/source/myhtml/tag.h b/source/myhtml/tag.h
index 3437cb9..3814ef9 100644
--- a/source/myhtml/tag.h
+++ b/source/myhtml/tag.h
@@ -60,25 +60,6 @@ extern "C" {
#define myhtml_tag_index_clean_node(index_node) \
memset(index_node, 0, sizeof(myhtml_tag_index_node_t));
-struct myhtml_tag_index_entry {
- myhtml_tag_index_node_t *first;
- myhtml_tag_index_node_t *last;
- size_t count;
-};
-
-struct myhtml_tag_index_node {
- myhtml_tag_index_node_t *next;
- myhtml_tag_index_node_t *prev;
-
- myhtml_tree_node_t* node;
-};
-
-struct myhtml_tag_index {
- myhtml_tag_index_entry_t* tags;
- size_t tags_length;
- size_t tags_size;
-};
-
struct myhtml_tag_context {
myhtml_tag_id_t id;
@@ -104,8 +85,7 @@ struct myhtml_tag {
size_t tags_count;
size_t mchar_node;
- mchar_async_t *mchar;
- mcobject_t *mcobject_tag_index;
+ mchar_async_t *mchar;
};
myhtml_tag_t * myhtml_tag_create(void);
@@ -119,21 +99,6 @@ myhtml_tag_id_t myhtml_tag_add(myhtml_tag_t* tags, const char* key, size_t key_s
void myhtml_tag_set_category(myhtml_tag_t* tags, myhtml_tag_id_t tag_idx,
enum myhtml_namespace ns, enum myhtml_tag_categories cats);
-myhtml_tag_index_t * myhtml_tag_index_create(void);
-myhtml_status_t myhtml_tag_index_init(myhtml_tag_t* tags, myhtml_tag_index_t* tag_index);
-void myhtml_tag_index_clean(myhtml_tag_t* tags, myhtml_tag_index_t* tag_index);
-myhtml_tag_index_t * myhtml_tag_index_destroy(myhtml_tag_t* tags, myhtml_tag_index_t* tag_index);
-
-myhtml_status_t myhtml_tag_index_add(myhtml_tag_t* tags, myhtml_tag_index_t* tag_index, myhtml_tree_node_t* node);
-myhtml_tag_index_entry_t * myhtml_tag_index_entry(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-myhtml_tag_index_node_t * myhtml_tag_index_first(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-myhtml_tag_index_node_t * myhtml_tag_index_last(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-myhtml_tag_index_node_t * myhtml_tag_index_next(myhtml_tag_index_node_t *index_node);
-myhtml_tag_index_node_t * myhtml_tag_index_prev(myhtml_tag_index_node_t *index_node);
-myhtml_tree_node_t * myhtml_tag_index_tree_node(myhtml_tag_index_node_t *index_node);
-
-size_t myhtml_tag_index_entry_count(myhtml_tag_index_t* tag_index, myhtml_tag_id_t tag_id);
-
const myhtml_tag_context_t * myhtml_tag_get_by_id(myhtml_tag_t* tags, myhtml_tag_id_t tag_id);
const myhtml_tag_context_t * myhtml_tag_get_by_name(myhtml_tag_t* tags, const char* name, size_t length);
diff --git a/source/myhtml/tag_init.c b/source/myhtml/tag_init.c
index cc37446..c66c266 100755
--- a/source/myhtml/tag_init.c
+++ b/source/myhtml/tag_init.c
@@ -705,7 +705,7 @@ static const myhtml_tag_context_t myhtml_tag_base_list[MyHTML_TAG_LAST_ENTRY] =
},
{MyHTML_TAG_MENUITEM, "menuitem", 8, MyHTML_TOKENIZER_STATE_DATA,
{
- MyHTML_TAG_CATEGORIES_ORDINARY, MyHTML_TAG_CATEGORIES_SPECIAL,
+ MyHTML_TAG_CATEGORIES_ORDINARY, MyHTML_TAG_CATEGORIES_ORDINARY,
MyHTML_TAG_CATEGORIES_ORDINARY, MyHTML_TAG_CATEGORIES_ORDINARY,
MyHTML_TAG_CATEGORIES_ORDINARY, MyHTML_TAG_CATEGORIES_ORDINARY,
MyHTML_TAG_CATEGORIES_ORDINARY
diff --git a/source/myhtml/thread.c b/source/myhtml/thread.c
index 685d6ec..94e277a 100644
--- a/source/myhtml/thread.c
+++ b/source/myhtml/thread.c
@@ -388,6 +388,7 @@ mythread_id_t _myhread_create_stream_raw(mythread_t *mythread, mythread_work_f w
thr->data.id = mythread->pth_list_length;
thr->data.t_count = total_count;
thr->data.opt = opt;
+ thr->data.status = 0;
myhtml_status_t m_status = myhtml_hread_mutex_create(mythread, &thr->data, 0);
@@ -978,6 +979,17 @@ void mythread_suspend_all(mythread_t *mythread)
}
}
+unsigned int mythread_check_status(mythread_t *mythread)
+{ /* Report the first non-zero data.status found in mythread->pth_list, or MyHTML_STATUS_OK when none is set. */
+ for (size_t idx = mythread->pth_list_root; idx < mythread->pth_list_size; idx++) { /* scan starts at pth_list_root and runs up to pth_list_size */
+ if(mythread->pth_list[idx].data.status) {
+ return mythread->pth_list[idx].data.status; /* the first non-zero status wins; later entries are not inspected */
+ }
+ }
+ /* every scanned entry had a zero status */
+ return MyHTML_STATUS_OK;
+}
+
bool mythread_function_see_for_all_done(mythread_queue_list_t *queue_list, size_t thread_id)
{
size_t done_count = 0;
diff --git a/source/myhtml/thread.h b/source/myhtml/thread.h
index 07d8fba..208f6df 100644
--- a/source/myhtml/thread.h
+++ b/source/myhtml/thread.h
@@ -84,6 +84,7 @@ struct mythread_context {
volatile mythread_thread_opt_t opt;
mythread_t *mythread;
+ unsigned int status;
};
struct mythread_list {
@@ -147,6 +148,7 @@ void mythread_stop_all(mythread_t *mythread);
void mythread_queue_wait_all_for_done(mythread_t *mythread);
void mythread_resume_all(mythread_t *mythread);
void mythread_suspend_all(mythread_t *mythread);
+unsigned int mythread_check_status(mythread_t *mythread);
// queue
struct mythread_queue_node {
diff --git a/source/myhtml/token.c b/source/myhtml/token.c
index 95c63d0..1536764 100644
--- a/source/myhtml/token.c
+++ b/source/myhtml/token.c
@@ -162,6 +162,16 @@ myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token)
return NULL;
}
+myhtml_token_node_t * myhtml_token_node_create(myhtml_token_t* token, size_t async_node_id)
+{ /* Allocate a token node from token->nodes_obj for the given async node id and reset it; returns NULL if the allocation fails. */
+ myhtml_token_node_t *token_node = (myhtml_token_node_t*)mcobject_async_malloc(token->nodes_obj, async_node_id, NULL); /* NULL: no status out-parameter is requested; failure is detected through the returned pointer */
+ if(token_node == NULL)
+ return NULL;
+ /* only a successfully allocated node is passed to the clean routine */
+ myhtml_token_node_clean(token_node);
+ return token_node;
+}
+
void myhtml_token_node_clean(myhtml_token_node_t* node)
{
memset(node, 0, sizeof(myhtml_token_node_t));
@@ -170,6 +180,16 @@ void myhtml_token_node_clean(myhtml_token_node_t* node)
myhtml_string_clean_all(&node->str);
}
+myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t async_node_id)
+{ /* Allocate an attribute from token->attr_obj for the given async node id and reset it; returns NULL if the allocation fails. */
+ myhtml_token_attr_t *attr_node = mcobject_async_malloc(token->attr_obj, async_node_id, NULL); /* NULL: no status out-parameter is requested; failure is detected through the returned pointer */
+ if(attr_node == NULL)
+ return NULL;
+ /* only a successfully allocated attribute is passed to the clean routine */
+ myhtml_token_attr_clean(attr_node);
+ return attr_node;
+}
+
void myhtml_token_attr_clean(myhtml_token_attr_t* attr)
{
memset(attr, 0, sizeof(myhtml_token_attr_t));
@@ -249,15 +269,19 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke
return NULL;
myhtml_tree_t* tree = token->tree;
+ myhtml_token_node_t* new_node = myhtml_token_node_create(token, token_thread_idx);
- myhtml_token_node_t* new_node = mcobject_async_malloc(token->nodes_obj, token_thread_idx, NULL);
+ if(new_node == NULL)
+ return NULL;
- new_node->tag_id = node->tag_id;
- new_node->type = node->type;
- new_node->attr_first = NULL;
- new_node->attr_last = NULL;
- new_node->raw_begin = node->raw_begin;
- new_node->raw_length = node->raw_length;
+ new_node->tag_id = node->tag_id;
+ new_node->type = node->type;
+ new_node->attr_first = NULL;
+ new_node->attr_last = NULL;
+ new_node->raw_begin = node->raw_begin;
+ new_node->raw_length = node->raw_length;
+ new_node->element_begin = node->element_begin;
+ new_node->element_length = node->element_length;
myhtml_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size);
myhtml_token_node_attr_copy(token, node, new_node, attr_thread_idx);
diff --git a/source/myhtml/token.h b/source/myhtml/token.h
index 84a4517..0f3de04 100644
--- a/source/myhtml/token.h
+++ b/source/myhtml/token.h
@@ -37,16 +37,8 @@ extern "C" {
#include "myhtml/utils/mchar_async.h"
#include "myhtml/utils/mcsync.h"
-#define myhtml_token_attr_malloc(token, attr_node, thread_idx) \
- attr_node = mcobject_async_malloc(token->attr_obj, thread_idx, NULL); \
- myhtml_token_attr_clean(attr_node)
-
#define myhtml_token_node_set_done(token_node) token_node->type |= MyHTML_TOKEN_TYPE_DONE
-#define myhtml_token_node_malloc(token, token_node, thread_idx) \
- token_node = (myhtml_token_node_t*)mcobject_async_malloc(token->nodes_obj, thread_idx, NULL); \
- myhtml_token_node_clean(token_node)
-
struct myhtml_token_replacement_entry {
char* from;
size_t from_size;
@@ -128,11 +120,15 @@ myhtml_string_t * myhtml_token_node_string(myhtml_token_node_t *token_node);
bool myhtml_token_node_is_close(myhtml_token_node_t *token_node);
bool myhtml_token_node_is_close_self(myhtml_token_node_t *token_node);
+myhtml_token_node_t * myhtml_token_node_create(myhtml_token_t* token, size_t async_node_id);
void myhtml_token_node_clean(myhtml_token_node_t* node);
+
+myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t async_node_id);
void myhtml_token_attr_clean(myhtml_token_attr_t* attr);
myhtml_token_attr_t * myhtml_token_attr_remove(myhtml_token_node_t* node, myhtml_token_attr_t* attr);
myhtml_token_attr_t * myhtml_token_attr_remove_by_name(myhtml_token_node_t* node, const char* name, size_t name_length);
void myhtml_token_attr_delete_all(myhtml_token_t* token, myhtml_token_node_t* node);
+
void myhtml_token_delete(myhtml_token_t* token, myhtml_token_node_t* node);
void myhtml_token_node_wait_for_done(myhtml_token_node_t* node);
void myhtml_token_set_done(myhtml_token_node_t* node);
diff --git a/source/myhtml/tokenizer.c b/source/myhtml/tokenizer.c
index d9e904b..1740a71 100644
--- a/source/myhtml/tokenizer.c
+++ b/source/myhtml/tokenizer.c
@@ -27,8 +27,10 @@ myhtml_status_t myhtml_tokenizer_set_first_settings(myhtml_tree_t* tree, const c
mythread_queue_node_clean(tree->current_qnode);
tree->current_qnode->tree = tree;
+ tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id);
- myhtml_token_node_malloc(tree->token, tree->current_token_node, tree->mcasync_token_id);
+ if(tree->current_token_node == NULL)
+ return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
tree->incoming_buf_first = tree->incoming_buf;
@@ -164,7 +166,11 @@ myhtml_status_t myhtml_tokenizer_end(myhtml_tree_t* tree)
tree->current_token_node->tag_id = MyHTML_TAG__END_OF_FILE;
- myhtml_queue_add(tree, 0, tree->current_token_node);
+ if(myhtml_queue_add(tree, 0, tree->current_token_node) != MyHTML_STATUS_OK) {
+ tree->tokenizer_status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
+ }
+
+ myhtml_status_t status = tree->tokenizer_status;
#ifndef MyHTML_BUILD_WITHOUT_THREADS
@@ -176,22 +182,16 @@ myhtml_status_t myhtml_tokenizer_end(myhtml_tree_t* tree)
if(mythread_queue_list_get_count(tree->myhtml->thread->context) == 0) {
myhtml_tokenizer_pause(tree);
}
+
+ if(status == MyHTML_STATUS_OK)
+ status = mythread_check_status(tree->myhtml->thread);
}
#endif
tree->flags |= MyHTML_TREE_FLAGS_PARSE_END;
-#ifdef DEBUG_MODE
- if(tree->open_elements->length) {
- MyHTML_DEBUG_ERROR("Tokenizer end; Open Elements is %zu", tree->open_elements->length);
- }
- if(tree->active_formatting->length) {
- MyHTML_DEBUG_ERROR("Tokenizer end; Active Formatting Elements is %zu", tree->active_formatting->length);
- }
-#endif
-
- return MyHTML_STATUS_OK;
+ return status;
}
myhtml_tree_node_t * myhtml_tokenizer_fragment_init(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, enum myhtml_namespace ns)
@@ -217,15 +217,12 @@ myhtml_tree_node_t * myhtml_tokenizer_fragment_init(myhtml_tree_t* tree, myhtml_
}
}
- mcobject_async_status_t mcstatus;
- tree->fragment->token = (myhtml_token_node_t*)mcobject_async_malloc(tree->token->nodes_obj, tree->mcasync_token_id, &mcstatus);
+ tree->fragment->token = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id);
- if(mcstatus)
+ if(tree->fragment->token == NULL)
return NULL;
- myhtml_token_node_clean(tree->fragment->token);
myhtml_token_set_done(tree->fragment->token);
-
tree->token_namespace = tree->fragment->token;
// step 5-7
@@ -324,7 +321,9 @@ myhtml_token_node_t * myhtml_tokenizer_queue_create_text_node_if_need(myhtml_tre
token_node->element_begin = token_node->raw_begin;
token_node->raw_length = token_node->element_length = absolute_html_offset - token_node->raw_begin;
- myhtml_queue_add(tree, tmp_begin, token_node);
+ if(myhtml_queue_add(tree, tmp_begin, token_node) != MyHTML_STATUS_OK) {
+ return NULL;
+ }
return tree->current_token_node;
}
@@ -434,6 +433,9 @@ bool _myhtml_tokenizer_state_andata_end_tag_name(myhtml_tree_t* tree, myhtml_tok
if((token_node->raw_begin - 2) > tmp_begin)
{
+ size_t tmp_element_begin = token_node->element_begin;
+ size_t tmp_raw_begin = token_node->raw_begin;
+
token_node->raw_length = (token_node->raw_begin - 2) - tmp_begin;
token_node->raw_begin = tmp_begin;
token_node->element_begin = tmp_begin;
@@ -442,12 +444,18 @@ bool _myhtml_tokenizer_state_andata_end_tag_name(myhtml_tree_t* tree, myhtml_tok
token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
token_node->tag_id = MyHTML_TAG__TEXT;
+ /* TODO: return error */
myhtml_queue_add(tree, *html_offset, token_node);
+
+ /* return true values */
token_node = tree->current_token_node;
+ token_node->element_begin = tmp_element_begin;
+ token_node->raw_begin = tmp_raw_begin;
}
- token_node->tag_id = tree->tmp_tag_id;
- token_node->type |= MyHTML_TOKEN_TYPE_CLOSE;
+ token_node->tag_id = tree->tmp_tag_id;
+ token_node->type |= MyHTML_TOKEN_TYPE_CLOSE;
+ token_node->raw_length = (tree->global_offset + *html_offset) - token_node->raw_begin;
return true;
}
@@ -478,9 +486,12 @@ size_t myhtml_tokenizer_state_rcdata_end_tag_name(myhtml_tree_t* tree, myhtml_to
html_offset++;
token_node = tree->current_token_node;
-
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
tree->tmp_tag_id = 0;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
@@ -605,9 +616,12 @@ size_t myhtml_tokenizer_state_rawtext_end_tag_name(myhtml_tree_t* tree, myhtml_t
html_offset++;
token_node = tree->current_token_node;
-
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
tree->tmp_tag_id = 0;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
@@ -656,7 +670,11 @@ size_t myhtml_tokenizer_state_plaintext(myhtml_tree_t* tree, myhtml_token_node_t
token_node->tag_id = MyHTML_TAG__TEXT;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
- myhtml_queue_add(tree, html_size, token_node);
+
+ if(myhtml_queue_add(tree, html_size, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_size;
}
@@ -688,7 +706,11 @@ size_t myhtml_tokenizer_state_cdata_section(myhtml_tree_t* tree, myhtml_token_no
if(token_node->raw_length) {
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
}
else {
token_node->raw_begin = html_offset + tree->global_offset;
@@ -723,6 +745,9 @@ size_t myhtml_tokenizer_state_data(myhtml_tree_t* tree, myhtml_token_node_t* tok
break;
}
else if(html[html_offset] == '\0' && (token_node->type & MyHTML_TOKEN_TYPE_NULL) == 0) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:CHAR_NULL LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
token_node->type |= MyHTML_TOKEN_TYPE_NULL;
}
else if(token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE &&
@@ -746,6 +771,10 @@ size_t myhtml_tokenizer_state_tag_open(myhtml_tree_t* tree, myhtml_token_node_t*
if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 1), MyHTML_TOKEN_TYPE_DATA);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tree->global_offset + html_offset;
@@ -754,6 +783,10 @@ size_t myhtml_tokenizer_state_tag_open(myhtml_tree_t* tree, myhtml_token_node_t*
else if(html[html_offset] == '!')
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 1), MyHTML_TOKEN_TYPE_DATA);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
html_offset++;
token_node->raw_begin = tree->global_offset + html_offset;
@@ -767,13 +800,23 @@ size_t myhtml_tokenizer_state_tag_open(myhtml_tree_t* tree, myhtml_token_node_t*
}
else if(html[html_offset] == '?')
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:CHAR_BAD LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 1), MyHTML_TOKEN_TYPE_DATA);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tree->global_offset + html_offset;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BOGUS_COMMENT;
}
else {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:NOT_EXPECTED LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
@@ -790,6 +833,10 @@ size_t myhtml_tokenizer_state_end_tag_open(myhtml_tree_t* tree, myhtml_token_nod
if(myhtml_tokenizer_chars_map[ (unsigned char)html[html_offset] ] == MyHTML_TOKENIZER_CHAR_A_Z_a_z)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 2), MyHTML_TOKEN_TYPE_DATA);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tree->global_offset + html_offset;
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
@@ -798,11 +845,21 @@ size_t myhtml_tokenizer_state_end_tag_open(myhtml_tree_t* tree, myhtml_token_nod
}
else if(html[html_offset] == '>')
{
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:CHAR_BAD LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
html_offset++;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
else {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:CHAR_BAD LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((tree->global_offset + html_offset) - 2), MyHTML_TOKEN_TYPE_DATA);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tree->global_offset + html_offset;
@@ -925,7 +982,11 @@ size_t myhtml_tokenizer_state_tag_name(myhtml_tree_t* tree, myhtml_token_node_t*
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
break;
}
@@ -950,7 +1011,11 @@ size_t myhtml_tokenizer_state_self_closing_start_tag(myhtml_tree_t* tree, myhtml
// TODO: ??????
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
else {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
@@ -979,7 +1044,11 @@ size_t myhtml_tokenizer_state_before_attribute_name(myhtml_tree_t* tree, myhtml_
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
else if(html[html_offset] == '/') {
token_node->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF;
@@ -994,8 +1063,12 @@ size_t myhtml_tokenizer_state_before_attribute_name(myhtml_tree_t* tree, myhtml_
tree->attr_current->raw_value_begin = 0;
tree->attr_current->raw_value_length = 0;
- if(html[html_offset] == '=')
+ if(html[html_offset] == '=') {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:NOT_EXPECTED LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
html_offset++;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME;
}
@@ -1035,9 +1108,17 @@ size_t myhtml_tokenizer_state_attribute_name(myhtml_tree_t* tree, myhtml_token_n
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
break;
}
@@ -1048,7 +1129,11 @@ size_t myhtml_tokenizer_state_attribute_name(myhtml_tree_t* tree, myhtml_token_n
token_node->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
html_offset++;
break;
@@ -1082,15 +1167,28 @@ size_t myhtml_tokenizer_state_after_attribute_name(myhtml_tree_t* tree, myhtml_t
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
break;
}
else if(html[html_offset] == '"' || html[html_offset] == '\'' || html[html_offset] == '<')
{
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
myhtml_parser_queue_set_attr(tree, token_node)
tree->attr_current->raw_key_begin = (tree->global_offset + html_offset);
@@ -1103,7 +1201,12 @@ size_t myhtml_tokenizer_state_after_attribute_name(myhtml_tree_t* tree, myhtml_t
}
else if(myhtml_whithspace(html[html_offset], !=, &&))
{
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
myhtml_parser_queue_set_attr(tree, token_node)
tree->attr_current->raw_key_begin = (html_offset + tree->global_offset);
@@ -1130,14 +1233,25 @@ size_t myhtml_tokenizer_state_before_attribute_value(myhtml_tree_t* tree, myhtml
while(html_offset < html_size)
{
if(html[html_offset] == '>') {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:NOT_EXPECTED LEVEL:ERROR BEGIN:html_offset LENGTH:1 */
+
myhtml_tokenizer_set_state(tree, token_node);
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
break;
}
@@ -1180,9 +1294,13 @@ size_t myhtml_tokenizer_state_attribute_value_double_quoted(myhtml_tree_t* tree,
{
tree->attr_current->raw_value_length = (tree->global_offset + html_offset) - tree->attr_current->raw_value_begin;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
- myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
html_offset++;
break;
@@ -1208,9 +1326,13 @@ size_t myhtml_tokenizer_state_attribute_value_single_quoted(myhtml_tree_t* tree,
{
tree->attr_current->raw_value_length = (tree->global_offset + html_offset) - tree->attr_current->raw_value_begin;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
- myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
html_offset++;
break;
@@ -1236,12 +1358,19 @@ size_t myhtml_tokenizer_state_attribute_value_unquoted(myhtml_tree_t* tree, myht
tree->attr_current->raw_value_length = (tree->global_offset + html_offset) - tree->attr_current->raw_value_begin;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
html_offset++;
break;
}
else if(html[html_offset] == '>') {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:TOKENIZER POSITION STATUS:UNSAFE_USE LEVEL:INFO BEGIN:html_offset LENGTH:1 */
+
tree->attr_current->raw_value_length = (tree->global_offset + html_offset) - tree->attr_current->raw_value_begin;
myhtml_tokenizer_set_state(tree, token_node);
@@ -1249,9 +1378,17 @@ size_t myhtml_tokenizer_state_attribute_value_unquoted(myhtml_tree_t* tree, myht
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
break;
}
@@ -1262,6 +1399,35 @@ size_t myhtml_tokenizer_state_attribute_value_unquoted(myhtml_tree_t* tree, myht
return html_offset;
}
+size_t myhtml_tokenizer_state_after_attribute_value_quoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
+{ // Inspects the single character at html[html_offset], picks the next tokenizer state, and returns the updated offset (0 after a queue failure). html_size is not read here, so no bounds check is done in this function.
+ if(myhtml_whithspace(html[html_offset], ==, ||)) { // presumably a whitespace test (macro defined elsewhere) -- confirm
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME; // next attribute may follow
+ html_offset++; // consume the matched character
+ }
+ else if(html[html_offset] == '/') { // possible self-closing tag
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SELF_CLOSING_START_TAG;
+ html_offset++; // consume '/'
+ }
+ else if(html[html_offset] == '>') { // end of the tag: finish the token and queue it
+ myhtml_tokenizer_set_state(tree, token_node); // helper defined elsewhere; presumably selects the follow-up state for this token -- confirm
+
+ html_offset++; // step past '>' so the length below includes it
+
+ token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin; // absolute position just past '>' minus the element start
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) { // queueing failed
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP; // switch to the error-stop state
+ return 0; // failure path returns offset 0 rather than the current offset
+ }
+ }
+ else { // any other character: change state without consuming it, so the next state sees the same character
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
+ }
+
+ return html_offset; // offset at which tokenizing continues
+}
+
/////////////////////////////////////////////////////////
//// COMMENT
//// <!--%HERE%
@@ -1281,7 +1447,11 @@ size_t myhtml_tokenizer_state_comment_start(myhtml_tree_t* tree, myhtml_token_no
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
token_node->raw_length = 0;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
@@ -1310,7 +1480,11 @@ size_t myhtml_tokenizer_state_comment_start_dash(myhtml_tree_t* tree, myhtml_tok
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
token_node->raw_length = 0;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
@@ -1372,7 +1546,11 @@ size_t myhtml_tokenizer_state_comment_end(myhtml_tree_t* tree, myhtml_token_node
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
@@ -1401,7 +1579,11 @@ size_t myhtml_tokenizer_state_comment_end_bang(myhtml_tree_t* tree, myhtml_token
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
else {
html_offset++;
@@ -1409,7 +1591,10 @@ size_t myhtml_tokenizer_state_comment_end_bang(myhtml_tree_t* tree, myhtml_token
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
token_node->raw_length = 0;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
@@ -1444,7 +1629,11 @@ size_t myhtml_tokenizer_state_bogus_comment(myhtml_tree_t* tree, myhtml_token_no
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
break;
@@ -1456,6 +1645,16 @@ size_t myhtml_tokenizer_state_bogus_comment(myhtml_tree_t* tree, myhtml_token_no
return html_offset;
}
+/////////////////////////////////////////////////////////
+//// Parse error
+//// record a memory allocation error and return html_size (end of buffer)
+/////////////////////////////////////////////////////////
+size_t myhtml_tokenizer_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
+{ // Error-stop state handler: token_node, html and html_offset are not used; no input is examined.
+ tree->tokenizer_status = MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION; // always records a memory-allocation status on the tree, whatever put the tokenizer into this state
+ return html_size; // returns the buffer size as the new offset; presumably this makes the caller's loop treat the rest of the chunk as consumed -- confirm against the caller
+}
+
myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
{
myhtml->parse_state_func = (myhtml_tokenizer_state_f*)myhtml_malloc(sizeof(myhtml_tokenizer_state_f) *
@@ -1475,6 +1674,7 @@ myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_NAME] = myhtml_tokenizer_state_attribute_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_NAME] = myhtml_tokenizer_state_after_attribute_name;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_VALUE] = myhtml_tokenizer_state_before_attribute_value;
+ myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED] = myhtml_tokenizer_state_after_attribute_value_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED] = myhtml_tokenizer_state_attribute_value_double_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED] = myhtml_tokenizer_state_attribute_value_single_quoted;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED] = myhtml_tokenizer_state_attribute_value_unquoted;
@@ -1541,6 +1741,9 @@ myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN] = myhtml_tokenizer_state_script_data_double_escaped_less_than_sign;
myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END] = myhtml_tokenizer_state_script_data_double_escape_end;
+ myhtml->parse_state_func[MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP] = myhtml_tokenizer_state_parse_error_stop;
+
+
// ***********
// for ends
// *********
@@ -1570,6 +1773,8 @@ myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED)] = myhtml_tokenizer_end_state_attribute_value_single_quoted;
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_ATTRIBUTE_VALUE_UNQUOTED)] = myhtml_tokenizer_end_state_attribute_value_unquoted;
+ myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ + MyHTML_TOKENIZER_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED)] = myhtml_tokenizer_end_state_after_attribute_value_quoted;
// for ends comments
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
@@ -1681,6 +1886,10 @@ myhtml_status_t myhtml_tokenizer_state_init(myhtml_t* myhtml)
myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END)] = myhtml_tokenizer_end_state_script_data_double_escape_end;
+ // parse error
+ myhtml->parse_state_func[(MyHTML_TOKENIZER_STATE_LAST_ENTRY
+ + MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP)] = myhtml_tokenizer_end_state_parse_error_stop;
+
return MyHTML_STATUS_OK;
}
diff --git a/source/myhtml/tokenizer.h b/source/myhtml/tokenizer.h
index 87dd361..be1e15f 100644
--- a/source/myhtml/tokenizer.h
+++ b/source/myhtml/tokenizer.h
@@ -34,10 +34,6 @@ extern "C" {
#include "myhtml/tokenizer_doctype.h"
#include "myhtml/tokenizer_script.h"
#include "myhtml/tokenizer_end.h"
-
-#define MyHTML_TOKENIZER_CHAR_OTHER '\000'
-#define MyHTML_TOKENIZER_CHAR_A_Z_a_z '\001'
-#define MyHTML_TOKENIZER_CHAR_WHITESPACE '\002'
#define myhtml_tokenizer_inc_html_offset(offset, size) \
offset++; \
diff --git a/source/myhtml/tokenizer_doctype.c b/source/myhtml/tokenizer_doctype.c
index fb98975..d63036f 100644
--- a/source/myhtml/tokenizer_doctype.c
+++ b/source/myhtml/tokenizer_doctype.c
@@ -51,8 +51,17 @@ size_t myhtml_tokenizer_state_before_doctype_name(myhtml_tree_t* tree, myhtml_to
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
@@ -81,8 +90,17 @@ size_t myhtml_tokenizer_state_doctype_name(myhtml_tree_t* tree, myhtml_token_nod
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
@@ -92,7 +110,11 @@ size_t myhtml_tokenizer_state_doctype_name(myhtml_tree_t* tree, myhtml_token_nod
{
tree->attr_current->raw_key_length = (html_offset + tree->global_offset) - tree->attr_current->raw_key_begin;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_NAME;
@@ -122,7 +144,11 @@ size_t myhtml_tokenizer_state_after_doctype_name(myhtml_tree_t* tree, myhtml_tok
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
@@ -149,7 +175,11 @@ size_t myhtml_tokenizer_state_custom_after_doctype_name_a_z(myhtml_tree_t* tree,
tree->attr_current->raw_value_begin = token_node->str.length;
tree->attr_current->raw_value_length = 6;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
@@ -161,7 +191,11 @@ size_t myhtml_tokenizer_state_custom_after_doctype_name_a_z(myhtml_tree_t* tree,
tree->attr_current->raw_value_begin = token_node->str.length;
tree->attr_current->raw_value_length = 6;
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
@@ -205,7 +239,11 @@ size_t myhtml_tokenizer_state_before_doctype_public_identifier(myhtml_tree_t* tr
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
@@ -231,7 +269,12 @@ size_t myhtml_tokenizer_doctype_public_identifier_dsq(myhtml_tree_t* tree, myhtm
tree->attr_current->raw_value_length = (html_offset + tree->global_offset) - tree->attr_current->raw_value_begin;
myhtml_parser_queue_set_attr(tree, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
@@ -246,13 +289,22 @@ size_t myhtml_tokenizer_doctype_public_identifier_dsq(myhtml_tree_t* tree, myhtm
tree->attr_current->raw_value_length = (html_offset + tree->global_offset) - tree->attr_current->raw_value_begin;
myhtml_parser_queue_set_attr(tree, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
break;
@@ -304,7 +356,11 @@ size_t myhtml_tokenizer_state_after_doctype_public_identifier(myhtml_tree_t* tre
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
return html_offset;
@@ -334,7 +390,12 @@ size_t myhtml_tokenizer_doctype_system_identifier_dsq(myhtml_tree_t* tree, myhtm
tree->attr_current->raw_value_length = (html_offset + tree->global_offset) - tree->attr_current->raw_value_begin;
myhtml_parser_queue_set_attr(tree, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
@@ -349,13 +410,22 @@ size_t myhtml_tokenizer_doctype_system_identifier_dsq(myhtml_tree_t* tree, myhtm
tree->attr_current->raw_value_length = (html_offset + tree->global_offset) - tree->attr_current->raw_value_begin;
myhtml_parser_queue_set_attr(tree, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
break;
@@ -393,7 +463,11 @@ size_t myhtml_tokenizer_state_after_doctype_system_identifier(myhtml_tree_t* tre
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
@@ -417,7 +491,11 @@ size_t myhtml_tokenizer_state_bogus_doctype(myhtml_tree_t* tree, myhtml_token_no
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
break;
diff --git a/source/myhtml/tokenizer_end.c b/source/myhtml/tokenizer_end.c
index c4ae160..cca56fc 100644
--- a/source/myhtml/tokenizer_end.c
+++ b/source/myhtml/tokenizer_end.c
@@ -38,7 +38,10 @@ size_t myhtml_tokenizer_end_state_tag_open(myhtml_tree_t* tree, myhtml_token_nod
token_node->raw_length = (html_offset + tree->global_offset) - token_node->raw_begin;
myhtml_check_tag_parser(tree, token_node, html, html_offset);
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
else {
token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
@@ -99,7 +102,10 @@ size_t myhtml_tokenizer_end_state_markup_declaration_open(myhtml_tree_t* tree, m
token_node->raw_length = (html_size + tree->global_offset) - token_node->raw_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
}
else {
@@ -109,7 +115,10 @@ size_t myhtml_tokenizer_end_state_markup_declaration_open(myhtml_tree_t* tree, m
token_node->raw_length = (html_size + tree->global_offset) - token_node->raw_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
}
@@ -120,7 +129,11 @@ size_t myhtml_tokenizer_end_state_before_attribute_name(myhtml_tree_t* tree, myh
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -129,7 +142,11 @@ size_t myhtml_tokenizer_end_state_attribute_name(myhtml_tree_t* tree, myhtml_tok
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
tree->attr_current->raw_key_length = (html_offset + tree->global_offset) - tree->attr_current->raw_key_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -138,7 +155,11 @@ size_t myhtml_tokenizer_end_state_after_attribute_name(myhtml_tree_t* tree, myht
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -146,8 +167,16 @@ size_t myhtml_tokenizer_end_state_before_attribute_value(myhtml_tree_t* tree, my
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -172,8 +201,30 @@ size_t myhtml_tokenizer_end_state_attribute_value_unquoted(myhtml_tree_t* tree,
tree->attr_current->raw_value_length = (html_offset + tree->global_offset) - tree->attr_current->raw_value_begin;
- myhtml_queue_add(tree, html_offset, token_node);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->token->mcasync_attr_id);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
+ if(tree->attr_current == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
+ return html_offset;
+}
+
+size_t myhtml_tokenizer_end_state_after_attribute_value_quoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
+{ // "End" counterpart of the after-attribute-value-quoted state; presumably invoked when input ends while in that state -- confirm. html is not read here.
+ token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin; // element length measured up to the end of the current buffer (html_size, not html_offset)
+
+ token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin); // raw length measured up to the current offset
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) { // queueing failed
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP; // switch to the error-stop state
+ return 0; // failure path returns offset 0
+ }
return html_offset; // offset is returned unchanged on success
}
@@ -183,7 +234,11 @@ size_t myhtml_tokenizer_end_state_comment_start(myhtml_tree_t* tree, myhtml_toke
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -193,7 +248,11 @@ size_t myhtml_tokenizer_end_state_comment_start_dash(myhtml_tree_t* tree, myhtml
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -203,7 +262,11 @@ size_t myhtml_tokenizer_end_state_comment(myhtml_tree_t* tree, myhtml_token_node
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -216,7 +279,11 @@ size_t myhtml_tokenizer_end_state_comment_end(myhtml_tree_t* tree, myhtml_token_
if(token_node->raw_length > 2) {
token_node->raw_length -= 2;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
return html_offset;
@@ -227,7 +294,11 @@ size_t myhtml_tokenizer_end_state_comment_end_dash(myhtml_tree_t* tree, myhtml_t
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -237,7 +308,11 @@ size_t myhtml_tokenizer_end_state_comment_end_bang(myhtml_tree_t* tree, myhtml_t
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -247,7 +322,11 @@ size_t myhtml_tokenizer_end_state_bogus_comment(myhtml_tree_t* tree, myhtml_toke
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -259,7 +338,10 @@ size_t myhtml_tokenizer_end_state_cdata_section(myhtml_tree_t* tree, myhtml_toke
token_node->raw_length = ((html_offset + tree->global_offset) - token_node->raw_begin);
if(token_node->raw_length) {
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
return html_offset;
@@ -274,7 +356,10 @@ size_t myhtml_tokenizer_end_state_rcdata(myhtml_tree_t* tree, myhtml_token_node_
token_node->tag_id = MyHTML_TAG__TEXT;
token_node->raw_length = (html_size + tree->global_offset) - token_node->raw_begin;
- myhtml_queue_add(tree, 0, token_node);
+ if(myhtml_queue_add(tree, 0, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
return html_offset;
@@ -357,7 +442,11 @@ size_t myhtml_tokenizer_end_state_before_doctype_name(myhtml_tree_t* tree, myhtm
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
tree->compat_mode = MyHTML_TREE_COMPAT_MODE_QUIRKS;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -368,7 +457,10 @@ size_t myhtml_tokenizer_end_state_doctype_name(myhtml_tree_t* tree, myhtml_token
tree->attr_current->raw_key_length = (html_offset + tree->global_offset) - tree->attr_current->raw_key_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -377,7 +469,11 @@ size_t myhtml_tokenizer_end_state_after_doctype_name(myhtml_tree_t* tree, myhtml
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -385,7 +481,11 @@ size_t myhtml_tokenizer_end_state_custom_after_doctype_name_a_z(myhtml_tree_t* t
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -394,7 +494,11 @@ size_t myhtml_tokenizer_end_state_before_doctype_public_identifier(myhtml_tree_t
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
tree->compat_mode = MyHTML_TREE_COMPAT_MODE_QUIRKS;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -409,7 +513,10 @@ size_t myhtml_tokenizer_end_state_doctype_public_identifier_double_quoted(myhtml
tree->attr_current->raw_key_length = (html_offset + tree->global_offset) - tree->attr_current->raw_key_begin;
}
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -426,7 +533,11 @@ size_t myhtml_tokenizer_end_state_after_doctype_public_identifier(myhtml_tree_t*
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -440,7 +551,10 @@ size_t myhtml_tokenizer_end_state_doctype_system_identifier_double_quoted(myhtml
tree->attr_current->raw_key_length = (html_offset + tree->global_offset) - tree->attr_current->raw_key_begin;
}
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
return html_offset;
}
@@ -457,7 +571,11 @@ size_t myhtml_tokenizer_end_state_after_doctype_system_identifier(myhtml_tree_t*
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -465,7 +583,11 @@ size_t myhtml_tokenizer_end_state_bogus_doctype(myhtml_tree_t* tree, myhtml_toke
{
token_node->element_length = (tree->global_offset + html_size) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
+
return html_offset;
}
@@ -613,3 +735,9 @@ size_t myhtml_tokenizer_end_state_script_data_double_escape_end(myhtml_tree_t* t
return html_offset;
}
+size_t myhtml_tokenizer_end_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size)
+{
+
+
+ return html_size;
+}
diff --git a/source/myhtml/tokenizer_end.h b/source/myhtml/tokenizer_end.h
index 6282083..3d78807 100644
--- a/source/myhtml/tokenizer_end.h
+++ b/source/myhtml/tokenizer_end.h
@@ -43,6 +43,7 @@ size_t myhtml_tokenizer_end_state_before_attribute_value(myhtml_tree_t* tree, my
size_t myhtml_tokenizer_end_state_attribute_value_double_quoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_attribute_value_single_quoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_attribute_value_unquoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
+size_t myhtml_tokenizer_end_state_after_attribute_value_quoted(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_comment_start(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_comment_start_dash(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_comment(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
@@ -94,6 +95,7 @@ size_t myhtml_tokenizer_end_state_script_data_double_escaped_dash(myhtml_tree_t*
size_t myhtml_tokenizer_end_state_script_data_double_escaped_dash_dash(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_script_data_double_escaped_less_than_sign(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
size_t myhtml_tokenizer_end_state_script_data_double_escape_end(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
+size_t myhtml_tokenizer_end_state_parse_error_stop(myhtml_tree_t* tree, myhtml_token_node_t* token_node, const char* html, size_t html_offset, size_t html_size);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/source/myhtml/tokenizer_script.c b/source/myhtml/tokenizer_script.c
index 344a186..f5f473b 100644
--- a/source/myhtml/tokenizer_script.c
+++ b/source/myhtml/tokenizer_script.c
@@ -116,6 +116,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -145,6 +149,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -174,6 +182,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -183,7 +195,11 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
}
@@ -276,6 +292,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -305,6 +325,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -334,6 +358,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
if(myhtml_strncasecmp(tem_name, "script", 6) == 0)
{
token_node = myhtml_tokenizer_queue_create_text_node_if_need(tree, token_node, html, ((html_offset + tree->global_offset) - 8), MyHTML_TOKEN_TYPE_SCRIPT);
+ if(token_node == NULL) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
token_node->raw_begin = tmp_size;
token_node->raw_length = 6;
@@ -345,7 +373,11 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
html_offset++;
token_node->element_length = (tree->global_offset + html_offset) - token_node->element_begin;
- myhtml_queue_add(tree, html_offset, token_node);
+
+ if(myhtml_queue_add(tree, html_offset, token_node) != MyHTML_STATUS_OK) {
+ myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_PARSE_ERROR_STOP;
+ return 0;
+ }
}
else {
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED;
diff --git a/source/myhtml/tree.c b/source/myhtml/tree.c
index 6bf7772..618801e 100644
--- a/source/myhtml/tree.c
+++ b/source/myhtml/tree.c
@@ -30,7 +30,7 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml)
myhtml_status_t status = MyHTML_STATUS_OK;
tree->myhtml = myhtml;
- tree->token = myhtml_token_create(tree, 4096);
+ tree->token = myhtml_token_create(tree, 512);
if(tree->token == NULL)
return MyHTML_STATUS_TOKENIZER_ERROR_MEMORY_ALLOCATION;
@@ -39,6 +39,7 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml)
tree->stream_buffer = NULL;
tree->parse_flags = MyHTML_TREE_PARSE_FLAGS_CLEAN;
tree->queue = mythread_queue_create(9182, &status);
+ tree->context = NULL;
tree->callback_before_token = NULL;
tree->callback_after_token = NULL;
@@ -82,11 +83,11 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml)
if(mcstatus)
return MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE;
- tree->mcasync_token_id = mcobject_async_node_add(tree->token->nodes_obj, &mcstatus);
+ tree->mcasync_rules_token_id = mcobject_async_node_add(tree->token->nodes_obj, &mcstatus);
if(mcstatus)
return MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE;
- tree->mcasync_attr_id = mcobject_async_node_add(tree->token->attr_obj, &mcstatus);
+ tree->mcasync_rules_attr_id = mcobject_async_node_add(tree->token->attr_obj, &mcstatus);
if(mcstatus)
return MyHTML_STATUS_TREE_ERROR_MCOBJECT_CREATE_NODE;
@@ -125,8 +126,6 @@ myhtml_status_t myhtml_tree_init(myhtml_tree_t* tree, myhtml_t* myhtml)
tree->tags = myhtml_tag_create();
status = myhtml_tag_init(tree, tree->tags);
- tree->indexes = myhtml_tree_index_create(tree, tree->tags);
-
myhtml_tree_clean(tree);
return status;
@@ -143,8 +142,8 @@ void myhtml_tree_clean(myhtml_tree_t* tree)
#endif /* MyHTML_BUILD_WITHOUT_THREADS */
mcobject_async_node_clean(tree->tree_obj, tree->mcasync_tree_id);
- mcobject_async_node_clean(tree->token->nodes_obj, tree->mcasync_token_id);
- mcobject_async_node_clean(tree->token->attr_obj, tree->mcasync_attr_id);
+ mcobject_async_node_clean(tree->token->nodes_obj, tree->mcasync_rules_token_id);
+ mcobject_async_node_clean(tree->token->attr_obj, tree->mcasync_rules_attr_id);
mchar_async_node_clean(tree->mchar, tree->mchar_node_id);
#ifndef MyHTML_BUILD_WITHOUT_THREADS
@@ -183,6 +182,7 @@ void myhtml_tree_clean(myhtml_tree_t* tree)
tree->global_offset = 0;
tree->current_qnode = NULL;
tree->token_last_done = NULL;
+ tree->tokenizer_status = MyHTML_STATUS_OK;
tree->encoding = MyHTML_ENCODING_UTF_8;
tree->encoding_usereq = MyHTML_ENCODING_DEFAULT;
@@ -194,12 +194,11 @@ void myhtml_tree_clean(myhtml_tree_t* tree)
myhtml_tree_list_clean(tree->other_elements);
myhtml_tree_token_list_clean(tree->token_list);
myhtml_tree_template_insertion_clean(tree);
- myhtml_tree_index_clean(tree, tree->tags);
mcobject_clean(tree->mcobject_incoming_buf);
myhtml_tag_clean(tree->tags);
mythread_queue_clean(tree->queue);
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
}
void myhtml_tree_clean_all(myhtml_tree_t* tree)
@@ -238,6 +237,7 @@ void myhtml_tree_clean_all(myhtml_tree_t* tree)
tree->global_offset = 0;
tree->current_qnode = NULL;
tree->token_last_done = NULL;
+ tree->tokenizer_status = MyHTML_STATUS_OK;
tree->encoding = MyHTML_ENCODING_UTF_8;
tree->encoding_usereq = MyHTML_ENCODING_DEFAULT;
@@ -249,7 +249,6 @@ void myhtml_tree_clean_all(myhtml_tree_t* tree)
myhtml_tree_list_clean(tree->other_elements);
myhtml_tree_token_list_clean(tree->token_list);
myhtml_tree_template_insertion_clean(tree);
- myhtml_tree_index_clean(tree, tree->tags);
mcobject_clean(tree->mcobject_incoming_buf);
myhtml_tag_clean(tree->tags);
@@ -257,7 +256,7 @@ void myhtml_tree_clean_all(myhtml_tree_t* tree)
mythread_queue_list_entry_clean(tree->myhtml->thread, tree->queue_entry);
#endif /* MyHTML_BUILD_WITHOUT_THREADS */
- myhtml_token_attr_malloc(tree->token, tree->attr_current, tree->mcasync_attr_id);
+ tree->attr_current = myhtml_token_attr_create(tree->token, tree->token->mcasync_attr_id);
}
myhtml_tree_t * myhtml_tree_destroy(myhtml_tree_t* tree)
@@ -266,7 +265,6 @@ myhtml_tree_t * myhtml_tree_destroy(myhtml_tree_t* tree)
return NULL;
/* destroy tags before other objects */
- tree->indexes = myhtml_tree_index_destroy(tree, tree->tags);
tree->tags = myhtml_tag_destroy(tree->tags);
tree->active_formatting = myhtml_tree_active_formatting_destroy(tree);
tree->open_elements = myhtml_tree_open_elements_destroy(tree);
@@ -306,57 +304,6 @@ void myhtml_tree_parse_flags_set(myhtml_tree_t* tree, myhtml_tree_parse_flags_t
tree->parse_flags = flags;
}
-/* index */
-myhtml_tree_indexes_t * myhtml_tree_index_create(myhtml_tree_t* tree, myhtml_tag_t* tags)
-{
- myhtml_tree_indexes_t* indexes = (myhtml_tree_indexes_t*)myhtml_malloc(sizeof(myhtml_tree_indexes_t));
-
- indexes->tags = myhtml_tag_index_create();
- myhtml_tag_index_init(tags, indexes->tags);
-
- return indexes;
-}
-
-void myhtml_tree_index_clean(myhtml_tree_t* tree, myhtml_tag_t* tags)
-{
- if(tree->indexes == NULL)
- return;
-
- myhtml_tag_index_clean(tags, tree->indexes->tags);
-}
-
-myhtml_tree_indexes_t * myhtml_tree_index_destroy(myhtml_tree_t* tree, myhtml_tag_t* tags)
-{
- if(tree->indexes == NULL)
- return NULL;
-
- tree->indexes->tags = myhtml_tag_index_destroy(tags, tree->indexes->tags);
- myhtml_free(tree->indexes);
-
- return NULL;
-}
-
-void myhtml_tree_index_append(myhtml_tree_t* tree, myhtml_tree_node_t* node)
-{
- if(tree->indexes == NULL)
- return;
-
- myhtml_tag_index_add(tree->tags, tree->indexes->tags, node);
-}
-
-myhtml_tree_node_t * myhtml_tree_index_get(myhtml_tree_t* tree, myhtml_tag_id_t tag_id)
-{
- if(tree->indexes == NULL)
- return NULL;
-
- myhtml_tag_index_node_t *tag_index = myhtml_tag_index_first(tree->indexes->tags, tag_id);
-
- if(tag_index)
- return tag_index->node;
-
- return NULL;
-}
-
myhtml_t * myhtml_tree_get_myhtml(myhtml_tree_t* tree)
{
if(tree)
@@ -373,14 +320,6 @@ myhtml_tag_t * myhtml_tree_get_tag(myhtml_tree_t* tree)
return NULL;
}
-myhtml_tag_index_t * myhtml_tree_get_tag_index(myhtml_tree_t* tree)
-{
- if(tree && tree->indexes)
- return tree->indexes->tags;
-
- return NULL;
-}
-
myhtml_tree_node_t * myhtml_tree_get_document(myhtml_tree_t* tree)
{
return tree->document;
@@ -415,10 +354,13 @@ myhtml_tree_node_t * myhtml_tree_node_create(myhtml_tree_t* tree)
{
myhtml_tree_node_t* node = (myhtml_tree_node_t*)mcobject_async_malloc(tree->tree_obj, tree->mcasync_tree_id, NULL);
myhtml_tree_node_clean(node);
+
+ node->tree = tree;
+
return node;
}
-void myhtml_tree_node_add_child(myhtml_tree_t* tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node)
+void myhtml_tree_node_add_child(myhtml_tree_node_t* root, myhtml_tree_node_t* node)
{
if(root->last_child) {
root->last_child->next = node;
@@ -431,10 +373,10 @@ void myhtml_tree_node_add_child(myhtml_tree_t* tree, myhtml_tree_node_t* root, m
node->parent = root;
root->last_child = node;
- myhtml_tree_node_callback_insert(tree, node);
+ myhtml_tree_node_callback_insert(node->tree, node);
}
-void myhtml_tree_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node)
+void myhtml_tree_node_insert_before(myhtml_tree_node_t* root, myhtml_tree_node_t* node)
{
if(root->prev) {
root->prev->next = node;
@@ -448,10 +390,10 @@ void myhtml_tree_node_insert_before(myhtml_tree_t* tree, myhtml_tree_node_t* roo
node->next = root;
root->prev = node;
- myhtml_tree_node_callback_insert(tree, node);
+ myhtml_tree_node_callback_insert(node->tree, node);
}
-void myhtml_tree_node_insert_after(myhtml_tree_t* tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node)
+void myhtml_tree_node_insert_after(myhtml_tree_node_t* root, myhtml_tree_node_t* node)
{
if(root->next) {
root->next->prev = node;
@@ -465,7 +407,7 @@ void myhtml_tree_node_insert_after(myhtml_tree_t* tree, myhtml_tree_node_t* root
node->prev = root;
root->next = node;
- myhtml_tree_node_callback_insert(tree, node);
+ myhtml_tree_node_callback_insert(node->tree, node);
}
myhtml_tree_node_t * myhtml_tree_node_find_parent_by_tag_id(myhtml_tree_node_t* node, myhtml_tag_id_t tag_id)
@@ -479,7 +421,7 @@ myhtml_tree_node_t * myhtml_tree_node_find_parent_by_tag_id(myhtml_tree_node_t*
return node;
}
-myhtml_tree_node_t * myhtml_tree_node_remove(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+myhtml_tree_node_t * myhtml_tree_node_remove(myhtml_tree_node_t* node)
{
if(node->next)
node->next->prev = node->prev;
@@ -497,63 +439,65 @@ myhtml_tree_node_t * myhtml_tree_node_remove(myhtml_tree_t* tree, myhtml_tree_no
if(node->next)
node->next = NULL;
- myhtml_tree_node_callback_remove(tree, node);
+ myhtml_tree_node_callback_remove(node->tree, node);
return node;
}
-void myhtml_tree_node_free(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+void myhtml_tree_node_free(myhtml_tree_node_t* node)
{
if(node == NULL)
return;
if(node->token) {
- myhtml_token_attr_delete_all(tree->token, node->token);
- myhtml_token_delete(tree->token, node->token);
+ myhtml_token_attr_delete_all(node->tree->token, node->token);
+ myhtml_token_delete(node->tree->token, node->token);
}
- mcobject_async_free(tree->tree_obj, node);
+ mcobject_async_free(node->tree->tree_obj, node);
}
-void myhtml_tree_node_delete(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+void myhtml_tree_node_delete(myhtml_tree_node_t* node)
{
if(node == NULL)
return;
- myhtml_tree_node_remove(tree, node);
- myhtml_tree_node_free(tree, node);
+ myhtml_tree_node_remove(node);
+ myhtml_tree_node_free(node);
}
-void _myhtml_tree_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+static void _myhtml_tree_node_delete_recursive(myhtml_tree_node_t* node)
{
while(node)
{
if(node->child)
- _myhtml_tree_node_delete_recursive(tree, node->child);
+ _myhtml_tree_node_delete_recursive(node->child);
node = node->next;
- myhtml_tree_node_delete(tree, node);
+ myhtml_tree_node_delete(node);
}
}
-void myhtml_tree_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+void myhtml_tree_node_delete_recursive(myhtml_tree_node_t* node)
{
if(node == NULL)
return;
if(node->child)
- _myhtml_tree_node_delete_recursive(tree, node->child);
+ _myhtml_tree_node_delete_recursive(node->child);
- myhtml_tree_node_delete(tree, node);
+ myhtml_tree_node_delete(node);
}
-myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_t* tree, myhtml_tree_node_t* node)
+myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_node_t* node)
{
- myhtml_tree_node_t* new_node = myhtml_tree_node_create(tree);
+ myhtml_tree_node_t* new_node = myhtml_tree_node_create(node->tree);
myhtml_token_node_wait_for_done(node->token);
- new_node->token = myhtml_token_node_clone(tree->token, node->token, tree->mcasync_token_id, tree->mcasync_attr_id);
+ new_node->token = myhtml_token_node_clone(node->tree->token, node->token,
+ node->tree->mcasync_rules_token_id,
+ node->tree->mcasync_rules_attr_id);
new_node->tag_id = node->tag_id;
new_node->ns = node->ns;
new_node->token->type |= MyHTML_TOKEN_TYPE_DONE;
@@ -561,19 +505,18 @@ myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_t* tree, myhtml_tree_nod
return new_node;
}
-void myhtml_tree_node_insert_by_mode(myhtml_tree_t* tree, myhtml_tree_node_t* adjusted_location,
- myhtml_tree_node_t* node, enum myhtml_tree_insertion_mode mode)
+void myhtml_tree_node_insert_by_mode(myhtml_tree_node_t* adjusted_location,
+ myhtml_tree_node_t* node, enum myhtml_tree_insertion_mode mode)
{
if(mode == MyHTML_TREE_INSERTION_MODE_DEFAULT)
- myhtml_tree_node_add_child(tree, adjusted_location, node);
+ myhtml_tree_node_add_child(adjusted_location, node);
else if(mode == MyHTML_TREE_INSERTION_MODE_BEFORE)
- myhtml_tree_node_insert_before(tree, adjusted_location, node);
+ myhtml_tree_node_insert_before(adjusted_location, node);
else
- myhtml_tree_node_insert_after(tree, adjusted_location, node);
+ myhtml_tree_node_insert_after(adjusted_location, node);
}
-myhtml_tree_node_t * myhtml_tree_node_insert_by_token(myhtml_tree_t* tree, myhtml_token_node_t* token,
- enum myhtml_namespace ns)
+myhtml_tree_node_t * myhtml_tree_node_insert_by_token(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_namespace_t ns)
{
myhtml_tree_node_t* node = myhtml_tree_node_create(tree);
@@ -583,16 +526,13 @@ myhtml_tree_node_t * myhtml_tree_node_insert_by_token(myhtml_tree_t* tree, myhtm
enum myhtml_tree_insertion_mode mode;
myhtml_tree_node_t* adjusted_location = myhtml_tree_appropriate_place_inserting(tree, NULL, &mode);
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
-
return node;
}
-myhtml_tree_node_t * myhtml_tree_node_insert(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx,
- enum myhtml_namespace ns)
+myhtml_tree_node_t * myhtml_tree_node_insert(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t ns)
{
myhtml_tree_node_t* node = myhtml_tree_node_create(tree);
@@ -602,11 +542,9 @@ myhtml_tree_node_t * myhtml_tree_node_insert(myhtml_tree_t* tree, myhtml_tag_id_
enum myhtml_tree_insertion_mode mode;
myhtml_tree_node_t* adjusted_location = myhtml_tree_appropriate_place_inserting(tree, NULL, &mode);
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
-
return node;
}
@@ -622,11 +560,9 @@ myhtml_tree_node_t * myhtml_tree_node_insert_comment(myhtml_tree_t* tree, myhtml
parent = myhtml_tree_appropriate_place_inserting(tree, NULL, &mode);
}
- myhtml_tree_node_insert_by_mode(tree, parent, node, mode);
+ myhtml_tree_node_insert_by_mode(parent, node, mode);
node->ns = parent->ns;
- myhtml_tree_index_append(tree, node);
-
return node;
}
@@ -638,9 +574,7 @@ myhtml_tree_node_t * myhtml_tree_node_insert_doctype(myhtml_tree_t* tree, myhtml
node->ns = MyHTML_NAMESPACE_HTML;
node->tag_id = MyHTML_TAG__DOCTYPE;
- myhtml_tree_node_add_child(tree, tree->document, node);
- myhtml_tree_index_append(tree, node);
-
+ myhtml_tree_node_add_child(tree->document, node);
return node;
}
@@ -656,12 +590,10 @@ myhtml_tree_node_t * myhtml_tree_node_insert_root(myhtml_tree_t* tree, myhtml_to
node->token = token;
node->ns = ns;
- myhtml_tree_node_add_child(tree, tree->document, node);
+ myhtml_tree_node_add_child(tree->document, node);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
tree->node_html = node;
-
return node;
}
@@ -701,9 +633,7 @@ myhtml_tree_node_t * myhtml_tree_node_insert_text(myhtml_tree_t* tree, myhtml_to
node->token = token;
node->ns = adjusted_location->ns;
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
- myhtml_tree_index_append(tree, node);
-
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
return node;
}
@@ -711,11 +641,9 @@ myhtml_tree_node_t * myhtml_tree_node_insert_by_node(myhtml_tree_t* tree, myhtml
{
enum myhtml_tree_insertion_mode mode;
myhtml_tree_node_t* adjusted_location = myhtml_tree_appropriate_place_inserting(tree, NULL, &mode);
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
-
return node;
}
@@ -730,10 +658,8 @@ myhtml_tree_node_t * myhtml_tree_node_insert_foreign_element(myhtml_tree_t* tree
node->token = token;
node->ns = adjusted_location->ns;
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
-
return node;
}
@@ -748,14 +674,13 @@ myhtml_tree_node_t * myhtml_tree_node_insert_html_element(myhtml_tree_t* tree, m
node->token = token;
node->ns = MyHTML_NAMESPACE_HTML;
- myhtml_tree_node_insert_by_mode(tree, adjusted_location, node, mode);
+ myhtml_tree_node_insert_by_mode(adjusted_location, node, mode);
myhtml_tree_open_elements_append(tree, node);
- myhtml_tree_index_append(tree, node);
-
return node;
}
-myhtml_tree_node_t * myhtml_tree_element_in_scope(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t mynamespace, enum myhtml_tag_categories category)
+myhtml_tree_node_t * myhtml_tree_element_in_scope(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx,
+ myhtml_namespace_t mynamespace, enum myhtml_tag_categories category)
{
myhtml_tree_node_t** list = tree->open_elements->list;
@@ -781,8 +706,9 @@ myhtml_tree_node_t * myhtml_tree_element_in_scope(myhtml_tree_t* tree, myhtml_ta
return NULL;
}
-bool myhtml_tree_element_in_scope_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, enum myhtml_tag_categories category)
+bool myhtml_tree_element_in_scope_by_node(myhtml_tree_node_t* node, enum myhtml_tag_categories category)
{
+ myhtml_tree_t* tree = node->tree;
myhtml_tree_node_t** list = tree->open_elements->list;
const myhtml_tag_context_t *tag_ctx;
@@ -1650,7 +1576,7 @@ void myhtml_tree_active_formatting_reconstruction(myhtml_tree_t* tree)
}
#endif
- myhtml_tree_node_t* node = myhtml_tree_node_clone(tree, list[af_idx]);
+ myhtml_tree_node_t* node = myhtml_tree_node_clone(list[af_idx]);
myhtml_tree_node_insert_by_node(tree, node);
list[af_idx] = node;
@@ -1659,7 +1585,7 @@ void myhtml_tree_active_formatting_reconstruction(myhtml_tree_t* tree)
}
}
-bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t subject_tag_idx)
+bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_tag_id_t subject_tag_idx)
{
if(tree->open_elements->length == 0)
return false;
@@ -1729,14 +1655,25 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
}
// step 7
- if(myhtml_tree_element_in_scope_by_node(tree, formatting_element, MyHTML_TAG_CATEGORIES_SCOPE) == false)
+ if(myhtml_tree_element_in_scope_by_node(formatting_element, MyHTML_TAG_CATEGORIES_SCOPE) == false) {
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:AAA_FORMATTING_ELEMENT_NOT_FOUND LEVEL:ERROR NODE:formatting_element */
return false;
+ }
// step 8
//if(afe_last != list[i])
// fprintf(stderr, "oh");
// step 9
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(current_node->ns != formatting_element->ns ||
+ current_node->tag_id != formatting_element->tag_id) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:formatting_element->token HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:formatting_element->tag_id NEED_NS:formatting_element->ns */
+ }
+
+ // 10
// Let furthest block be the topmost node in the stack of open elements
// that is lower in the stack than formatting element, and is an element in the special category. T
// here might not be one.
@@ -1752,7 +1689,7 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
}
}
- // step 10
+ // step 11
// If there is no furthest block, then the UA must first pop all the nodes from the bottom
// of the stack of open elements, from the current node up to and including formatting element,
// then remove formatting element from the list of active formatting elements, and finally abort these steps.
@@ -1768,13 +1705,15 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
return false;
}
+ /* %EXTERNAL% VALIDATOR:RULES TOKEN STATUS:AAA_BEGIN LEVEL:INFO */
+
#ifdef DEBUG_MODE
if(oel_format_el_idx == 0) {
MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 11; oel_format_el_idx is 0; Bad!");
}
#endif
- // step 11
+ // step 12
myhtml_tree_node_t* common_ancestor = oel_list[oel_format_el_idx - 1];
#ifdef DEBUG_MODE
@@ -1783,20 +1722,20 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
}
#endif
- // step 12
+ // step 13
size_t bookmark = afe_index + 1;
- // step 13
+ // step 14
myhtml_tree_node_t *node = furthest_block, *last = furthest_block;
size_t index_oel_node = idx_furthest_block;
- // step 13.1
+ // step 14.1
for(int inner_loop = 0;;)
{
- // step 13.2
+ // step 14.2
inner_loop++;
- // step 13.3
+ // step 14.3
size_t node_index;
if(myhtml_tree_open_elements_find(tree, node, &node_index) == false) {
node_index = index_oel_node;
@@ -1818,11 +1757,11 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
MyHTML_DEBUG_ERROR("Adoption agency algorithm; Step 13.3; node is NULL");
}
#endif
- // step 13.4
+ // step 14.4
if(node == formatting_element)
break;
- // step 13.5
+ // step 14.5
size_t afe_node_index;
bool is_exists = myhtml_tree_active_formatting_find(tree, node, &afe_node_index);
if(inner_loop > 3 && is_exists) {
@@ -1836,14 +1775,14 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
continue;
}
- // step 13.6
+ // step 14.6
if(is_exists == false) {
myhtml_tree_open_elements_remove(tree, node);
continue;
}
- // step 13.7
- myhtml_tree_node_t* clone = myhtml_tree_node_clone(tree, node);
+ // step 14.7
+ myhtml_tree_node_t* clone = myhtml_tree_node_clone(node);
clone->ns = MyHTML_NAMESPACE_HTML;
@@ -1852,7 +1791,7 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
node = clone;
- // step 13.8
+ // step 14.8
if(last == furthest_block) {
bookmark = afe_node_index + 1;
@@ -1863,44 +1802,44 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
#endif
}
- // step 13.9
+ // step 14.9
if(last->parent)
- myhtml_tree_node_remove(tree, last);
+ myhtml_tree_node_remove(last);
- myhtml_tree_node_add_child(tree, node, last);
+ myhtml_tree_node_add_child(node, last);
- // step 13.10
+ // step 14.10
last = node;
}
if(last->parent)
- myhtml_tree_node_remove(tree, last);
+ myhtml_tree_node_remove(last);
- // step 14
+ // step 15
enum myhtml_tree_insertion_mode insert_mode;
common_ancestor = myhtml_tree_appropriate_place_inserting(tree, common_ancestor, &insert_mode);
- myhtml_tree_node_insert_by_mode(tree, common_ancestor, last, insert_mode);
+ myhtml_tree_node_insert_by_mode(common_ancestor, last, insert_mode);
- // step 15
- myhtml_tree_node_t* new_formatting_element = myhtml_tree_node_clone(tree, formatting_element);
+ // step 16
+ myhtml_tree_node_t* new_formatting_element = myhtml_tree_node_clone(formatting_element);
new_formatting_element->ns = MyHTML_NAMESPACE_HTML;
- // step 16
+ // step 17
myhtml_tree_node_t * furthest_block_child = furthest_block->child;
while (furthest_block_child) {
myhtml_tree_node_t *next = furthest_block_child->next;
- myhtml_tree_node_remove(tree, furthest_block_child);
+ myhtml_tree_node_remove(furthest_block_child);
- myhtml_tree_node_add_child(tree, new_formatting_element, furthest_block_child);
+ myhtml_tree_node_add_child(new_formatting_element, furthest_block_child);
furthest_block_child = next;
}
- // step 17
- myhtml_tree_node_add_child(tree, furthest_block, new_formatting_element);
-
// step 18
+ myhtml_tree_node_add_child(furthest_block, new_formatting_element);
+
+ // step 19
if(myhtml_tree_active_formatting_find(tree, formatting_element, &afe_index) == false)
return false;
@@ -1916,7 +1855,7 @@ bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t
myhtml_tree_active_formatting_remove_by_index(tree, afe_index);
myhtml_tree_list_insert_by_index(tree->active_formatting, new_formatting_element, bookmark);
- // step 19
+ // step 20
myhtml_tree_open_elements_remove(tree, formatting_element);
if(myhtml_tree_open_elements_find(tree, furthest_block, &idx_furthest_block)) {
@@ -2025,15 +1964,14 @@ myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting(myhtml_tree_t* tree
return adjusted_location;
}
-myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree_t* tree, myhtml_tree_node_t* target,
- enum myhtml_tree_insertion_mode* mode)
+myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree_node_t* target, enum myhtml_tree_insertion_mode* mode)
{
*mode = MyHTML_TREE_INSERTION_MODE_BEFORE;
// step 2
myhtml_tree_node_t* adjusted_location;
- if(tree->foster_parenting) {
+ if(target->tree->foster_parenting) {
#ifdef DEBUG_MODE
if(target == NULL) {
MyHTML_DEBUG_ERROR("Appropriate place inserting; Step 2; target is NULL in return value! This IS very bad");
@@ -2383,9 +2321,17 @@ myhtml_token_node_t * myhtml_tree_token_list_current_node(myhtml_tree_token_list
}
// other
-void myhtml_tree_tags_close_p(myhtml_tree_t* tree)
+void myhtml_tree_tags_close_p(myhtml_tree_t* tree, myhtml_token_node_t* token)
{
myhtml_tree_generate_implied_end_tags(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML);
+
+ myhtml_tree_node_t* current_node = myhtml_tree_current_node(tree);
+ if(myhtml_is_html_node(current_node, MyHTML_TAG_P) == false) {
+ // parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_NO_EXPECTED LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:current_node->token NEED:NULL HAVE_TAG_ID:current_node->tag_id HAVE_NS:current_node->ns NEED_TAG_ID:MyHTML_TAG_P NEED_NS:MyHTML_NAMESPACE_HTML */
+ }
+
myhtml_tree_open_elements_pop_until(tree, MyHTML_TAG_P, MyHTML_NAMESPACE_HTML, false);
}
@@ -2443,7 +2389,7 @@ void myhtml_tree_clear_stack_back_table_row_context(myhtml_tree_t* tree)
}
}
-void myhtml_tree_close_cell(myhtml_tree_t* tree, myhtml_tree_node_t* tr_or_th_node)
+void myhtml_tree_close_cell(myhtml_tree_t* tree, myhtml_tree_node_t* tr_or_th_node, myhtml_token_node_t* token)
{
// step 1
myhtml_tree_generate_implied_end_tags(tree, 0, MyHTML_NAMESPACE_UNDEF);
@@ -2455,6 +2401,9 @@ void myhtml_tree_close_cell(myhtml_tree_t* tree, myhtml_tree_node_t* tr_or_th_no
current_node->ns == MyHTML_NAMESPACE_HTML))
{
// parse error
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED STATUS:ELEMENT_OPEN_NOT_FOUND LEVEL:ERROR */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TD NEED_NS:MyHTML_NAMESPACE_HTML */
+ /* %EXTERNAL% VALIDATOR:RULES HAVE_NEED_ADD HAVE:NULL NEED:NULL HAVE_TAG_ID:MyHTML_TAG__UNDEF HAVE_NS:MyHTML_NAMESPACE_UNDEF NEED_TAG_ID:MyHTML_TAG_TH NEED_NS:MyHTML_NAMESPACE_HTML */
}
// step 3
@@ -2568,7 +2517,7 @@ myhtml_status_t myhtml_tree_temp_tag_name_append_one(myhtml_tree_temp_tag_name_t
myhtml_status_t myhtml_tree_temp_tag_name_append(myhtml_tree_temp_tag_name_t* temp_tag_name, const char* name, size_t name_len)
{
- if(name_len == 0)
+ if(temp_tag_name->data == NULL || name_len == 0)
return MyHTML_STATUS_OK;
if((temp_tag_name->length + name_len) >= temp_tag_name->size) {
diff --git a/source/myhtml/tree.h b/source/myhtml/tree.h
index eedf914..1e126f6 100644
--- a/source/myhtml/tree.h
+++ b/source/myhtml/tree.h
@@ -89,6 +89,8 @@ struct myhtml_tree_node {
myhtml_token_node_t* token;
void* data;
+
+ myhtml_tree_t* tree;
};
enum myhtml_tree_compat_mode {
@@ -120,10 +122,6 @@ struct myhtml_tree_doctype {
char* attr_system;
};
-struct myhtml_tree_indexes {
- myhtml_tag_index_t* tags;
-};
-
struct myhtml_tree_list {
myhtml_tree_node_t** list;
volatile size_t length;
@@ -181,10 +179,11 @@ struct myhtml_tree {
mythread_queue_t* queue;
myhtml_tag_t* tags;
void* modest;
+ void* context;
// init id's
- size_t mcasync_token_id;
- size_t mcasync_attr_id;
+ size_t mcasync_rules_token_id;
+ size_t mcasync_rules_attr_id;
size_t mcasync_tree_id;
size_t mchar_node_id;
myhtml_token_attr_t* attr_current;
@@ -196,8 +195,6 @@ struct myhtml_tree {
myhtml_incoming_buffer_t* incoming_buf;
myhtml_incoming_buffer_t* incoming_buf_first;
- myhtml_tree_indexes_t* indexes;
-
// ref for nodes
myhtml_tree_node_t* document;
myhtml_tree_node_t* fragment;
@@ -230,6 +227,7 @@ struct myhtml_tree {
volatile myhtml_tree_parse_flags_t parse_flags;
bool foster_parenting;
size_t global_offset;
+ myhtml_status_t tokenizer_status;
myhtml_encoding_t encoding;
myhtml_encoding_t encoding_usereq;
@@ -262,7 +260,6 @@ void myhtml_tree_parse_flags_set(myhtml_tree_t* tree, myhtml_tree_parse_flags_t
myhtml_t * myhtml_tree_get_myhtml(myhtml_tree_t* tree);
myhtml_tag_t * myhtml_tree_get_tag(myhtml_tree_t* tree);
-myhtml_tag_index_t * myhtml_tree_get_tag_index(myhtml_tree_t* tree);
myhtml_tree_node_t * myhtml_tree_get_document(myhtml_tree_t* tree);
myhtml_tree_node_t * myhtml_tree_get_node_html(myhtml_tree_t* tree);
myhtml_tree_node_t * myhtml_tree_get_node_head(myhtml_tree_t* tree);
@@ -331,11 +328,11 @@ bool myhtml_tree_open_elements_find_reverse(myhtml_tree_t* tree, myhtml_tree_nod
myhtml_tree_node_t * myhtml_tree_open_elements_find_by_tag_idx(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t mynamespace, size_t* return_index);
myhtml_tree_node_t * myhtml_tree_open_elements_find_by_tag_idx_reverse(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t mynamespace, size_t* return_index);
myhtml_tree_node_t * myhtml_tree_element_in_scope(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t mynamespace, enum myhtml_tag_categories category);
-bool myhtml_tree_element_in_scope_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, enum myhtml_tag_categories category);
+bool myhtml_tree_element_in_scope_by_node(myhtml_tree_node_t* node, enum myhtml_tag_categories category);
void myhtml_tree_generate_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t exclude_tag_idx, myhtml_namespace_t mynamespace);
void myhtml_tree_generate_all_implied_end_tags(myhtml_tree_t* tree, myhtml_tag_id_t exclude_tag_idx, myhtml_namespace_t mynamespace);
myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting(myhtml_tree_t* tree, myhtml_tree_node_t* override_target, enum myhtml_tree_insertion_mode* mode);
-myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree_t* tree, myhtml_tree_node_t* target, enum myhtml_tree_insertion_mode* mode);
+myhtml_tree_node_t * myhtml_tree_appropriate_place_inserting_in_tree(myhtml_tree_node_t* target, enum myhtml_tree_insertion_mode* mode);
// template insertion
myhtml_tree_insertion_list_t * myhtml_tree_template_insertion_init(myhtml_tree_t* tree);
@@ -347,55 +344,47 @@ void myhtml_tree_template_insertion_pop(myhtml_tree_t* tree);
void myhtml_tree_reset_insertion_mode_appropriately(myhtml_tree_t* tree);
-bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_tag_id_t subject_tag_idx);
+bool myhtml_tree_adoption_agency_algorithm(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_tag_id_t subject_tag_idx);
size_t myhtml_tree_template_insertion_length(myhtml_tree_t* tree);
// other for a tree
myhtml_tree_node_t * myhtml_tree_node_create(myhtml_tree_t* tree);
-void myhtml_tree_node_delete(myhtml_tree_t* tree, myhtml_tree_node_t* node);
-void myhtml_tree_node_delete_recursive(myhtml_tree_t* tree, myhtml_tree_node_t* node);
+void myhtml_tree_node_delete(myhtml_tree_node_t* node);
+void myhtml_tree_node_delete_recursive(myhtml_tree_node_t* node);
void myhtml_tree_node_clean(myhtml_tree_node_t* tree_node);
-void myhtml_tree_node_free(myhtml_tree_t* tree, myhtml_tree_node_t* node);
-myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_t* tree, myhtml_tree_node_t* node);
+void myhtml_tree_node_free(myhtml_tree_node_t* node);
+myhtml_tree_node_t * myhtml_tree_node_clone(myhtml_tree_node_t* node);
void myhtml_tree_print_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out);
void myhtml_tree_print_node_children(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc);
void myhtml_tree_print_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, FILE* out, size_t inc);
-void myhtml_tree_node_add_child(myhtml_tree_t* myhtml_tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
-void myhtml_tree_node_insert_before(myhtml_tree_t* myhtml_tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
-void myhtml_tree_node_insert_after(myhtml_tree_t* myhtml_tree, myhtml_tree_node_t* root, myhtml_tree_node_t* node);
-void myhtml_tree_node_insert_by_mode(myhtml_tree_t* tree, myhtml_tree_node_t* adjusted_location, myhtml_tree_node_t* node, enum myhtml_tree_insertion_mode mode);
-myhtml_tree_node_t * myhtml_tree_node_remove(myhtml_tree_t* tree, myhtml_tree_node_t* node);
+void myhtml_tree_node_add_child(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+void myhtml_tree_node_insert_before(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+void myhtml_tree_node_insert_after(myhtml_tree_node_t* root, myhtml_tree_node_t* node);
+void myhtml_tree_node_insert_by_mode(myhtml_tree_node_t* adjusted_location, myhtml_tree_node_t* node, enum myhtml_tree_insertion_mode mode);
+myhtml_tree_node_t * myhtml_tree_node_remove(myhtml_tree_node_t* node);
myhtml_tree_node_t * myhtml_tree_node_insert_html_element(myhtml_tree_t* tree, myhtml_token_node_t* token);
myhtml_tree_node_t * myhtml_tree_node_insert_foreign_element(myhtml_tree_t* tree, myhtml_token_node_t* token);
-myhtml_tree_node_t * myhtml_tree_node_insert_by_token(myhtml_tree_t* tree, myhtml_token_node_t* token, enum myhtml_namespace ns);
-myhtml_tree_node_t * myhtml_tree_node_insert(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, enum myhtml_namespace ns);
+myhtml_tree_node_t * myhtml_tree_node_insert_by_token(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_namespace_t ns);
+myhtml_tree_node_t * myhtml_tree_node_insert(myhtml_tree_t* tree, myhtml_tag_id_t tag_idx, myhtml_namespace_t ns);
myhtml_tree_node_t * myhtml_tree_node_insert_by_node(myhtml_tree_t* tree, myhtml_tree_node_t* idx);
myhtml_tree_node_t * myhtml_tree_node_insert_comment(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_tree_node_t* parent);
myhtml_tree_node_t * myhtml_tree_node_insert_doctype(myhtml_tree_t* tree, myhtml_token_node_t* token);
-myhtml_tree_node_t * myhtml_tree_node_insert_root(myhtml_tree_t* tree, myhtml_token_node_t* token, enum myhtml_namespace ns);
+myhtml_tree_node_t * myhtml_tree_node_insert_root(myhtml_tree_t* tree, myhtml_token_node_t* token, myhtml_namespace_t ns);
myhtml_tree_node_t * myhtml_tree_node_insert_text(myhtml_tree_t* tree, myhtml_token_node_t* token);
myhtml_tree_node_t * myhtml_tree_node_find_parent_by_tag_id(myhtml_tree_node_t* node, myhtml_tag_id_t tag_id);
-// indexes
-myhtml_tree_indexes_t * myhtml_tree_index_create(myhtml_tree_t* tree, myhtml_tag_t* tags);
-void myhtml_tree_index_clean(myhtml_tree_t* tree, myhtml_tag_t* tags);
-myhtml_tree_indexes_t * myhtml_tree_index_destroy(myhtml_tree_t* tree, myhtml_tag_t* tags);
-
-void myhtml_tree_index_append(myhtml_tree_t* tree, myhtml_tree_node_t* node);
-myhtml_tree_node_t * myhtml_tree_index_get(myhtml_tree_t* tree, myhtml_tag_id_t tag_id);
-
// other
void myhtml_tree_wait_for_last_done_token(myhtml_tree_t* tree, myhtml_token_node_t* token_for_wait);
-void myhtml_tree_tags_close_p(myhtml_tree_t* tree);
+void myhtml_tree_tags_close_p(myhtml_tree_t* tree, myhtml_token_node_t* token);
myhtml_tree_node_t * myhtml_tree_generic_raw_text_element_parsing_algorithm(myhtml_tree_t* tree, myhtml_token_node_t* token_node);
void myhtml_tree_clear_stack_back_table_context(myhtml_tree_t* tree);
void myhtml_tree_clear_stack_back_table_body_context(myhtml_tree_t* tree);
void myhtml_tree_clear_stack_back_table_row_context(myhtml_tree_t* tree);
-void myhtml_tree_close_cell(myhtml_tree_t* tree, myhtml_tree_node_t* tr_or_th_node);
+void myhtml_tree_close_cell(myhtml_tree_t* tree, myhtml_tree_node_t* tr_or_th_node, myhtml_token_node_t* token);
bool myhtml_tree_is_mathml_integration_point(myhtml_tree_t* tree, myhtml_tree_node_t* node);
bool myhtml_tree_is_html_integration_point(myhtml_tree_t* tree, myhtml_tree_node_t* node);
diff --git a/source/myhtml/utils.c b/source/myhtml/utils.c
index eb5c741..52a99da 100644
--- a/source/myhtml/utils.c
+++ b/source/myhtml/utils.c
@@ -185,4 +185,17 @@ size_t myhtml_strcmp_ws(const char* str1, const char* str2)
}
}
+/*
+ * Compare two NUL-terminated byte strings after mapping every byte through
+ * myhtml_utils_chars_map (presumably a case-folding table -- confirm against
+ * its definition).
+ *
+ * Only the end of ustr1 is checked: the walk stops at the terminator of ustr1
+ * and the terminator of ustr2 is never tested on its own. As a consequence the
+ * function returns true as soon as all of ustr1 has matched, even if ustr2
+ * continues past that point.
+ *
+ * Returns true when every byte of ustr1 maps to the same value as the byte at
+ * the same position in ustr2, false on the first mismatch.
+ */
+bool myhtml_ustrcasecmp_without_checks_by_secondary(const unsigned char* ustr1, const unsigned char* ustr2)
+{
+    for(; *ustr1 != '\0'; ustr1++, ustr2++) {
+        if(myhtml_utils_chars_map[*ustr1] != myhtml_utils_chars_map[*ustr2])
+            return false;
+    }
+
+    return true;
+}
+
diff --git a/source/myhtml/utils.h b/source/myhtml/utils.h
index 2db22c2..bca879c 100644
--- a/source/myhtml/utils.h
+++ b/source/myhtml/utils.h
@@ -40,6 +40,7 @@ size_t myhtml_strcasecmp(const char* str1, const char* str2);
size_t myhtml_strncmp(const char* str1, const char* str2, size_t size);
size_t myhtml_strcmp(const char* str1, const char* str2);
size_t myhtml_strcmp_ws(const char* str1, const char* str2);
+bool myhtml_ustrcasecmp_without_checks_by_secondary(const unsigned char* ustr1, const unsigned char* ustr2);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/source/myhtml/utils/mcobject.c b/source/myhtml/utils/mcobject.c
index 9c2852e..3353909 100644
--- a/source/myhtml/utils/mcobject.c
+++ b/source/myhtml/utils/mcobject.c
@@ -150,10 +150,15 @@ void * mcobject_malloc(mcobject_t *mcobject, myhtml_status_t* status)
if(chunk == NULL || chunk->length >= chunk->size)
{
- mcobject_chunk_malloc(mcobject, status);
+ myhtml_status_t ns_status;
+ mcobject_chunk_malloc(mcobject, &ns_status);
- if(status && *status)
+ if(ns_status) {
+ if(status)
+ *status = ns_status;
+
return NULL;
+ }
chunk = mcobject->chunk;
}
diff --git a/source/myhtml/utils/mctree.c b/source/myhtml/utils/mctree.c
index e13d424..526560c 100644
--- a/source/myhtml/utils/mctree.c
+++ b/source/myhtml/utils/mctree.c
@@ -26,7 +26,7 @@ mctree_t * mctree_create(size_t start_size)
{
mctree_t* mctree = (mctree_t*)myhtml_malloc(sizeof(mctree_t));
- mctree->nodes_size = start_size + 4096;
+ mctree->nodes_size = start_size + 512;
mctree->nodes_length = start_size + 1;
mctree->nodes = (mctree_node_t*)myhtml_calloc(mctree->nodes_size, sizeof(mctree_node_t));
diff --git a/source/myhtml/utils/mhash.c b/source/myhtml/utils/mhash.c
new file mode 100644
index 0000000..1929f26
--- /dev/null
+++ b/source/myhtml/utils/mhash.c
@@ -0,0 +1,267 @@
+/*
+ Copyright (C) 2017 Alexander Borisov
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Author: lex.borisov@gmail.com (Alexander Borisov)
+*/
+
+#include "myhtml/utils/mhash.h"
+
+/*
+ * Map a length-delimited key to a bucket index in [0, table_size).
+ *
+ * The mixing steps follow the "one-at-a-time" pattern: every key byte is
+ * added and stirred with a shift-add / shift-xor pair, then three final
+ * avalanche steps are applied before reducing modulo table_size.
+ *
+ * key        - bytes to hash; only read when key_size > 0.
+ * key_size   - number of bytes of key to consume.
+ * table_size - number of buckets; must be non-zero (used as a divisor).
+ */
+size_t myhtml_utils_mhash_hash(const char* key, size_t key_size, size_t table_size)
+{
+    size_t digest = 0;
+    size_t pos = 0;
+
+    while(pos < key_size) {
+        digest += key[pos];
+        digest += (digest << 10);
+        digest ^= (digest >> 6);
+
+        pos++;
+    }
+
+    /* final avalanche */
+    digest += (digest << 3);
+    digest ^= (digest >> 11);
+    digest += (digest << 15);
+
+    return digest % table_size;
+}
+
+/*
+ * Allocate a zero-filled hash object.
+ *
+ * The object is not usable until myhtml_utils_mhash_init() has been called
+ * on it. Returns NULL when the allocation fails.
+ */
+myhtml_utils_mhash_t * myhtml_utils_mhash_create(void)
+{
+    return myhtml_calloc(1, sizeof(myhtml_utils_mhash_t));
+}
+
+/*
+ * Prepare a hash object for use.
+ *
+ * mhash      - object to initialise (must not be NULL).
+ * table_size - requested number of buckets; values below 128 are raised to 128.
+ * max_depth  - chain length after which an insert triggers a table rebuild;
+ *              values below 1 are raised to 1.
+ *
+ * Returns MyHTML_STATUS_OK on success, or
+ * MyHTML_STATUS_ATTR_ERROR_ALLOCATION when either the bucket table or the
+ * character arena cannot be allocated. On failure nothing allocated by this
+ * call is left behind.
+ */
+myhtml_status_t myhtml_utils_mhash_init(myhtml_utils_mhash_t* mhash, size_t table_size, size_t max_depth)
+{
+    if(table_size < 128)
+        table_size = 128;
+
+    if(max_depth < 1)
+        max_depth = 1;
+
+    /* Allocate the bucket table first: it can be released with a plain free
+       if the arena allocation below fails. */
+    mhash->table = myhtml_calloc(table_size, sizeof(myhtml_utils_mhash_entry_t*));
+    if(mhash->table == NULL)
+        return MyHTML_STATUS_ATTR_ERROR_ALLOCATION;
+
+    mhash->mchar_obj = mchar_async_create(128, 4096);
+    if(mhash->mchar_obj == NULL) {
+        myhtml_free(mhash->table);
+        mhash->table = NULL;
+
+        return MyHTML_STATUS_ATTR_ERROR_ALLOCATION;
+    }
+
+    mhash->mchar_node = mchar_async_node_add(mhash->mchar_obj);
+
+    mhash->table_max_depth = max_depth;
+    mhash->table_size = table_size;
+    mhash->table_length = 0;
+
+    return MyHTML_STATUS_OK;
+}
+
+/*
+ * Empty the hash without releasing it: the character arena that backs the
+ * entries is cleaned and every bucket pointer is reset to NULL. The bucket
+ * count (table_size) is left unchanged.
+ */
+void myhtml_utils_mhash_clean(myhtml_utils_mhash_t* mhash)
+{
+    const size_t table_bytes = sizeof(myhtml_utils_mhash_entry_t*) * mhash->table_size;
+
+    mchar_async_clean(mhash->mchar_obj);
+    memset(mhash->table, 0, table_bytes);
+}
+
+/*
+ * Release everything owned by the hash.
+ *
+ * mhash        - object to tear down; NULL is accepted and ignored.
+ * self_destroy - when true the object itself is freed as well.
+ *
+ * Returns NULL when mhash was NULL or was freed, otherwise returns mhash
+ * (now holding no table and no arena).
+ */
+myhtml_utils_mhash_t * myhtml_utils_mhash_destroy(myhtml_utils_mhash_t* mhash, bool self_destroy)
+{
+    if(mhash == NULL)
+        return NULL;
+
+    if(mhash->table) {
+        myhtml_free(mhash->table);
+        mhash->table = NULL;
+    }
+
+    /* The arena holds every entry and key; without this it is leaked. */
+    if(mhash->mchar_obj) {
+        mchar_async_destroy(mhash->mchar_obj, 1);
+        mhash->mchar_obj = NULL;
+    }
+
+    if(self_destroy) {
+        /* Free the object itself (the table was already released above). */
+        myhtml_free(mhash);
+        return NULL;
+    }
+
+    return mhash;
+}
+
+/*
+ * Allocate a detached entry from the hash's character arena.
+ *
+ * The key is copied (key_size bytes plus a terminating '\0'), so the caller's
+ * buffer does not need to outlive the entry. The entry is not linked into any
+ * bucket; its next pointer is NULL.
+ *
+ * Returns the new entry, or NULL when either allocation fails.
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_create_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value)
+{
+    myhtml_utils_mhash_entry_t *entry = (myhtml_utils_mhash_entry_t*)
+        mchar_async_malloc(mhash->mchar_obj, mhash->mchar_node, sizeof(myhtml_utils_mhash_entry_t));
+
+    /* Must be checked before the first write through entry. */
+    if(entry == NULL)
+        return NULL;
+
+    entry->key = mchar_async_malloc(mhash->mchar_obj, mhash->mchar_node, (sizeof(char) * key_size) + 1);
+
+    if(entry->key == NULL) {
+        mchar_async_free(mhash->mchar_obj, mhash->mchar_node, (char*)entry);
+        return NULL;
+    }
+
+    memcpy(entry->key, key, (sizeof(char) * key_size));
+    entry->key[key_size] = '\0';
+
+    entry->key_length = key_size;
+    entry->value = value;
+    entry->next = NULL;
+
+    return entry;
+}
+
+/*
+ * Find the entry for key, creating it when it does not exist yet.
+ *
+ * An existing entry is returned untouched, so the caller can decide whether
+ * to keep or replace its value; a newly created entry has value == NULL.
+ *
+ * The table is rebuilt (doubled) when the target bucket is empty and
+ * table_length has reached 3/4 of table_size, or when a new entry had to be
+ * appended deeper than table_max_depth in a chain.
+ * NOTE(review): nothing in this file increments table_length, so the first
+ * trigger looks unreachable as written -- confirm the intended bookkeeping.
+ *
+ * Returns NULL for a NULL/empty key or when the entry cannot be allocated.
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add_with_choice(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size)
+{
+    if(key == NULL || key_size == 0)
+        return NULL;
+
+    size_t hash_id = myhtml_utils_mhash_hash(key, key_size, mhash->table_size);
+
+    myhtml_utils_mhash_entry_t *entry;
+
+    if(mhash->table[hash_id] == NULL) {
+        /* rebuild table if need */
+        if(mhash->table_length >= (mhash->table_size - (mhash->table_size / 4))) {
+            /* A successful rebuild changes table_size, so the bucket index
+               computed above no longer applies to the new table. */
+            if(myhtml_utils_mhash_rebuld(mhash))
+                hash_id = myhtml_utils_mhash_hash(key, key_size, mhash->table_size);
+        }
+    }
+
+    /* Re-tested on purpose: after a rebuild the key's new bucket may already
+       be occupied and must then be handled by the chain walk below. */
+    if(mhash->table[hash_id] == NULL) {
+        mhash->table[hash_id] = myhtml_utils_mhash_create_entry(mhash, key, key_size, NULL);
+        return mhash->table[hash_id];
+    }
+
+    size_t depth = 0;
+    entry = mhash->table[hash_id];
+
+    do {
+        if(entry->key_length == key_size) {
+            if(strncmp(entry->key, key, key_size) == 0)
+                return entry;
+        }
+
+        if(entry->next == NULL) {
+            entry->next = myhtml_utils_mhash_create_entry(mhash, key, key_size, NULL);
+
+            if(depth > mhash->table_max_depth) {
+                /* Keep the pointer: the rebuild relinks the chains, but the
+                   entry itself stays where it was allocated. */
+                myhtml_utils_mhash_entry_t *entry_new = entry->next;
+                myhtml_utils_mhash_rebuld(mhash);
+
+                return entry_new;
+            }
+
+            return entry->next;
+        }
+
+        depth++;
+        entry = entry->next;
+    }
+    while(1);
+}
+
+/*
+ * Insert key with the given value, or overwrite the value when the key is
+ * already present.
+ *
+ * Returns the entry that now holds value, or NULL when the lookup/creation
+ * step returned no entry.
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value)
+{
+    myhtml_utils_mhash_entry_t *slot = myhtml_utils_mhash_add_with_choice(mhash, key, key_size);
+
+    if(slot == NULL)
+        return NULL;
+
+    slot->value = value;
+    return slot;
+}
+
+/*
+ * Look up key without modifying the table.
+ *
+ * The value parameter is accepted but not used by the lookup.
+ *
+ * Returns the matching entry, or NULL when the key is NULL, empty or absent.
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_search(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value)
+{
+    if(key == NULL || key_size == 0)
+        return NULL;
+
+    size_t bucket = myhtml_utils_mhash_hash(key, key_size, mhash->table_size);
+
+    for(myhtml_utils_mhash_entry_t *node = mhash->table[bucket]; node; node = node->next) {
+        /* cheap length test first, then compare the bytes */
+        if(node->key_length != key_size)
+            continue;
+
+        if(strncmp(node->key, key, key_size) == 0)
+            return node;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return the head of the chain stored in bucket id, or NULL when id is not a
+ * valid bucket index (or the bucket is empty).
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_entry_by_id(myhtml_utils_mhash_t* mhash, size_t id)
+{
+    if(id >= mhash->table_size)
+        return NULL;
+
+    return mhash->table[id];
+}
+
+/* Return the current number of buckets in the table. */
+size_t myhtml_utils_mhash_get_table_size(myhtml_utils_mhash_t* mhash)
+{
+    const size_t bucket_count = mhash->table_size;
+
+    return bucket_count;
+}
+
+/*
+ * Link an already allocated entry into the current table.
+ *
+ * The entry's next pointer is cleared, the bucket is derived from key and the
+ * current table_size, and the entry is appended at the tail of that bucket's
+ * chain. No duplicate check is made.
+ *
+ * Returns ext_entry, or NULL (leaving ext_entry untouched) when key is NULL
+ * or key_size is 0.
+ */
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_rebuild_add_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, myhtml_utils_mhash_entry_t *ext_entry)
+{
+    if(key == NULL || key_size == 0)
+        return NULL;
+
+    ext_entry->next = NULL;
+
+    size_t bucket = myhtml_utils_mhash_hash(key, key_size, mhash->table_size);
+
+    /* Walk the link fields themselves until the first empty one is found:
+       that is either the bucket head or the next pointer of the last entry. */
+    myhtml_utils_mhash_entry_t **link = &mhash->table[bucket];
+
+    while(*link != NULL)
+        link = &(*link)->next;
+
+    *link = ext_entry;
+
+    return ext_entry;
+}
+
+/*
+ * Double the number of buckets and redistribute every existing entry.
+ *
+ * Entries are relinked in place (no entry is reallocated), so pointers to
+ * entries held by callers stay valid across a rebuild.
+ *
+ * Returns the new table, or NULL when the larger table cannot be allocated;
+ * in that case the hash is left exactly as it was.
+ */
+myhtml_utils_mhash_entry_t ** myhtml_utils_mhash_rebuld(myhtml_utils_mhash_t* mhash)
+{
+    myhtml_utils_mhash_entry_t **old_table = mhash->table;
+    size_t old_size = mhash->table_size;
+
+    mhash->table_size = old_size << 1;
+    mhash->table = myhtml_calloc(mhash->table_size, sizeof(myhtml_utils_mhash_entry_t*));
+
+    if(mhash->table == NULL) {
+        mhash->table = old_table;
+        mhash->table_size = old_size;
+
+        return NULL;
+    }
+
+    /* Only the old table's own buckets may be visited. */
+    for(size_t i = 0; i < old_size; i++) {
+        myhtml_utils_mhash_entry_t *entry = old_table[i];
+
+        while(entry) {
+            /* Relinking resets entry->next, so remember the rest of the old
+               chain before handing the entry over. */
+            myhtml_utils_mhash_entry_t *next = entry->next;
+
+            myhtml_utils_mhash_rebuild_add_entry(mhash, entry->key, entry->key_length, entry);
+
+            entry = next;
+        }
+    }
+
+    myhtml_free(old_table);
+
+    return mhash->table;
+}
+
+
diff --git a/source/myhtml/utils/mhash.h b/source/myhtml/utils/mhash.h
new file mode 100644
index 0000000..c3c8aec
--- /dev/null
+++ b/source/myhtml/utils/mhash.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (C) 2017 Alexander Borisov
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Author: lex.borisov@gmail.com (Alexander Borisov)
+*/
+
+#ifndef html_mhash_h
+#define html_mhash_h
+
+#include "myhtml/myosi.h"
+#include "myhtml/utils/mchar_async.h"
+
+/*
+ * One key/value pair stored in the hash. Entries that fall into the same
+ * bucket are chained through next.
+ */
+typedef struct myhtml_utils_mhash_entry {
+    char *key;          /* copy of the key, '\0'-terminated */
+    size_t key_length;  /* key length in bytes, terminator not counted */
+
+    void *value;        /* caller-supplied payload */
+
+    struct myhtml_utils_mhash_entry *next; /* next entry in the bucket, or NULL */
+} myhtml_utils_mhash_entry_t;
+
+/*
+ * Hash table with separate chaining. Entries and their keys are allocated
+ * from mchar_obj through the node id in mchar_node.
+ */
+typedef struct myhtml_utils_mhash {
+    mchar_async_t *mchar_obj;            /* arena backing entries and keys */
+    size_t mchar_node;                   /* arena node used for allocations */
+
+    myhtml_utils_mhash_entry_t **table;  /* array of bucket heads */
+    size_t table_size;                   /* number of buckets in table */
+    size_t table_length;                 /* compared against 3/4 of table_size before an insert into an empty bucket */
+
+    size_t table_max_depth;              /* chain depth that triggers a rebuild */
+} myhtml_utils_mhash_t;
+
+myhtml_utils_mhash_t * myhtml_utils_mhash_create(void);
+myhtml_status_t myhtml_utils_mhash_init(myhtml_utils_mhash_t* mhash, size_t table_size, size_t depth);
+void myhtml_utils_mhash_clean(myhtml_utils_mhash_t* mhash);
+myhtml_utils_mhash_t * myhtml_utils_mhash_destroy(myhtml_utils_mhash_t* mhash, bool self_destroy);
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_create_entry(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value);
+
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value);
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_search(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size, void* value);
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_add_with_choice(myhtml_utils_mhash_t* mhash, const char* key, size_t key_size);
+
+myhtml_utils_mhash_entry_t * myhtml_utils_mhash_entry_by_id(myhtml_utils_mhash_t* mhash, size_t id);
+size_t myhtml_utils_mhash_get_table_size(myhtml_utils_mhash_t* mhash);
+
+myhtml_utils_mhash_entry_t ** myhtml_utils_mhash_rebuld(myhtml_utils_mhash_t* mhash);
+
+#endif
diff --git a/source/myhtml/utils/resources.h b/source/myhtml/utils/resources.h
index c9acbac..beee061 100644
--- a/source/myhtml/utils/resources.h
+++ b/source/myhtml/utils/resources.h
@@ -24,6 +24,10 @@
#include <stddef.h>
+#define MyHTML_TOKENIZER_CHAR_OTHER '\000'
+#define MyHTML_TOKENIZER_CHAR_A_Z_a_z '\001'
+#define MyHTML_TOKENIZER_CHAR_WHITESPACE '\002'
+
static const unsigned char myhtml_string_chars_num_map[] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,