diff options
Diffstat (limited to 'source/myhtml/parser.c')
-rw-r--r-- | source/myhtml/parser.c | 66 |
1 files changed, 36 insertions, 30 deletions
diff --git a/source/myhtml/parser.c b/source/myhtml/parser.c index 95cdc1b..95a7764 100644 --- a/source/myhtml/parser.c +++ b/source/myhtml/parser.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2015-2016 Alexander Borisov + Copyright (C) 2015-2017 Alexander Borisov This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -24,19 +24,19 @@ void myhtml_parser_stream(mythread_id_t thread_id, void* ctx) { mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx; - if((qnode->tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) { - while(myhtml_rules_tree_dispatcher(qnode->tree, qnode->token)){} + if((((myhtml_tree_t*)(qnode->context))->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE) == 0) { + while(myhtml_rules_tree_dispatcher(qnode->context, qnode->args)){} } } -size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once if((relative_begin + length) <= buffer->size) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char); else myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -57,7 +57,7 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position); if(relative_end > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) save_position = myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char); else save_position = myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -72,7 +72,7 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position); if(length > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_lowercase_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char); else myhtml_string_append_lowercase_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -87,14 +87,14 @@ size_t myhtml_parser_token_data_to_string_lowercase(myhtml_tree_t *tree, myhtml_ return str->length; } -size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once if((relative_begin + length) <= buffer->size) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_with_preprocessing(str, &buffer->data[relative_begin], length, proc_entry->emit_null_char); else myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -115,7 +115,7 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], relative_end, save_position); if(relative_end > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) save_position = myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (relative_end - tmp_offset), proc_entry->emit_null_char); else save_position = myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, @@ -131,12 +131,12 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* size_t tmp_offset = myhtml_string_before_append_any_preprocessing(str, &buffer->data[relative_begin], length, save_position); if(length > 0) { - if(tree->encoding == MyHTML_ENCODING_UTF_8) + if(tree->encoding == MyENCODING_UTF_8) myhtml_string_append_with_preprocessing(str, &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), proc_entry->emit_null_char); else myhtml_string_append_chunk_with_convert_encoding_with_preprocessing(str, &proc_entry->res, - &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), - proc_entry->encoding, proc_entry->emit_null_char); + &buffer->data[(relative_begin + tmp_offset)], (length - tmp_offset), + proc_entry->encoding, proc_entry->emit_null_char); } break; @@ -146,9 +146,9 @@ size_t myhtml_parser_token_data_to_string(myhtml_tree_t *tree, myhtml_string_t* return str->length; } -size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, myhtml_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) +size_t myhtml_parser_token_data_to_string_charef(myhtml_tree_t *tree, mycore_string_t* str, myhtml_data_process_entry_t* proc_entry, size_t begin, size_t length) { - myhtml_incoming_buffer_t *buffer = myhtml_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); + mycore_incoming_buffer_t *buffer = mycore_incoming_buffer_find_by_position(tree->incoming_buf_first, begin); size_t relative_begin = begin - buffer->offset; // if token data length in one buffer then print them all at once @@ -186,8 +186,8 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) { mythread_queue_node_t *qnode = (mythread_queue_node_t*)ctx; - myhtml_tree_t* tree = qnode->tree; - myhtml_token_node_t* token = qnode->token; + myhtml_tree_t* tree = qnode->context; + myhtml_token_node_t* token = qnode->args; /* * Tree can not be built without tokens @@ -201,8 +201,8 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) * return true * we need check both, 1 and 2 */ - if((qnode->tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) && - (qnode->tree->parse_flags & 2)) + if((tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN) && + (tree->parse_flags & 2)) { if(tree->callback_before_token) tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx); @@ -215,15 +215,21 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) return; } + size_t mchar_node_id; +#ifndef MyCORE_BUILD_WITHOUT_THREADS + if(tree->myhtml->thread_batch) + mchar_node_id = tree->async_args[(thread_id + tree->myhtml->thread_batch->id_increase)].mchar_node_id; + else +#endif + mchar_node_id = tree->async_args[thread_id].mchar_node_id; + if(tree->callback_before_token) tree->callback_before_token_ctx = tree->callback_before_token(tree, token, tree->callback_before_token_ctx); - size_t mchar_node_id = qnode->tree->async_args[thread_id].mchar_node_id; - if(token->tag_id == MyHTML_TAG__TEXT || token->tag_id == MyHTML_TAG__COMMENT) { - myhtml_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &token->str, (token->raw_length + 1)); token->attr_first = NULL; token->attr_last = NULL; @@ -246,7 +252,7 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) } else if(token->attr_first) { - myhtml_string_clean_all(&token->str); + mycore_string_clean_all(&token->str); myhtml_token_attr_t* attr = token->attr_first; myhtml_data_process_entry_t proc_entry; @@ -257,22 +263,22 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) myhtml_data_process_entry_clean(&proc_entry); proc_entry.encoding = tree->encoding; - myhtml_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &attr->key, (attr->raw_key_length + 1)); myhtml_parser_token_data_to_string_lowercase(tree, &attr->key, &proc_entry, attr->raw_key_begin, attr->raw_key_length); } else - myhtml_string_clean_all(&attr->key); + mycore_string_clean_all(&attr->key); if(attr->raw_value_length) { myhtml_data_process_entry_clean(&proc_entry); proc_entry.encoding = tree->encoding; proc_entry.is_attributes = true; - myhtml_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1)); + mycore_string_init(tree->mchar, mchar_node_id, &attr->value, (attr->raw_value_length + 1)); myhtml_parser_token_data_to_string_charef(tree, &attr->value, &proc_entry, attr->raw_value_begin, attr->raw_value_length); } else - myhtml_string_clean_all(&attr->value); + mycore_string_clean_all(&attr->value); attr = attr->next; } @@ -281,7 +287,7 @@ void myhtml_parser_worker(mythread_id_t thread_id, void* ctx) token->attr_first = NULL; token->attr_last = NULL; - myhtml_string_clean_all(&token->str); + mycore_string_clean_all(&token->str); } token->type |= MyHTML_TOKEN_TYPE_DONE; |