Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/sundown-ffi.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/markdown.c')
-rw-r--r-- src/markdown.c 2556
1 files changed, 2556 insertions, 0 deletions
diff --git a/src/markdown.c b/src/markdown.c
new file mode 100644
index 0000000..ea3cf23
--- /dev/null
+++ b/src/markdown.c
@@ -0,0 +1,2556 @@
+/* markdown.c - generic markdown parser */
+
+/*
+ * Copyright (c) 2009, Natacha Porté
+ * Copyright (c) 2011, Vicent Marti
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "markdown.h"
+#include "stack.h"
+
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#if defined(_WIN32)
+#define strncasecmp _strnicmp /* Windows CRT name for the case-insensitive bounded compare */
+#endif
+
+#define REF_TABLE_SIZE 8 /* number of hash buckets in sd_markdown.refs */
+
+#define BUFFER_BLOCK 0 /* index into sd_markdown.work_bufs: block-level scratch buffers */
+#define BUFFER_SPAN 1 /* index into sd_markdown.work_bufs: span-level scratch buffers */
+
+#define MKD_LI_END 8 /* internal list flag */
+
+#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) /* presumably consumed by html_blocks.h (not visible here) */
+#define GPERF_DOWNCASE 1
+#define GPERF_CASE_STRNCMP 1
+#include "html_blocks.h"
+
+/***************
+ * LOCAL TYPES *
+ ***************/
+
+/* link_ref: reference to a link
+ * One entry of the chained hash table kept in sd_markdown.refs. */
+struct link_ref {
+ unsigned int id; /* hash of the reference name, computed by hash_link_ref() */
+
+ struct buf *link; /* link target; released with bufrelease() in free_link_refs() */
+ struct buf *title; /* link title; released with bufrelease() in free_link_refs() */
+
+ struct link_ref *next; /* next entry chained in the same bucket */
+};
+
+/* char_trigger: function pointer to render active chars
+ * ob is the output buffer, rndr the renderer state */
+/* returns the number of chars taken care of (0 means the trigger declined) */
+/* data is the pointer of the beginning of the span */
+/* offset is the number of valid chars before data */
+struct sd_markdown;
+typedef size_t
+(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+
+static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
+
+enum markdown_char_t { /* enumerator positions line up with markdown_char_ptrs[]; keep both in the same order */
+ MD_CHAR_NONE = 0, /* matches the NULL slot of the table */
+ MD_CHAR_EMPHASIS,
+ MD_CHAR_CODESPAN,
+ MD_CHAR_LINEBREAK,
+ MD_CHAR_LINK,
+ MD_CHAR_LANGLE,
+ MD_CHAR_ESCAPE,
+ MD_CHAR_ENTITITY, /* sic: the identifier is spelled this way; its table slot holds char_entity */
+ MD_CHAR_AUTOLINK_URL,
+ MD_CHAR_AUTOLINK_EMAIL,
+ MD_CHAR_AUTOLINK_WWW,
+ MD_CHAR_SUPERSCRIPT,
+};
+
+static char_trigger markdown_char_ptrs[] = { /* indexed in parse_inline() by the action byte read from active_char[] */
+ NULL, /* MD_CHAR_NONE */
+ &char_emphasis, /* MD_CHAR_EMPHASIS */
+ &char_codespan, /* MD_CHAR_CODESPAN */
+ &char_linebreak, /* MD_CHAR_LINEBREAK */
+ &char_link, /* MD_CHAR_LINK */
+ &char_langle_tag, /* MD_CHAR_LANGLE */
+ &char_escape, /* MD_CHAR_ESCAPE */
+ &char_entity, /* MD_CHAR_ENTITITY */
+ &char_autolink_url, /* MD_CHAR_AUTOLINK_URL */
+ &char_autolink_email, /* MD_CHAR_AUTOLINK_EMAIL */
+ &char_autolink_www, /* MD_CHAR_AUTOLINK_WWW */
+ &char_superscript, /* MD_CHAR_SUPERSCRIPT */
+};
+
+/* render • structure containing one particular render */
+struct sd_markdown {
+ struct sd_callbacks cb; /* renderer callbacks (emphasis, link, codespan, ...) */
+ void *opaque; /* handed back as the last argument of every callback */
+
+ struct link_ref *refs[REF_TABLE_SIZE]; /* hash buckets of link references */
+ uint8_t active_char[256]; /* per-byte action: 0 = plain text, otherwise an index into markdown_char_ptrs */
+ struct stack work_bufs[2]; /* scratch buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
+ unsigned int ext_flags; /* MKDEXT_* extension bits */
+ size_t max_nesting; /* parse_inline() returns early once the live scratch buffers exceed this */
+ int in_link_body; /* set by char_link() while parsing link text; the autolink triggers return 0 while set */
+};
+
+/***************************
+ * HELPER FUNCTIONS *
+ ***************************/
+
+/* rndr_newbuf • hands out a scratch buffer from the pool selected by type
+ * (BUFFER_BLOCK or BUFFER_SPAN). A buffer left in the slot just above the
+ * current top is reused after resetting its size; otherwise a new one is
+ * created with bufnew() and pushed. Pair every call with rndr_popbuf(). */
+static inline struct buf *
+rndr_newbuf(struct sd_markdown *rndr, int type)
+{
+    static const size_t initial_unit[2] = {256, 64};
+    struct stack *bufs = &rndr->work_bufs[type];
+    struct buf *fresh;
+
+    /* a previously popped buffer is still parked in the next slot */
+    if (bufs->size < bufs->asize && bufs->item[bufs->size] != NULL) {
+        struct buf *recycled = bufs->item[bufs->size];
+
+        bufs->size++;
+        recycled->size = 0;
+        return recycled;
+    }
+
+    fresh = bufnew(initial_unit[type]);
+    stack_push(bufs, fresh);
+    return fresh;
+}
+
+/* rndr_popbuf • gives back the most recent scratch buffer of the given
+ * type. Only the pool's size drops; the slot itself is left as it was. */
+static inline void
+rndr_popbuf(struct sd_markdown *rndr, int type)
+{
+    struct stack *bufs = &rndr->work_bufs[type];
+
+    bufs->size -= 1;
+}
+
+/* unscape_text • appends src to ob with backslash escapes removed: each
+ * backslash is dropped and the byte after it is copied verbatim. A
+ * backslash that is the very last byte of src is dropped. */
+static void
+unscape_text(struct buf *ob, struct buf *src)
+{
+    size_t pos = 0;
+
+    for (;;) {
+        size_t run_start;
+
+        if (pos >= src->size)
+            return;
+
+        /* copy the run of bytes up to the next backslash */
+        run_start = pos;
+        while (pos < src->size && src->data[pos] != '\\')
+            pos++;
+
+        if (pos > run_start)
+            bufput(ob, src->data + run_start, pos - run_start);
+
+        /* nothing left after the backslash (or no backslash at all) */
+        if (pos + 1 >= src->size)
+            return;
+
+        bufputc(ob, src->data[pos + 1]);
+        pos += 2;
+    }
+}
+
+/* hash_link_ref • case-insensitive hash of a reference name.
+ * Each byte is lower-cased with tolower() and folded into the running
+ * value as byte + hash * 65599 (written with shifts), in unsigned
+ * arithmetic. An empty name hashes to 0. */
+static unsigned int
+hash_link_ref(const uint8_t *link_ref, size_t length)
+{
+    unsigned int acc = 0;
+    const uint8_t *cur = link_ref;
+    size_t remaining = length;
+
+    while (remaining > 0) {
+        int folded = tolower(*cur);
+
+        acc = folded + (acc << 6) + (acc << 16) - acc;
+        cur++;
+        remaining--;
+    }
+
+    return acc;
+}
+
+/* add_link_ref • allocates a zeroed link_ref for name and links it at the
+ * head of its hash bucket. Returns the new entry, or NULL when calloc()
+ * fails. link and title are left NULL for the caller to fill in. */
+static struct link_ref *
+add_link_ref(
+    struct link_ref **references,
+    const uint8_t *name, size_t name_size)
+{
+    struct link_ref *entry = calloc(1, sizeof(struct link_ref));
+    struct link_ref **bucket;
+
+    if (entry == NULL)
+        return NULL;
+
+    entry->id = hash_link_ref(name, name_size);
+
+    /* push in front of whatever the bucket already holds */
+    bucket = &references[entry->id % REF_TABLE_SIZE];
+    entry->next = *bucket;
+    *bucket = entry;
+
+    return entry;
+}
+
+/* find_link_ref • looks a reference name up in the table.
+ * Entries are matched on the hash value alone (the name itself is not
+ * stored), so two names with the same hash resolve to the same entry.
+ * Returns the first match in the bucket chain, or NULL. */
+static struct link_ref *
+find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
+{
+    unsigned int wanted = hash_link_ref(name, length);
+    struct link_ref *cur;
+
+    for (cur = references[wanted % REF_TABLE_SIZE]; cur != NULL; cur = cur->next) {
+        if (cur->id == wanted)
+            return cur;
+    }
+
+    return NULL;
+}
+
+/* free_link_refs • releases every entry of every bucket: the link and
+ * title buffers through bufrelease(), then the entry itself. The bucket
+ * heads are not reset. */
+static void
+free_link_refs(struct link_ref **references)
+{
+    size_t bucket;
+
+    for (bucket = 0; bucket < REF_TABLE_SIZE; ++bucket) {
+        struct link_ref *cur = references[bucket];
+
+        while (cur != NULL) {
+            /* remember the successor before the node goes away */
+            struct link_ref *following = cur->next;
+
+            bufrelease(cur->link);
+            bufrelease(cur->title);
+            free(cur);
+
+            cur = following;
+        }
+    }
+}
+
+/*
+ * Tell whether a char counts as Markdown whitespace.
+ *
+ * Only the plain space and the newline qualify: tabs and
+ * carriage returns are filtered out during the
+ * preprocessing phase.
+ *
+ * Proper UTF-8 support would instead decode a Unicode
+ * codepoint and test its space property.
+ *
+ * Returns 1 for ' ' and '\n', 0 for anything else.
+ */
+static inline int
+_isspace(int c)
+{
+    switch (c) {
+    case ' ':
+    case '\n':
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+/****************************
+ * INLINE PARSING FUNCTIONS *
+ ****************************/
+
+/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
+/* this is less strict than the original markdown e-mail address matching */
+/* returns the length up to and including '>', or 0 when data is not a
+ * mail address of the form [-@._a-zA-Z0-9]+ holding exactly one '@' */
+static size_t
+is_mail_autolink(uint8_t *data, size_t size)
+{
+    size_t pos, at_signs = 0;
+
+    for (pos = 0; pos < size; ++pos) {
+        uint8_t ch = data[pos];
+
+        /* ordinary address characters */
+        if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
+            continue;
+
+        if (ch == '@') {
+            at_signs++;
+            continue;
+        }
+
+        /* closing bracket: valid only with a single '@' before it */
+        if (ch == '>')
+            return (at_signs == 1) ? pos + 1 : 0;
+
+        return 0;
+    }
+
+    /* ran out of input before reaching '>' */
+    return 0;
+}
+
+/* tag_length • returns the length of the given tag, or 0 if it's not valid
+ *
+ * data/size: the span, which must start at '<'.
+ * autolink: receives MKDA_EMAIL, MKDA_NORMAL or MKDA_NOT_AUTOLINK. It is
+ * only written once the first checks have passed, so callers should
+ * initialise it before the call.
+ *
+ * Returns the number of bytes up to and including the closing '>', or 0
+ * when the span is not a tag or autolink.
+ */
+static size_t
+tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
+{
+    size_t i, j;
+
+    /* a valid tag can't be shorter than 3 chars */
+    if (size < 3) return 0;
+
+    /* begins with a '<' optionally followed by '/', followed by letter or number */
+    if (data[0] != '<') return 0;
+    i = (data[1] == '/') ? 2 : 1;
+
+    if (!isalnum(data[i]))
+        return 0;
+
+    /* scheme test */
+    *autolink = MKDA_NOT_AUTOLINK;
+
+    /* try to find the beginning of an URI */
+    while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
+        i++;
+
+    /* the scan above may stop at i == size, so data[i] must be guarded:
+     * reading it unconditionally touches one byte past the span */
+    if (i > 1 && i < size && data[i] == '@') {
+        if ((j = is_mail_autolink(data + i, size - i)) != 0) {
+            *autolink = MKDA_EMAIL;
+            return i + j;
+        }
+    }
+
+    if (i > 2 && i < size && data[i] == ':') {
+        *autolink = MKDA_NORMAL;
+        i++;
+    }
+
+    /* completing autolink test: no whitespace or ' or " */
+    if (i >= size)
+        *autolink = MKDA_NOT_AUTOLINK;
+
+    else if (*autolink) {
+        j = i;
+
+        while (i < size) {
+            if (data[i] == '\\') i += 2; /* skip the escaped byte */
+            else if (data[i] == '>' || data[i] == '\'' ||
+                    data[i] == '"' || data[i] == ' ' || data[i] == '\n')
+                break;
+            else i++;
+        }
+
+        if (i >= size) return 0;
+        if (i > j && data[i] == '>') return i + 1;
+        /* one of the forbidden chars has been found */
+        *autolink = MKDA_NOT_AUTOLINK;
+    }
+
+    /* looking for something looking like a tag end */
+    while (i < size && data[i] != '>') i++;
+    if (i >= size) return 0;
+    return i + 1;
+}
+
+/* parse_inline • parses inline markdown elements
+ *
+ * Walks data[0..size), emitting runs of inactive bytes as plain text and
+ * handing each active byte to its trigger from markdown_char_ptrs[].
+ * Returns without output when the number of live scratch buffers exceeds
+ * rndr->max_nesting.
+ */
+static void
+parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+ size_t i = 0, end = 0; /* i: start of the pending plain run; end: scan position */
+ uint8_t action = 0;
+ struct buf work = { 0, 0, 0, 0 };
+
+ if (rndr->work_bufs[BUFFER_SPAN].size +
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
+ return;
+
+ while (i < size) {
+ /* copying inactive chars into the output */
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
+ end++;
+ }
+
+ if (rndr->cb.normal_text) {
+ work.data = data + i;
+ work.size = end - i;
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
+ }
+ else
+ bufput(ob, data + i, end - i);
+
+ if (end >= size) break;
+ i = end;
+
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i); /* i doubles as the offset argument */
+ if (!end) /* no action from the callback */
+ end = i + 1; /* the active byte joins the next plain run, which still starts at i */
+ else {
+ i += end; /* skip what the trigger consumed */
+ end = i;
+ }
+ }
+}
+
+/* find_emph_char • looks for the next emph uint8_t, skipping other constructs
+ *
+ * The search starts at index 1. Returns the index of the next c relative
+ * to data, or 0 when there is none. Code spans and links are stepped
+ * over; the first c seen inside one is remembered in tmp_i and returned
+ * when the scan reaches the end of data before leaving the construct.
+ */
+static size_t
+find_emph_char(uint8_t *data, size_t size, uint8_t c)
+{
+ size_t i = 1;
+
+ while (i < size) {
+ while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
+ i++;
+
+ if (i == size)
+ return 0;
+
+ if (data[i] == c)
+ return i;
+
+ /* not counting escaped chars */
+ if (i && data[i - 1] == '\\') {
+ i++; continue;
+ }
+
+ if (data[i] == '`') {
+ size_t span_nb = 0, bt; /* span_nb: opening backticks; bt: consecutive closing backticks */
+ size_t tmp_i = 0;
+
+ /* counting the number of opening backticks */
+ while (i < size && data[i] == '`') {
+ i++; span_nb++;
+ }
+
+ if (i >= size) return 0;
+
+ /* finding the matching closing sequence */
+ bt = 0;
+ while (i < size && bt < span_nb) {
+ if (!tmp_i && data[i] == c) tmp_i = i;
+ if (data[i] == '`') bt++;
+ else bt = 0;
+ i++;
+ }
+
+ if (i >= size) return tmp_i;
+ }
+ /* skipping a link */
+ else if (data[i] == '[') {
+ size_t tmp_i = 0;
+ uint8_t cc; /* closing delimiter of the part after the link text */
+
+ i++;
+ while (i < size && data[i] != ']') {
+ if (!tmp_i && data[i] == c) tmp_i = i;
+ i++;
+ }
+
+ i++;
+ while (i < size && (data[i] == ' ' || data[i] == '\n'))
+ i++;
+
+ if (i >= size)
+ return tmp_i;
+
+ switch (data[i]) {
+ case '[':
+ cc = ']'; break;
+
+ case '(':
+ cc = ')'; break;
+
+ default: /* not a link after all */
+ if (tmp_i)
+ return tmp_i;
+ else
+ continue;
+ }
+
+ i++;
+ while (i < size && data[i] != cc) {
+ if (!tmp_i && data[i] == c) tmp_i = i;
+ i++;
+ }
+
+ if (i >= size)
+ return tmp_i;
+
+ i++;
+ }
+ }
+
+ return 0;
+}
+
+/* parse_emph1 • parsing single emphasis */
+/* closed by a symbol not preceded by whitespace and not followed by symbol */
+/* data starts just after the opening symbol; returns the number of bytes
+ * consumed including the closing symbol, or 0 when no emphasis was rendered */
+static size_t
+parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
+{
+ size_t i = 0, len;
+ struct buf *work = 0;
+ int r;
+
+ if (!rndr->cb.emphasis) return 0;
+
+ /* skipping one symbol if coming from emph3 */
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
+
+ while (i < size) {
+ len = find_emph_char(data + i, size - i, c);
+ if (!len) return 0;
+ i += len;
+ if (i >= size) return 0;
+
+ if (data[i] == c && !_isspace(data[i - 1])) {
+
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
+ if (i + 1 < size && isalnum(data[i + 1]))
+ continue; /* a closer followed by an alphanumeric is rejected; keep looking */
+ }
+
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
+ parse_inline(work, rndr, data, i); /* the emphasised text may itself hold inline markup */
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ return r ? i + 1 : 0;
+ }
+ }
+
+ return 0;
+}
+
+/* parse_emph2 • parsing double emphasis ('~' selects strikethrough)
+ * data starts just after the two opening symbols; returns the number of
+ * bytes consumed including both closing symbols, or 0 when nothing was
+ * rendered */
+static size_t
+parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
+{
+ int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
+ size_t i = 0, len;
+ struct buf *work = 0;
+ int r;
+
+ render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
+
+ if (!render_method)
+ return 0;
+
+ while (i < size) {
+ len = find_emph_char(data + i, size - i, c);
+ if (!len) return 0;
+ i += len;
+
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
+ parse_inline(work, rndr, data, i);
+ r = render_method(ob, work, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ return r ? i + 2 : 0;
+ }
+ i++; /* lone symbol: step past it and keep searching */
+ }
+ return 0;
+}
+
+/* parse_emph3 • parsing triple emphasis */
+/* finds the first closing tag, and delegates to the other emph */
+/* data starts just after the three opening symbols; when delegating, data
+ * is moved back so the callee sees the leftover opening symbols, and the
+ * callee's result is corrected by the same amount */
+static size_t
+parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
+{
+ size_t i = 0, len;
+ int r;
+
+ while (i < size) {
+ len = find_emph_char(data + i, size - i, c);
+ if (!len) return 0;
+ i += len;
+
+ /* skip whitespace preceded symbols */
+ if (data[i] != c || _isspace(data[i - 1]))
+ continue;
+
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
+ /* triple symbol found */
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
+
+ parse_inline(work, rndr, data, i);
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ return r ? i + 3 : 0;
+
+ } else if (i + 1 < size && data[i + 1] == c) {
+ /* double symbol found, handing over to emph1 */
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
+ if (!len) return 0;
+ else return len - 2;
+
+ } else {
+ /* single symbol found, handing over to emph2 */
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
+ if (!len) return 0;
+ else return len - 1;
+ }
+ }
+ return 0;
+}
+
+/* char_emphasis • single and double emphasis parsing
+ *
+ * Trigger for the emphasis symbols; data[0] is the symbol. Counts the
+ * opening run (1, 2 or 3 symbols) and dispatches to parse_emph1/2/3.
+ * Returns the bytes consumed including the opening run, or 0.
+ */
+static size_t
+char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+ uint8_t c = data[0];
+ size_t ret;
+
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
+ if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>') /* opener must follow whitespace or '>' */
+ return 0;
+ }
+
+ if (size > 2 && data[1] != c) {
+ /* whitespace cannot follow an opening emphasis;
+ * strikethrough only takes two characters '~~' */
+ if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
+ return 0;
+
+ return ret + 1;
+ }
+
+ if (size > 3 && data[1] == c && data[2] != c) {
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
+ return 0;
+
+ return ret + 2;
+ }
+
+ if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
+ if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
+ return 0;
+
+ return ret + 3;
+ }
+
+ return 0;
+}
+
+
+/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
+/* trailing spaces already written to ob are trimmed before the callback
+ * runs; returns 1 when the callback accepted the break, 0 otherwise */
+static size_t
+char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    int two_spaces_before = (offset >= 2 && data[-1] == ' ' && data[-2] == ' ');
+
+    if (!two_spaces_before)
+        return 0;
+
+    /* drop every space at the tail of the output */
+    while (ob->size > 0 && ob->data[ob->size - 1] == ' ')
+        ob->size -= 1;
+
+    if (rndr->cb.linebreak(ob, rndr->opaque))
+        return 1;
+
+    return 0;
+}
+
+
+/* char_codespan • '`' parsing a code span (assuming codespan != 0)
+ *
+ * The opening run of nb backticks must be matched by a run of nb
+ * backticks. Spaces just inside the delimiters are trimmed. Returns the
+ * bytes consumed including both delimiters, or 0 when there is no closing
+ * delimiter or the callback returns 0.
+ */
+static size_t
+char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+ size_t end, nb = 0, i, f_begin, f_end;
+
+ /* counting the number of backticks in the delimiter */
+ while (nb < size && data[nb] == '`')
+ nb++;
+
+ /* finding the next delimiter */
+ i = 0; /* i counts consecutive backticks seen so far */
+ for (end = nb; end < size && i < nb; end++) {
+ if (data[end] == '`') i++;
+ else i = 0;
+ }
+
+ if (i < nb && end >= size)
+ return 0; /* no matching delimiter */
+
+ /* trimming outside whitespaces */
+ f_begin = nb;
+ while (f_begin < end && data[f_begin] == ' ')
+ f_begin++;
+
+ f_end = end - nb;
+ while (f_end > nb && data[f_end-1] == ' ')
+ f_end--;
+
+ /* real code span */
+ if (f_begin < f_end) {
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
+ end = 0;
+ } else { /* nothing left after trimming: the callback gets a null text */
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
+ end = 0;
+ }
+
+ return end;
+}
+
+
+/* char_escape • '\\' backslash escape */
+/* when the byte after the backslash is one of the escapable characters it
+ * is emitted on its own (through normal_text when that callback is set)
+ * and 2 is returned; any other byte yields 0. A backslash that is the
+ * last byte of the span is emitted as is, and 2 is still returned. */
+static size_t
+char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
+    struct buf escaped = { 0, 0, 0, 0 };
+
+    if (size == 0)
+        return 2;
+
+    if (size == 1) {
+        bufputc(ob, data[0]);
+        return 2;
+    }
+
+    if (!strchr(escape_chars, data[1]))
+        return 0;
+
+    if (!rndr->cb.normal_text) {
+        bufputc(ob, data[1]);
+        return 2;
+    }
+
+    escaped.data = data + 1;
+    escaped.size = 1;
+    rndr->cb.normal_text(ob, &escaped, rndr->opaque);
+    return 2;
+}
+
+/* char_entity • '&' escaped when it doesn't belong to an entity */
+/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
+/* returns the entity length including '&' and ';', or 0 for a lone '&';
+ * the entity goes through the entity callback when set, else verbatim */
+static size_t
+char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    size_t len = 1;
+    struct buf ent = { 0, 0, 0, 0 };
+
+    /* optional '#' of a numeric entity */
+    if (size > 1 && data[1] == '#')
+        len = 2;
+
+    for (; len < size && isalnum(data[len]); len++)
+        ;
+
+    /* lone '&' */
+    if (len >= size || data[len] != ';')
+        return 0;
+
+    /* real entity */
+    len++;
+
+    if (!rndr->cb.entity) {
+        bufput(ob, data, len);
+        return len;
+    }
+
+    ent.data = data;
+    ent.size = len;
+    rndr->cb.entity(ob, &ent, rndr->opaque);
+    return len;
+}
+
+/* char_langle_tag • '<' when tags or autolinks are allowed */
+/* autolinks go to the autolink callback with the brackets stripped and
+ * escapes removed; everything else goes to raw_html_tag with the whole
+ * tag. Returns the tag length when the callback accepted it, else 0. */
+static size_t
+char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    enum mkd_autolink kind = MKDA_NOT_AUTOLINK;
+    size_t tag_len = tag_length(data, size, &kind);
+    struct buf span = { data, tag_len, 0, 0 };
+    int handled = 0;
+
+    if (tag_len <= 2)
+        return 0;
+
+    if (rndr->cb.autolink && kind != MKDA_NOT_AUTOLINK) {
+        struct buf *target = rndr_newbuf(rndr, BUFFER_SPAN);
+
+        /* leave out the surrounding '<' and '>' */
+        span.data = data + 1;
+        span.size = tag_len - 2;
+        unscape_text(target, &span);
+        handled = rndr->cb.autolink(ob, target, kind, rndr->opaque);
+        rndr_popbuf(rndr, BUFFER_SPAN);
+    } else if (rndr->cb.raw_html_tag) {
+        handled = rndr->cb.raw_html_tag(ob, &span, rndr->opaque);
+    }
+
+    return handled ? tag_len : 0;
+}
+
+/* char_autolink_www • trigger for bare "www" links.
+ * Disabled without a link callback and inside link text. On a match the
+ * output is rewound by the amount sd_autolink__www() reports, and the
+ * link is rendered with "http://" prepended to its target. Returns the
+ * length reported by sd_autolink__www() (0 when nothing matched). */
+static size_t
+char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    struct buf *host, *url, *label;
+    size_t link_len, rewind;
+
+    if (!rndr->cb.link || rndr->in_link_body)
+        return 0;
+
+    host = rndr_newbuf(rndr, BUFFER_SPAN);
+    link_len = sd_autolink__www(&rewind, host, data, offset, size, 0);
+
+    if (link_len == 0) {
+        rndr_popbuf(rndr, BUFFER_SPAN);
+        return link_len;
+    }
+
+    url = rndr_newbuf(rndr, BUFFER_SPAN);
+    BUFPUTSL(url, "http://");
+    bufput(url, host->data, host->size);
+
+    ob->size -= rewind;
+
+    /* the visible text is routed through normal_text when available */
+    label = host;
+    if (rndr->cb.normal_text) {
+        label = rndr_newbuf(rndr, BUFFER_SPAN);
+        rndr->cb.normal_text(label, host, rndr->opaque);
+    }
+
+    rndr->cb.link(ob, url, NULL, label, rndr->opaque);
+
+    if (label != host)
+        rndr_popbuf(rndr, BUFFER_SPAN); /* label */
+    rndr_popbuf(rndr, BUFFER_SPAN); /* url */
+    rndr_popbuf(rndr, BUFFER_SPAN); /* host */
+
+    return link_len;
+}
+
+/* char_autolink_email • trigger for bare e-mail addresses.
+ * Disabled without an autolink callback and inside link text. On a match
+ * the output is rewound by the amount sd_autolink__email() reports and
+ * the address is rendered as MKDA_EMAIL. Returns the length reported by
+ * sd_autolink__email() (0 when nothing matched). */
+static size_t
+char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    struct buf *address;
+    size_t link_len, rewind;
+
+    if (!rndr->cb.autolink || rndr->in_link_body)
+        return 0;
+
+    address = rndr_newbuf(rndr, BUFFER_SPAN);
+    link_len = sd_autolink__email(&rewind, address, data, offset, size, 0);
+
+    if (link_len != 0) {
+        ob->size -= rewind;
+        rndr->cb.autolink(ob, address, MKDA_EMAIL, rndr->opaque);
+    }
+
+    rndr_popbuf(rndr, BUFFER_SPAN);
+    return link_len;
+}
+
+/* char_autolink_url • trigger for bare URLs.
+ * Disabled without an autolink callback and inside link text. On a match
+ * the output is rewound by the amount sd_autolink__url() reports and the
+ * URL is rendered as MKDA_NORMAL. Returns the length reported by
+ * sd_autolink__url() (0 when nothing matched). */
+static size_t
+char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+    struct buf *url;
+    size_t link_len, rewind;
+
+    if (!rndr->cb.autolink || rndr->in_link_body)
+        return 0;
+
+    url = rndr_newbuf(rndr, BUFFER_SPAN);
+    link_len = sd_autolink__url(&rewind, url, data, offset, size, 0);
+
+    if (link_len != 0) {
+        ob->size -= rewind;
+        rndr->cb.autolink(ob, url, MKDA_NORMAL, rndr->opaque);
+    }
+
+    rndr_popbuf(rndr, BUFFER_SPAN);
+    return link_len;
+}
+
+/* char_link • '[': parsing a link or an image
+ *
+ * data[0] is the '['; a '!' just before it makes this an image. Three
+ * forms are recognised after the bracketed text: inline "(url 'title')",
+ * reference "[id]" and shortcut (the text itself is the id). Returns the
+ * number of bytes consumed, or 0 when nothing was rendered. Every scratch
+ * buffer taken along the way is given back at the cleanup label.
+ */
+static size_t
+char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{
+ int is_img = (offset && data[-1] == '!'), level;
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
+ struct buf *content = 0;
+ struct buf *link = 0;
+ struct buf *title = 0;
+ struct buf *u_link = 0;
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; /* pool depth to restore on exit */
+ int text_has_nl = 0, ret = 0;
+ int in_title = 0, qtype = 0;
+
+ /* checking whether the correct renderer exists */
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
+ goto cleanup;
+
+ /* looking for the matching closing bracket */
+ for (level = 1; i < size; i++) {
+ if (data[i] == '\n')
+ text_has_nl = 1;
+
+ else if (data[i - 1] == '\\') /* escaped byte: not a bracket */
+ continue;
+
+ else if (data[i] == '[')
+ level++;
+
+ else if (data[i] == ']') {
+ level--;
+ if (level <= 0)
+ break;
+ }
+ }
+
+ if (i >= size)
+ goto cleanup;
+
+ txt_e = i; /* index of the closing ']' of the text */
+ i++;
+
+ /* skip any amount of whitespace or newline */
+ /* (this is much more laxist than original markdown syntax) */
+ while (i < size && _isspace(data[i]))
+ i++;
+
+ /* inline style link */
+ if (i < size && data[i] == '(') {
+ /* skipping initial whitespace */
+ i++;
+
+ while (i < size && _isspace(data[i]))
+ i++;
+
+ link_b = i;
+
+ /* looking for link end: ' " ) */
+ while (i < size) {
+ if (data[i] == '\\') i += 2;
+ else if (data[i] == ')') break;
+ else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
+ else i++;
+ }
+
+ if (i >= size) goto cleanup;
+ link_e = i;
+
+ /* looking for title end if present */
+ if (data[i] == '\'' || data[i] == '"') {
+ qtype = data[i];
+ in_title = 1;
+ i++;
+ title_b = i;
+
+ while (i < size) {
+ if (data[i] == '\\') i += 2;
+ else if (data[i] == qtype) {in_title = 0; i++;}
+ else if ((data[i] == ')') && !in_title) break;
+ else i++;
+ }
+
+ if (i >= size) goto cleanup;
+
+ /* skipping whitespaces after title */
+ title_e = i - 1;
+ while (title_e > title_b && _isspace(data[title_e]))
+ title_e--;
+
+ /* checking for closing quote presence */
+ if (data[title_e] != '\'' && data[title_e] != '"') {
+ title_b = title_e = 0; /* no title: the whole span up to ')' is the link */
+ link_e = i;
+ }
+ }
+
+ /* remove whitespace at the end of the link */
+ while (link_e > link_b && _isspace(data[link_e - 1]))
+ link_e--;
+
+ /* remove optional angle brackets around the link */
+ if (data[link_b] == '<') link_b++;
+ if (data[link_e - 1] == '>') link_e--;
+
+ /* building escaped link and title */
+ if (link_e > link_b) {
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
+ bufput(link, data + link_b, link_e - link_b);
+ }
+
+ if (title_e > title_b) {
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
+ bufput(title, data + title_b, title_e - title_b);
+ }
+
+ i++;
+ }
+
+ /* reference style link */
+ else if (i < size && data[i] == '[') {
+ struct buf id = { 0, 0, 0, 0 };
+ struct link_ref *lr;
+
+ /* looking for the id */
+ i++;
+ link_b = i;
+ while (i < size && data[i] != ']') i++;
+ if (i >= size) goto cleanup;
+ link_e = i;
+
+ /* finding the link_ref */
+ if (link_b == link_e) { /* empty "[]": the link text is the id */
+ if (text_has_nl) {
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
+ size_t j;
+
+ for (j = 1; j < txt_e; j++) { /* a newline becomes one space unless a space precedes it */
+ if (data[j] != '\n')
+ bufputc(b, data[j]);
+ else if (data[j - 1] != ' ')
+ bufputc(b, ' ');
+ }
+
+ id.data = b->data;
+ id.size = b->size;
+ } else {
+ id.data = data + 1;
+ id.size = txt_e - 1;
+ }
+ } else {
+ id.data = data + link_b;
+ id.size = link_e - link_b;
+ }
+
+ lr = find_link_ref(rndr->refs, id.data, id.size);
+ if (!lr)
+ goto cleanup;
+
+ /* keeping link and title from link_ref */
+ link = lr->link;
+ title = lr->title;
+ i++;
+ }
+
+ /* shortcut reference style link */
+ else {
+ struct buf id = { 0, 0, 0, 0 };
+ struct link_ref *lr;
+
+ /* crafting the id */
+ if (text_has_nl) {
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
+ size_t j;
+
+ for (j = 1; j < txt_e; j++) { /* a newline becomes one space unless a space precedes it */
+ if (data[j] != '\n')
+ bufputc(b, data[j]);
+ else if (data[j - 1] != ' ')
+ bufputc(b, ' ');
+ }
+
+ id.data = b->data;
+ id.size = b->size;
+ } else {
+ id.data = data + 1;
+ id.size = txt_e - 1;
+ }
+
+ /* finding the link_ref */
+ lr = find_link_ref(rndr->refs, id.data, id.size);
+ if (!lr)
+ goto cleanup;
+
+ /* keeping link and title from link_ref */
+ link = lr->link;
+ title = lr->title;
+
+ /* rewinding the whitespace */
+ i = txt_e + 1;
+ }
+
+ /* building content: img alt is escaped, link content is parsed */
+ if (txt_e > 1) {
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
+ if (is_img) {
+ bufput(content, data + 1, txt_e - 1);
+ } else {
+ /* disable autolinking when parsing inline the
+ * content of a link */
+ rndr->in_link_body = 1;
+ parse_inline(content, rndr, data + 1, txt_e - 1);
+ rndr->in_link_body = 0;
+ }
+ }
+
+ if (link) {
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
+ unscape_text(u_link, link);
+ }
+
+ /* calling the relevant rendering function */
+ if (is_img) {
+ if (ob->size && ob->data[ob->size - 1] == '!') /* take back the '!' already written to ob */
+ ob->size -= 1;
+
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
+ } else {
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
+ }
+
+ /* cleanup */
+cleanup:
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; /* drops every scratch buffer taken above */
+ return ret ? i : 0;
+}
+
+static size_t
+char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
+{ /* char_superscript • superscript trigger; data[0] is the active char.
+ * Two forms: "X(body)" runs up to the closing ')', "Xword" runs up to
+ * the next Markdown space. Returns the bytes consumed, or 0. */
+ size_t sup_start, sup_len;
+ struct buf *sup;
+
+ if (!rndr->cb.superscript)
+ return 0;
+
+ if (size < 2)
+ return 0;
+
+ if (data[1] == '(') {
+ sup_start = sup_len = 2;
+
+ /* NOTE(review): this also stops right after any backslash; looks like it was meant to skip an escaped ')' — confirm */
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
+ sup_len++;
+
+ if (sup_len == size)
+ return 0;
+ } else {
+ sup_start = sup_len = 1;
+
+ while (sup_len < size && !_isspace(data[sup_len]))
+ sup_len++;
+ }
+
+ if (sup_len - sup_start == 0)
+ return (sup_start == 2) ? 3 : 0; /* empty "X()" is consumed (3 bytes) without output */
+
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
+ rndr->cb.superscript(ob, sup, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+
+ return (sup_start == 2) ? sup_len + 1 : sup_len; /* the parenthesised form also consumes the ')' */
+}
+
+/*********************************
+ * BLOCK-LEVEL PARSING FUNCTIONS *
+ *********************************/
+
+/* is_empty • returns the line length when it is empty, 0 otherwise */
+/* a line is empty when it holds only spaces; the length returned counts
+ * one byte for the terminator even when data ends without a newline */
+static size_t
+is_empty(uint8_t *data, size_t size)
+{
+    size_t pos = 0;
+
+    while (pos < size && data[pos] != '\n') {
+        if (data[pos] != ' ')
+            return 0;
+
+        pos++;
+    }
+
+    return pos + 1;
+}
+
+/* is_hrule • returns whether a line is a horizontal rule */
+/* up to three leading spaces, then a line made only of spaces and at
+ * least three copies of one of '*', '-' or '_' */
+static int
+is_hrule(uint8_t *data, size_t size)
+{
+    size_t pos = 0, marks = 0;
+    uint8_t mark;
+
+    if (size < 3)
+        return 0;
+
+    /* skipping initial spaces (three at most) */
+    while (pos < 3 && data[pos] == ' ')
+        pos++;
+
+    if (pos + 2 >= size)
+        return 0;
+
+    /* the rule character */
+    mark = data[pos];
+    if (mark != '*' && mark != '-' && mark != '_')
+        return 0;
+
+    /* the rest of the line must be the rule character or spaces */
+    for (; pos < size && data[pos] != '\n'; pos++) {
+        if (data[pos] == mark)
+            marks++;
+        else if (data[pos] != ' ')
+            return 0;
+    }
+
+    return marks >= 3;
+}
+
+/* check if a line begins with a code fence; return the
+ * width of the code fence (leading spaces included),
+ * or 0 when the line does not start with one */
+static size_t
+prefix_codefence(uint8_t *data, size_t size)
+{
+    size_t pos = 0, run = 0;
+    uint8_t fence;
+
+    if (size < 3)
+        return 0;
+
+    /* skipping initial spaces (three at most) */
+    while (pos < 3 && data[pos] == ' ')
+        pos++;
+
+    if (pos + 2 >= size)
+        return 0;
+
+    /* the fence character */
+    fence = data[pos];
+    if (fence != '~' && fence != '`')
+        return 0;
+
+    /* counting the run of fence characters */
+    for (; pos < size && data[pos] == fence; pos++)
+        run++;
+
+    return (run < 3) ? 0 : pos;
+}
+
+/* check if a line is a code fence; return its size if it is
+ *
+ * The fence may be followed by a syntax name, either bare or wrapped in
+ * "{ }". When syntax is non-NULL it is pointed at that name inside data
+ * (nothing is copied). Returns the line length including the newline
+ * position, or 0 when the line is not a fence.
+ */
+static size_t
+is_codefence(uint8_t *data, size_t size, struct buf *syntax)
+{
+ size_t i = 0, syn_len = 0;
+ uint8_t *syn_start;
+
+ i = prefix_codefence(data, size);
+ if (i == 0)
+ return 0;
+
+ while (i < size && data[i] == ' ')
+ i++;
+
+ syn_start = data + i;
+
+ if (i < size && data[i] == '{') {
+ i++; syn_start++;
+
+ while (i < size && data[i] != '}' && data[i] != '\n') {
+ syn_len++; i++;
+ }
+
+ if (i == size || data[i] != '}') /* unterminated "{": not a fence */
+ return 0;
+
+ /* strip all whitespace at the beginning and the end
+ * of the {} block */
+ while (syn_len > 0 && _isspace(syn_start[0])) {
+ syn_start++; syn_len--;
+ }
+
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
+ syn_len--;
+
+ i++;
+ } else {
+ while (i < size && !_isspace(data[i])) {
+ syn_len++; i++;
+ }
+ }
+
+ if (syntax) {
+ syntax->data = syn_start;
+ syntax->size = syn_len;
+ }
+
+ while (i < size && data[i] != '\n') { /* only whitespace may follow the syntax name */
+ if (!_isspace(data[i]))
+ return 0;
+
+ i++;
+ }
+
+ return i + 1;
+}
+
+/* is_atxheader • returns whether the line is a hash-prefixed header */
+/* with MKDEXT_SPACE_HEADERS set, the run of '#' (six counted at most)
+ * must be followed by a space unless it reaches the end of the data */
+static int
+is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+    size_t hashes;
+
+    if (data[0] != '#')
+        return 0;
+
+    if (!(rndr->ext_flags & MKDEXT_SPACE_HEADERS))
+        return 1;
+
+    for (hashes = 0; hashes < size && hashes < 6 && data[hashes] == '#'; hashes++)
+        ;
+
+    return (hashes < size && data[hashes] != ' ') ? 0 : 1;
+}
+
+/* is_headerline • returns whether the line is a setext-style hdr underline */
+/* 1 for a run of '=', 2 for a run of '-', 0 otherwise; only trailing
+ * spaces may follow the run before the end of the line */
+static int
+is_headerline(uint8_t *data, size_t size)
+{
+    uint8_t fill = data[0];
+    size_t pos = 1;
+    int level;
+
+    if (fill == '=')
+        level = 1;
+    else if (fill == '-')
+        level = 2;
+    else
+        return 0;
+
+    /* the underline run, then optional trailing spaces */
+    while (pos < size && data[pos] == fill)
+        pos++;
+
+    while (pos < size && data[pos] == ' ')
+        pos++;
+
+    if (pos >= size || data[pos] == '\n')
+        return level;
+
+    return 0;
+}
+
+/* is_next_headerline • runs is_headerline() on the line that follows the
+ * first one in data; 0 when there is no following line */
+static int
+is_next_headerline(uint8_t *data, size_t size)
+{
+    size_t pos;
+
+    /* find the end of the first line */
+    for (pos = 0; pos < size && data[pos] != '\n'; pos++)
+        ;
+
+    /* step over the newline */
+    pos += 1;
+    if (pos >= size)
+        return 0;
+
+    return is_headerline(data + pos, size - pos);
+}
+
+/* prefix_quote • returns blockquote prefix length */
+/* up to three spaces, a '>' and one optional space; 0 when absent */
+static size_t
+prefix_quote(uint8_t *data, size_t size)
+{
+    size_t pos = 0;
+
+    /* skipping initial spaces (three at most) */
+    while (pos < 3 && pos < size && data[pos] == ' ')
+        pos++;
+
+    if (pos >= size || data[pos] != '>')
+        return 0;
+
+    /* one space after the marker belongs to the prefix */
+    if (pos + 1 < size && data[pos + 1] == ' ')
+        return pos + 2;
+
+    return pos + 1;
+}
+
+/* prefix_code • returns prefix length for block code */
+/* 4 when the data is longer than three bytes and starts with four
+ * spaces, 0 otherwise */
+static size_t
+prefix_code(uint8_t *data, size_t size)
+{
+    size_t pos;
+
+    if (size <= 3)
+        return 0;
+
+    for (pos = 0; pos < 4; pos++) {
+        if (data[pos] != ' ')
+            return 0;
+    }
+
+    return 4;
+}
+
+/* prefix_oli • returns ordered list item prefix */
+/* up to three spaces, one or more digits, then ". "; 0 when the line is
+ * not an ordered item or when the following line is a setext underline */
+static size_t
+prefix_oli(uint8_t *data, size_t size)
+{
+    size_t pos = 0;
+
+    /* skipping initial spaces (three at most) */
+    while (pos < 3 && pos < size && data[pos] == ' ')
+        pos++;
+
+    /* at least one digit is required */
+    if (pos >= size || !(data[pos] >= '0' && data[pos] <= '9'))
+        return 0;
+
+    for (; pos < size && data[pos] >= '0' && data[pos] <= '9'; pos++)
+        ;
+
+    if (pos + 1 >= size)
+        return 0;
+
+    if (data[pos] != '.' || data[pos + 1] != ' ')
+        return 0;
+
+    /* the line is really a header title */
+    if (is_next_headerline(data + pos, size - pos))
+        return 0;
+
+    return pos + 2;
+}
+
+/*
+ * prefix_uli • returns the length of an unordered list item prefix, or 0.
+ *
+ * A prefix is up to three leading spaces, one of '*', '+' or '-', and a
+ * space. The line is rejected when the line after it is a setext header
+ * underline (see is_next_headerline()).
+ */
+static size_t
+prefix_uli(uint8_t *data, size_t size)
+{
+    size_t pos = 0;
+
+    /* at most three spaces of indentation */
+    while (pos < 3 && pos < size && data[pos] == ' ')
+        pos++;
+
+    /* both the bullet and the space after it must be inside the buffer */
+    if (pos + 1 >= size)
+        return 0;
+
+    switch (data[pos]) {
+    case '*':
+    case '+':
+    case '-':
+        break;
+    default:
+        return 0;
+    }
+
+    if (data[pos + 1] != ' ')
+        return 0;
+
+    if (is_next_headerline(data + pos, size - pos))
+        return 0;
+
+    return pos + 2;
+}
+
+
+/* parse_block • forward declaration of the block-level parser, needed
+ * because the block parsers defined below call it before its definition.
+ * It returns nothing (void); the text to parse is passed as data/size. */
+static void parse_block(struct buf *ob, struct sd_markdown *rndr,
+ uint8_t *data, size_t size);
+
+
+/* parse_blockquote • handles parsing of a blockquote fragment.
+ *
+ * Walks data line by line, skips the prefix reported by prefix_quote() on
+ * each line and compacts what remains in place inside data (memmove, no
+ * separate copy, so the input buffer is modified). The compacted text is
+ * parsed with parse_block() into a scratch buffer, which is then passed to
+ * the blockquote callback when one is set.
+ *
+ * The walk stops at an empty line that is followed either by the end of
+ * the input or by a line that is neither quote-prefixed nor empty.
+ *
+ * Returns the offset of the end of the last line that was examined. */
+static size_t
+parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+ size_t beg, end = 0, pre, work_size = 0;
+ uint8_t *work_data = 0;
+ struct buf *out = 0;
+
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
+ beg = 0;
+ while (beg < size) {
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); /* end = one past this line's newline, or size */
+
+ pre = prefix_quote(data + beg, end - beg);
+
+ if (pre)
+ beg += pre; /* skipping prefix */
+
+ /* empty line followed by end of input or by a line that is
+ * neither quoted nor empty: the blockquote is over */
+ else if (is_empty(data + beg, end - beg) &&
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
+ !is_empty(data + end, size - end))))
+ break;
+
+ if (beg < end) { /* copy into the in-place working buffer */
+ /* bufput(work, data + beg, end - beg); */
+ if (!work_data)
+ work_data = data + beg; /* first kept line: the working buffer starts here */
+ else if (data + beg != work_data + work_size)
+ memmove(work_data + work_size, data + beg, end - beg); /* close the gap left by the skipped prefix */
+ work_size += end - beg;
+ }
+ beg = end;
+ }
+
+ parse_block(out, rndr, work_data, work_size); /* work_data is still 0 (with work_size 0) when nothing was kept */
+ if (rndr->cb.blockquote)
+ rndr->cb.blockquote(ob, out, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_BLOCK);
+ return end;
+}
+
+static size_t
+parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render); /* forward declaration: parse_paragraph() below calls it before its definition */
+
+/* parse_paragraph • handles parsing of a regular paragraph.
+ *
+ * Scans data line by line until something ends the paragraph:
+ *  - a line is_empty() reports as empty: that line is consumed;
+ *  - a setext underline (is_headerline()): the underline is consumed and
+ *    the paragraph's last line becomes a header of that level;
+ *  - an atx header, a horizontal rule or a blockquote prefix: the line is
+ *    left for the caller (end is reset to the start of that line);
+ *  - with MKDEXT_LAX_SPACING, a line whose first byte is not alphanumeric
+ *    and that starts a list item, an HTML block or a code fence: also
+ *    left for the caller.
+ *
+ * Without an underline the collected text is rendered through the
+ * paragraph callback. With one, every line but the last is rendered as a
+ * paragraph (if there is any) and the last line goes through the header
+ * callback.
+ *
+ * Returns the number of bytes consumed. */
+static size_t
+parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+ size_t i = 0, end = 0;
+ int level = 0;
+ struct buf work = { data, 0, 0, 0 };
+
+ while (i < size) {
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
+
+ if (is_empty(data + i, size - i))
+ break;
+
+ if ((level = is_headerline(data + i, size - i)) != 0)
+ break;
+
+ if (is_atxheader(rndr, data + i, size - i) ||
+ is_hrule(data + i, size - i) ||
+ prefix_quote(data + i, size - i)) {
+ end = i;
+ break;
+ }
+
+ /*
+ * Early termination of a paragraph with the same logic
+ * as Markdown 1.0.0. If this logic is applied, the
+ * Markdown 1.0.3 test suite won't pass cleanly
+ *
+ * :: If the first character in a new line is not a letter,
+ * let's check to see if there's some kind of block starting
+ * here
+ */
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
+ if (prefix_oli(data + i, size - i) ||
+ prefix_uli(data + i, size - i)) {
+ end = i;
+ break;
+ }
+
+ /* see if an html block starts here (do_render is 0: probe only,
+ * the blockhtml callback is not invoked) */
+ if (data[i] == '<' && rndr->cb.blockhtml &&
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
+ end = i;
+ break;
+ }
+
+ /* see if a code fence starts here */
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
+ is_codefence(data + i, size - i, NULL) != 0) {
+ end = i;
+ break;
+ }
+ }
+
+ i = end;
+ }
+
+ work.size = i; /* i is the start of the line that ended the paragraph */
+ while (work.size && data[work.size - 1] == '\n')
+ work.size--;
+
+ if (!level) {
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
+ parse_inline(tmp, rndr, work.data, work.size);
+ if (rndr->cb.paragraph)
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
+ rndr_popbuf(rndr, BUFFER_BLOCK);
+ } else {
+ struct buf *header_work;
+
+ if (work.size) {
+ size_t beg;
+ i = work.size; /* remember the full length of the collected text */
+ work.size -= 1;
+
+ while (work.size && data[work.size] != '\n') /* walk back to the newline before the last line */
+ work.size -= 1;
+
+ beg = work.size + 1; /* start of the last line, i.e. the header text */
+ while (work.size && data[work.size - 1] == '\n')
+ work.size -= 1;
+
+ if (work.size > 0) { /* text precedes the header line: render it as a paragraph */
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
+ parse_inline(tmp, rndr, work.data, work.size);
+
+ if (rndr->cb.paragraph)
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
+
+ rndr_popbuf(rndr, BUFFER_BLOCK);
+ work.data += beg;
+ work.size = i - beg;
+ }
+ else work.size = i; /* single line: all of it is the header text */
+ }
+
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
+ parse_inline(header_work, rndr, work.data, work.size);
+
+ if (rndr->cb.header)
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
+
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ }
+
+ return end;
+}
+
+/*
+ * parse_fencedcode • handles parsing of a fenced block of code.
+ *
+ * The opening fence is recognised with is_codefence(), which also fills in
+ * the language buffer. Lines are then copied verbatim into a scratch
+ * buffer (empty lines are reduced to a single '\n') until a fence with no
+ * trailing text is met or the input ends. The result is passed to the
+ * blockcode callback, with the language buffer when it is not empty.
+ *
+ * Returns the number of bytes consumed, or 0 if data does not start with a
+ * code fence.
+ */
+static size_t
+parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+    size_t pos, line_end;
+    struct buf info = { 0, 0, 0, 0 };
+    struct buf *code;
+
+    pos = is_codefence(data, size, &info);
+    if (!pos)
+        return 0;
+
+    code = rndr_newbuf(rndr, BUFFER_BLOCK);
+
+    for (; pos < size; pos = line_end) {
+        struct buf trailer = { 0, 0, 0, 0 };
+        size_t closing = is_codefence(data + pos, size - pos, &trailer);
+
+        /* only a fence without trailing text closes the block */
+        if (closing && !trailer.size) {
+            pos += closing;
+            break;
+        }
+
+        /* find the end of the current line (newline included) */
+        line_end = pos + 1;
+        while (line_end < size && data[line_end - 1] != '\n')
+            line_end++;
+
+        if (pos < line_end) {
+            /* verbatim copy to the working buffer */
+            if (is_empty(data + pos, line_end - pos))
+                bufputc(code, '\n');
+            else
+                bufput(code, data + pos, line_end - pos);
+        }
+    }
+
+    /* make sure non-empty code ends with a newline */
+    if (code->size && code->data[code->size - 1] != '\n')
+        bufputc(code, '\n');
+
+    if (rndr->cb.blockcode)
+        rndr->cb.blockcode(ob, code, info.size ? &info : NULL, rndr->opaque);
+
+    rndr_popbuf(rndr, BUFFER_BLOCK);
+    return pos;
+}
+
+/*
+ * parse_blockcode • handles parsing of an indented block of code.
+ *
+ * Consumes lines as long as each one either carries the prefix recognised
+ * by prefix_code() or is empty. The prefix is stripped, empty lines are
+ * reduced to a single '\n', and trailing newlines are collapsed into
+ * exactly one before the text is passed to the blockcode callback (with
+ * no language).
+ *
+ * Returns the number of bytes consumed.
+ */
+static size_t
+parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+    size_t pos, line_end;
+    struct buf *code = rndr_newbuf(rndr, BUFFER_BLOCK);
+
+    for (pos = 0; pos < size; pos = line_end) {
+        size_t indent;
+
+        /* find the end of the current line (newline included) */
+        line_end = pos + 1;
+        while (line_end < size && data[line_end - 1] != '\n')
+            line_end++;
+
+        indent = prefix_code(data + pos, line_end - pos);
+        if (indent) {
+            /* skipping prefix */
+            pos += indent;
+        } else if (!is_empty(data + pos, line_end - pos)) {
+            /* non-empty non-prefixed line breaks the pre */
+            break;
+        }
+
+        if (pos < line_end) {
+            /* verbatim copy to the working buffer */
+            if (is_empty(data + pos, line_end - pos))
+                bufputc(code, '\n');
+            else
+                bufput(code, data + pos, line_end - pos);
+        }
+    }
+
+    /* collapse every trailing newline into a single one */
+    while (code->size && code->data[code->size - 1] == '\n')
+        code->size -= 1;
+
+    bufputc(code, '\n');
+
+    if (rndr->cb.blockcode)
+        rndr->cb.blockcode(ob, code, NULL, rndr->opaque);
+
+    rndr_popbuf(rndr, BUFFER_BLOCK);
+    return pos;
+}
+
+/* parse_listitem • parsing of a single list item.
+ *
+ * data must start on the item's first line; *flags carries the list flags
+ * in and out. The item's text is gathered into a work buffer (first line
+ * without its marker, following lines with up to 4 leading spaces
+ * removed), rendered into a second buffer and passed to the listitem
+ * callback.
+ *
+ * Sets MKD_LI_BLOCK in *flags when an empty line was found inside the
+ * item, and MKD_LI_END when the whole list must stop after this item.
+ *
+ * Returns the number of bytes consumed, or 0 when data does not start with
+ * a list item prefix. */
+/* the item's own marker is located here with prefix_uli()/prefix_oli(), so
+ * the caller passes the line with its marker still in place */
+static size_t
+parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
+{
+ struct buf *work = 0, *inter = 0;
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
+ int in_empty = 0, has_inside_empty = 0, in_fence = 0;
+
+ /* keeping track of the first indentation prefix */
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
+ orgpre++;
+
+ beg = prefix_uli(data, size);
+ if (!beg)
+ beg = prefix_oli(data, size);
+
+ if (!beg)
+ return 0;
+
+ /* skipping to the beginning of the following line */
+ end = beg;
+ while (end < size && data[end - 1] != '\n')
+ end++;
+
+ /* getting working buffers */
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
+
+ /* putting the first line into the working buffer */
+ bufput(work, data + beg, end - beg);
+ beg = end;
+
+ /* process the following lines */
+ while (beg < size) {
+ size_t has_next_uli = 0, has_next_oli = 0;
+
+ end++;
+
+ while (end < size && data[end - 1] != '\n')
+ end++;
+
+ /* process an empty line */
+ if (is_empty(data + beg, end - beg)) {
+ in_empty = 1;
+ beg = end;
+ continue;
+ }
+
+ /* calculating the indentation */
+ i = 0;
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
+ i++;
+
+ pre = i;
+
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
+ in_fence = !in_fence; /* every fence line toggles the in-fence state */
+ }
+
+ /* Only check for new list items if we are **not** inside
+ * a fenced code block */
+ if (!in_fence) {
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
+ }
+
+ /* checking for ul/ol switch */
+ if (in_empty && (
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
+ *flags |= MKD_LI_END;
+ break; /* the following item must have same list type */
+ }
+
+ /* checking for a new item */
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
+ if (in_empty)
+ has_inside_empty = 1;
+
+ if (pre == orgpre) /* the following item must have */
+ break; /* the same indentation */
+
+ if (!sublist)
+ sublist = work->size; /* offset in work where the first nested item starts */
+ }
+ /* joining only indented stuff after empty lines;
+ * note that now we only require 1 space of indentation
+ * to continue a list */
+ else if (in_empty && pre == 0) {
+ *flags |= MKD_LI_END;
+ break;
+ }
+ else if (in_empty) {
+ bufputc(work, '\n');
+ has_inside_empty = 1;
+ }
+
+ in_empty = 0;
+
+ /* adding the line without prefix into the working buffer */
+ bufput(work, data + beg + i, end - beg - i);
+ beg = end;
+ }
+
+ /* render of li contents */
+ if (has_inside_empty)
+ *flags |= MKD_LI_BLOCK;
+
+ if (*flags & MKD_LI_BLOCK) {
+ /* intermediate render of block li */
+ if (sublist && sublist < work->size) {
+ parse_block(inter, rndr, work->data, sublist);
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
+ }
+ else
+ parse_block(inter, rndr, work->data, work->size);
+ } else {
+ /* intermediate render of inline li */
+ if (sublist && sublist < work->size) {
+ parse_inline(inter, rndr, work->data, sublist);
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
+ }
+ else
+ parse_inline(inter, rndr, work->data, work->size);
+ }
+
+ /* render of li itself */
+ if (rndr->cb.listitem)
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
+
+ rndr_popbuf(rndr, BUFFER_SPAN); /* one pop for each of the two buffers taken above */
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ return beg;
+}
+
+
+/*
+ * parse_list • parsing of an ordered or unordered list block.
+ *
+ * Calls parse_listitem() repeatedly, collecting the rendered items in a
+ * scratch buffer, until the input is exhausted, an item consumes nothing,
+ * or an item raises MKD_LI_END in the flags. The collected items are then
+ * passed to the list callback together with the final flags.
+ *
+ * Returns the number of bytes consumed.
+ */
+static size_t
+parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
+{
+    size_t consumed = 0;
+    struct buf *items = rndr_newbuf(rndr, BUFFER_BLOCK);
+
+    while (consumed < size) {
+        size_t step = parse_listitem(items, rndr, data + consumed, size - consumed, &flags);
+
+        /* nothing recognised: the list is over */
+        if (step == 0)
+            break;
+
+        consumed += step;
+
+        /* the item itself signalled the end of the list */
+        if (flags & MKD_LI_END)
+            break;
+    }
+
+    if (rndr->cb.list)
+        rndr->cb.list(ob, items, flags, rndr->opaque);
+
+    rndr_popbuf(rndr, BUFFER_BLOCK);
+    return consumed;
+}
+
+/*
+ * parse_atxheader • parsing of atx-style headers.
+ *
+ * The level is the number of leading '#' (at most 6). The title is the
+ * rest of the line once the spaces after the marker, any trailing '#' run
+ * and the spaces before that run are removed. A non-empty title is parsed
+ * inline and passed to the header callback.
+ *
+ * Returns the offset of the end of the line; the newline itself is not
+ * consumed.
+ */
+static size_t
+parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+    size_t level, text_beg, text_end, line_end;
+
+    /* header level = number of leading '#', capped at 6 */
+    for (level = 0; level < size && level < 6 && data[level] == '#'; level++)
+        ;
+
+    /* skip the spaces between the marker and the title */
+    text_beg = level;
+    while (text_beg < size && data[text_beg] == ' ')
+        text_beg++;
+
+    /* the header stops at the newline */
+    line_end = text_beg;
+    while (line_end < size && data[line_end] != '\n')
+        line_end++;
+
+    /* drop the closing '#' run, then the spaces in front of it */
+    text_end = line_end;
+    while (text_end > 0 && data[text_end - 1] == '#')
+        text_end--;
+
+    while (text_end > 0 && data[text_end - 1] == ' ')
+        text_end--;
+
+    if (text_end > text_beg) {
+        struct buf *title = rndr_newbuf(rndr, BUFFER_SPAN);
+
+        parse_inline(title, rndr, data + text_beg, text_end - text_beg);
+
+        if (rndr->cb.header)
+            rndr->cb.header(ob, title, (int)level, rndr->opaque);
+
+        rndr_popbuf(rndr, BUFFER_SPAN);
+    }
+
+    return line_end;
+}
+
+
+/*
+ * htmlblock_end_tag • checks for the end of an HTML block at data, i.e.
+ * "</tag>" followed by the rest of a blank line and, optionally, one more
+ * blank line.
+ *
+ * data must point at the '<' of the candidate closing tag; the tag name
+ * is compared case-insensitively. The rndr parameter is not used.
+ *
+ * Returns the length of the match, 0 otherwise.
+ */
+static size_t
+htmlblock_end_tag(
+    const char *tag,
+    size_t tag_len,
+    struct sd_markdown *rndr,
+    uint8_t *data,
+    size_t size)
+{
+    size_t after, blank;
+
+    /* "</" + tag + ">" must fit, with at least one byte after it */
+    if (tag_len + 3 >= size)
+        return 0;
+
+    /* checking if tag is a match */
+    if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
+        return 0;
+
+    if (data[tag_len + 2] != '>')
+        return 0;
+
+    /* the rest of the closing tag's line has to be blank */
+    after = tag_len + 3;
+    if (after < size) {
+        blank = is_empty(data + after, size - after);
+        if (blank == 0)
+            return 0; /* non-blank after tag */
+
+        after += blank;
+    }
+
+    /* one more blank line is swallowed when present */
+    blank = 0;
+    if (after < size)
+        blank = is_empty(data + after, size - after);
+
+    return after + blank;
+}
+
+/*
+ * htmlblock_end • looks for the closing tag of the HTML block that starts
+ * at data.
+ *
+ * Every "</" found in data is offered to htmlblock_end_tag(). When
+ * start_of_line is set, a candidate that appears after the first line is
+ * only considered if it immediately follows a newline.
+ *
+ * Returns the offset just past the matched closing sequence, or 0 when no
+ * acceptable closing tag was found.
+ */
+static size_t
+htmlblock_end(const char *curtag,
+    struct sd_markdown *rndr,
+    uint8_t *data,
+    size_t size,
+    int start_of_line)
+{
+    size_t tag_size = strlen(curtag);
+    size_t pos = 1;
+    int newlines = 0;
+
+    while (pos < size) {
+        size_t matched;
+
+        /* advance until data[pos - 1], data[pos] spell "</" */
+        for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
+            if (data[pos] == '\n')
+                newlines++;
+        }
+
+        /* If we are only looking for unindented tags, skip the tag
+         * if it doesn't follow a newline.
+         *
+         * The only exception to this is if the tag is still on the
+         * initial line; in that case it still counts as a closing
+         * tag
+         */
+        if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
+            continue;
+
+        /* not enough room left for a closing tag */
+        if (pos + 2 + tag_size >= size)
+            break;
+
+        matched = htmlblock_end_tag(curtag, tag_size, rndr, data + pos - 1, size - pos + 1);
+        if (matched)
+            return pos + matched - 1;
+    }
+
+    return 0;
+}
+
+
+/* parse_htmlblock • parsing of a block of raw HTML.
+ *
+ * data must start with '<'. The tag name (up to the first '>' or space) is
+ * looked up with find_block_tag(). NOTE(review): find_block_tag() is not
+ * visible in this chunk; presumably it returns the name of a known
+ * block-level tag or NULL - confirm against html_blocks.h.
+ *
+ * When the lookup yields nothing, two special cases are tried: an HTML
+ * comment and an <hr> tag, each of which must be followed by a blank line.
+ * Otherwise the block extends to the matching closing tag located by
+ * htmlblock_end().
+ *
+ * The blockhtml callback is only invoked when do_render is non-zero, which
+ * lets callers use this function as a pure test.
+ *
+ * Returns the length of the HTML block, or 0 when none was recognised. */
+static size_t
+parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
+{
+ size_t i, j = 0, tag_end;
+ const char *curtag = NULL;
+ struct buf work = { data, 0, 0, 0 };
+
+ /* identification of the opening tag */
+ if (size < 2 || data[0] != '<')
+ return 0;
+
+ i = 1;
+ while (i < size && data[i] != '>' && data[i] != ' ')
+ i++;
+
+ if (i < size)
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
+
+ /* handling of special cases */
+ if (!curtag) {
+
+ /* HTML comment, laxist form */
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
+ i = 5;
+
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
+ i++;
+
+ i++; /* step past the '>' that closes the comment */
+
+ if (i < size)
+ j = is_empty(data + i, size - i);
+
+ if (j) {
+ work.size = i + j;
+ if (do_render && rndr->cb.blockhtml)
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
+ return work.size;
+ }
+ }
+
+ /* HR, which is the only self-closing block tag considered */
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
+ i = 3;
+ while (i < size && data[i] != '>')
+ i++;
+
+ if (i + 1 < size) {
+ i++;
+ j = is_empty(data + i, size - i);
+ if (j) {
+ work.size = i + j;
+ if (do_render && rndr->cb.blockhtml)
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
+ return work.size;
+ }
+ }
+ }
+
+ /* no special case recognised */
+ return 0;
+ }
+
+ /* looking for an unindented matching closing tag */
+ /* followed by a blank line */
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
+
+ /* if not found, trying a second pass looking for indented match */
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
+ }
+
+ if (!tag_end)
+ return 0;
+
+ /* the end of the block has been found */
+ work.size = tag_end;
+ if (do_render && rndr->cb.blockhtml)
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
+
+ return tag_end;
+}
+
+/*
+ * parse_table_row • renders one table row.
+ *
+ * Splits data (a single line, without its newline) on '|', trims the
+ * whitespace around every cell, parses each cell inline and passes it to
+ * the table_cell callback with the column's flags OR-ed with header_flag.
+ * Missing cells are emitted as empty ones so that exactly `columns` cells
+ * are produced, then the row goes to the table_row callback.
+ *
+ * Does nothing unless both the table_cell and table_row callbacks are set.
+ */
+static void
+parse_table_row(
+    struct buf *ob,
+    struct sd_markdown *rndr,
+    uint8_t *data,
+    size_t size,
+    size_t columns,
+    int *col_data,
+    int header_flag)
+{
+    size_t pos = 0, col = 0;
+    struct buf *row;
+
+    if (!rndr->cb.table_cell || !rndr->cb.table_row)
+        return;
+
+    row = rndr_newbuf(rndr, BUFFER_SPAN);
+
+    /* an optional leading pipe is not a cell separator */
+    if (pos < size && data[pos] == '|')
+        pos++;
+
+    while (col < columns && pos < size) {
+        size_t first, last;
+        struct buf *cell = rndr_newbuf(rndr, BUFFER_SPAN);
+
+        /* leading whitespace of the cell */
+        for (; pos < size && _isspace(data[pos]); pos++)
+            ;
+        first = pos;
+
+        /* the cell runs up to the next pipe or the end of the row */
+        for (; pos < size && data[pos] != '|'; pos++)
+            ;
+
+        /* `last` is inclusive; for an empty cell it ends up at first - 1,
+         * which makes the length computed below come out as 0 */
+        last = pos - 1;
+        while (last > first && _isspace(data[last]))
+            last--;
+
+        parse_inline(cell, rndr, data + first, 1 + last - first);
+        rndr->cb.table_cell(row, cell, col_data[col] | header_flag, rndr->opaque);
+
+        rndr_popbuf(rndr, BUFFER_SPAN);
+
+        pos++; /* step over the separating pipe */
+        col++;
+    }
+
+    /* pad short rows with empty cells */
+    while (col < columns) {
+        struct buf empty_cell = { 0, 0, 0, 0 };
+
+        rndr->cb.table_cell(row, &empty_cell, col_data[col] | header_flag, rndr->opaque);
+        col++;
+    }
+
+    rndr->cb.table_row(ob, row, rndr->opaque);
+
+    rndr_popbuf(rndr, BUFFER_SPAN);
+}
+
+/*
+ * parse_table_header • parses the header row of a table and the underline
+ * row that follows it.
+ *
+ * The number of columns is derived from the pipes on the first line
+ * (leading and trailing pipes are not separators). The second line must
+ * then provide, for every column, a cell of at least three characters
+ * made of '-' with an optional ':' on either side; the colons set
+ * MKD_TABLE_ALIGN_L / MKD_TABLE_ALIGN_R for that column.
+ *
+ * On success the header row is rendered into ob through parse_table_row()
+ * and the offset just past the underline row is returned. On failure 0 is
+ * returned.
+ *
+ * *columns receives the column count. *column_data receives a calloc'ed
+ * array of per-column flags; once the allocation has been attempted the
+ * pointer is left in place even on failure, so the caller is expected to
+ * free() it in every case (parse_table() does so).
+ */
+static size_t
+parse_table_header(
+    struct buf *ob,
+    struct sd_markdown *rndr,
+    uint8_t *data,
+    size_t size,
+    size_t *columns,
+    int **column_data)
+{
+    int pipes;
+    size_t i = 0, col, header_end, under_end;
+
+    /* count the pipes on the first line */
+    pipes = 0;
+    while (i < size && data[i] != '\n')
+        if (data[i++] == '|')
+            pipes++;
+
+    /* a table needs a pipe in the header and a line after it */
+    if (i == size || pipes == 0)
+        return 0;
+
+    header_end = i;
+
+    while (header_end > 0 && _isspace(data[header_end - 1]))
+        header_end--;
+
+    /* leading and trailing pipes do not separate columns */
+    if (data[0] == '|')
+        pipes--;
+
+    if (header_end && data[header_end - 1] == '|')
+        pipes--;
+
+    *columns = pipes + 1;
+    *column_data = calloc(*columns, sizeof(int));
+
+    /* out of memory: report "not a table" instead of writing through NULL
+     * (calloc may legitimately return NULL for a zero-column request) */
+    if (*columns > 0 && *column_data == NULL)
+        return 0;
+
+    /* Parse the header underline */
+    i++;
+    if (i < size && data[i] == '|')
+        i++;
+
+    under_end = i;
+    while (under_end < size && data[under_end] != '\n')
+        under_end++;
+
+    for (col = 0; col < *columns && i < under_end; ++col) {
+        size_t dashes = 0;
+
+        while (i < under_end && data[i] == ' ')
+            i++;
+
+        /* bounded like every other test in this loop: after skipping
+         * spaces i may equal under_end, which may itself equal size */
+        if (i < under_end && data[i] == ':') {
+            i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
+            dashes++;
+        }
+
+        while (i < under_end && data[i] == '-') {
+            i++; dashes++;
+        }
+
+        if (i < under_end && data[i] == ':') {
+            i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
+            dashes++;
+        }
+
+        while (i < under_end && data[i] == ' ')
+            i++;
+
+        /* a cell must end on a pipe or at the end of the line */
+        if (i < under_end && data[i] != '|')
+            break;
+
+        if (dashes < 3)
+            break;
+
+        i++;
+    }
+
+    /* the underline did not describe every column */
+    if (col < *columns)
+        return 0;
+
+    parse_table_row(
+        ob, rndr, data,
+        header_end,
+        *columns,
+        *column_data,
+        MKD_TABLE_HEADER
+    );
+
+    return under_end + 1;
+}
+
+/*
+ * parse_table • parsing of a table block.
+ *
+ * parse_table_header() decides whether data starts a table and renders the
+ * header row. Each following line is then rendered as a body row with
+ * parse_table_row(), until a line without any pipe, or a last line that
+ * has no terminating newline, is met. The header and body buffers are
+ * passed to the table callback.
+ *
+ * Returns the number of bytes consumed, or 0 when data is not a table.
+ */
+static size_t
+parse_table(
+    struct buf *ob,
+    struct sd_markdown *rndr,
+    uint8_t *data,
+    size_t size)
+{
+    size_t pos;
+    size_t columns;
+    int *col_data = NULL;
+    struct buf *header_work, *body_work;
+
+    header_work = rndr_newbuf(rndr, BUFFER_SPAN);
+    body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
+
+    pos = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
+    if (pos > 0) {
+
+        while (pos < size) {
+            size_t row_start = pos;
+            int pipes = 0;
+
+            /* measure the row and count its pipes */
+            for (; pos < size && data[pos] != '\n'; pos++) {
+                if (data[pos] == '|')
+                    pipes++;
+            }
+
+            /* not a row: leave the line for the caller */
+            if (pipes == 0 || pos == size) {
+                pos = row_start;
+                break;
+            }
+
+            parse_table_row(
+                body_work,
+                rndr,
+                data + row_start,
+                pos - row_start,
+                columns,
+                col_data, 0
+            );
+
+            pos++; /* step over the newline */
+        }
+
+        if (rndr->cb.table)
+            rndr->cb.table(ob, header_work, body_work, rndr->opaque);
+    }
+
+    free(col_data);
+    rndr_popbuf(rndr, BUFFER_SPAN);
+    rndr_popbuf(rndr, BUFFER_BLOCK);
+    return pos;
+}
+
+/* parse_block • parsing of a run of block-level elements.
+ *
+ * Repeatedly looks at the text starting at data + beg and passes it to the
+ * first construct that matches, in this order: atx header, HTML block,
+ * empty line, horizontal rule, fenced code (MKDEXT_FENCED_CODE), table
+ * (MKDEXT_TABLES), blockquote, indented code, unordered list, ordered
+ * list and finally paragraph. Each handler reports how many bytes it
+ * consumed and the loop resumes after them.
+ *
+ * Returns nothing. It gives up immediately, producing no output, when the
+ * number of work buffers in use exceeds rndr->max_nesting. */
+static void
+parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
+{
+ size_t beg, end, i;
+ uint8_t *txt_data;
+ beg = 0;
+
+ if (rndr->work_bufs[BUFFER_SPAN].size +
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
+ return;
+
+ while (beg < size) {
+ txt_data = data + beg;
+ end = size - beg; /* despite its name, this is the number of bytes left */
+
+ if (is_atxheader(rndr, txt_data, end))
+ beg += parse_atxheader(ob, rndr, txt_data, end);
+
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
+ beg += i;
+
+ else if ((i = is_empty(txt_data, end)) != 0)
+ beg += i;
+
+ else if (is_hrule(txt_data, end)) {
+ if (rndr->cb.hrule)
+ rndr->cb.hrule(ob, rndr->opaque);
+
+ while (beg < size && data[beg] != '\n')
+ beg++;
+
+ beg++; /* step over the newline that ends the rule */
+ }
+
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
+ beg += i;
+
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
+ beg += i;
+
+ else if (prefix_quote(txt_data, end))
+ beg += parse_blockquote(ob, rndr, txt_data, end);
+
+ else if (prefix_code(txt_data, end))
+ beg += parse_blockcode(ob, rndr, txt_data, end);
+
+ else if (prefix_uli(txt_data, end))
+ beg += parse_list(ob, rndr, txt_data, end, 0);
+
+ else if (prefix_oli(txt_data, end))
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
+
+ else
+ beg += parse_paragraph(ob, rndr, txt_data, end);
+ }
+}
+
+
+
+/*********************
+ * REFERENCE PARSING *
+ *********************/
+
+/* is_ref • returns whether a line is a reference or not.
+ *
+ * Tries to read a link reference definition
+ *     [id]: link "optional title"
+ * starting at data[beg] and never reading at or beyond data[end]. The link
+ * may be wrapped in angle brackets and the title may sit on the line that
+ * follows.
+ *
+ * Returns 1 when a definition was recognised, 0 otherwise. On success
+ * *last (when last is not NULL) receives the offset of the end of the
+ * definition's final line. When refs is not NULL the definition is also
+ * registered through add_link_ref(), with the link and, when present, the
+ * title copied into freshly allocated buffers; 0 is returned if
+ * add_link_ref() fails.
+ *
+ * NOTE(review): several comments below mention "(space | tab)" but only
+ * ' ' is actually tested. */
+static int
+is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
+{
+/* int n; */
+ size_t i = 0;
+ size_t id_offset, id_end;
+ size_t link_offset, link_end;
+ size_t title_offset, title_end;
+ size_t line_end;
+
+ /* up to 3 optional leading spaces */
+ if (beg + 3 >= end) return 0;
+ if (data[beg] == ' ') { i = 1;
+ if (data[beg + 1] == ' ') { i = 2;
+ if (data[beg + 2] == ' ') { i = 3;
+ if (data[beg + 3] == ' ') return 0; } } }
+ i += beg;
+
+ /* id part: anything but a newline between brackets */
+ if (data[i] != '[') return 0;
+ i++;
+ id_offset = i;
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
+ i++;
+ if (i >= end || data[i] != ']') return 0;
+ id_end = i;
+
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
+ i++;
+ if (i >= end || data[i] != ':') return 0;
+ i++;
+ while (i < end && data[i] == ' ') i++;
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
+ i++;
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
+ while (i < end && data[i] == ' ') i++;
+ if (i >= end) return 0;
+
+ /* link: whitespace-free sequence, optionally between angle brackets */
+ if (data[i] == '<')
+ i++;
+
+ link_offset = i;
+
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
+ i++;
+
+ if (data[i - 1] == '>') link_end = i - 1; /* a closing angle bracket is not part of the link */
+ else link_end = i;
+
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
+ while (i < end && data[i] == ' ') i++;
+ if (i < end && data[i] != '\n' && data[i] != '\r'
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
+ return 0;
+ line_end = 0; /* stays 0 while no end of line has been seen after the link */
+ /* computing end-of-line */
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
+ line_end = i + 1;
+
+ /* optional (space|tab)* spacer after a newline */
+ if (line_end) {
+ i = line_end + 1;
+ while (i < end && data[i] == ' ') i++; }
+
+ /* optional title: any non-newline sequence enclosed in '"()
+ alone on its line */
+ title_offset = title_end = 0;
+ if (i + 1 < end
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
+ i++;
+ title_offset = i;
+ /* looking for EOL */
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
+ title_end = i + 1;
+ else title_end = i;
+ /* stepping back */
+ i -= 1;
+ while (i > title_offset && data[i] == ' ')
+ i -= 1;
+ if (i > title_offset
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
+ line_end = title_end; /* the title is valid: the definition now ends after the title */
+ title_end = i; } }
+
+ if (!line_end || link_end == link_offset)
+ return 0; /* garbage after the link, or empty link */
+
+ /* a valid ref has been found, filling-in return structures */
+ if (last)
+ *last = line_end;
+
+ if (refs) {
+ struct link_ref *ref;
+
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
+ if (!ref)
+ return 0;
+
+ ref->link = bufnew(link_end - link_offset);
+ bufput(ref->link, data + link_offset, link_end - link_offset);
+
+ if (title_end > title_offset) {
+ ref->title = bufnew(title_end - title_offset);
+ bufput(ref->title, data + title_offset, title_end - title_offset);
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * expand_tabs • appends `line` to ob, replacing each tab with the spaces
+ * needed to reach the next multiple-of-4 column (always at least one
+ * space). Columns are counted from the start of `line`.
+ */
+static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
+{
+    size_t pos = 0, column = 0;
+
+    while (pos < size) {
+        size_t run = 0;
+
+        /* length of the tab-free run starting at pos */
+        while (pos + run < size && line[pos + run] != '\t')
+            run++;
+
+        if (run > 0)
+            bufput(ob, line + pos, run);
+
+        pos += run;
+        column += run;
+
+        if (pos >= size)
+            break;
+
+        /* pad up to the next tab stop */
+        do {
+            bufputc(ob, ' ');
+            column++;
+        } while (column % 4 != 0);
+
+        pos++; /* step over the tab itself */
+    }
+}
+
+/**********************
+ * EXPORTED FUNCTIONS *
+ **********************/
+
+/*
+ * sd_markdown_new • allocates and initialises a parser object.
+ *
+ * The callback table is copied into the new object, the two work-buffer
+ * stacks are initialised and the table of active characters is filled
+ * according to the callbacks provided and the extensions requested.
+ *
+ * max_nesting must be greater than 0 and callbacks must not be NULL (both
+ * are asserted). Returns NULL when the object cannot be allocated.
+ */
+struct sd_markdown *
+sd_markdown_new(
+    unsigned int extensions,
+    size_t max_nesting,
+    const struct sd_callbacks *callbacks,
+    void *opaque)
+{
+    struct sd_markdown *md;
+    int has_emphasis;
+
+    assert(max_nesting > 0 && callbacks);
+
+    md = malloc(sizeof(struct sd_markdown));
+    if (md == NULL)
+        return NULL;
+
+    memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
+
+    stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
+    stack_init(&md->work_bufs[BUFFER_SPAN], 8);
+
+    /* Extension data */
+    md->ext_flags = extensions;
+    md->opaque = opaque;
+    md->max_nesting = max_nesting;
+    md->in_link_body = 0;
+
+    /* start with no active character at all */
+    memset(md->active_char, 0x0, 256);
+
+    /* characters that are always active */
+    md->active_char['<'] = MD_CHAR_LANGLE;
+    md->active_char['\\'] = MD_CHAR_ESCAPE;
+    md->active_char['&'] = MD_CHAR_ENTITITY;
+
+    /* characters that depend on the callbacks provided */
+    has_emphasis = md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis;
+    if (has_emphasis) {
+        md->active_char['*'] = MD_CHAR_EMPHASIS;
+        md->active_char['_'] = MD_CHAR_EMPHASIS;
+    }
+
+    if (has_emphasis && (extensions & MKDEXT_STRIKETHROUGH))
+        md->active_char['~'] = MD_CHAR_EMPHASIS;
+
+    if (md->cb.codespan)
+        md->active_char['`'] = MD_CHAR_CODESPAN;
+
+    if (md->cb.linebreak)
+        md->active_char['\n'] = MD_CHAR_LINEBREAK;
+
+    if (md->cb.image || md->cb.link)
+        md->active_char['['] = MD_CHAR_LINK;
+
+    /* characters that depend on the extensions requested */
+    if (extensions & MKDEXT_AUTOLINK) {
+        md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
+        md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
+        md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
+    }
+
+    if (extensions & MKDEXT_SUPERSCRIPT)
+        md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
+
+    return md;
+}
+
+/*
+ * sd_markdown_render • renders a Markdown document into ob.
+ *
+ * First pass: the document is copied line by line into a private buffer;
+ * lines recognised by is_ref() are recorded in md->refs instead of being
+ * copied, tabs are expanded and line endings are rewritten as '\n'. A
+ * leading UTF-8 byte order mark is skipped.
+ *
+ * Second pass: the doc_header callback, parse_block() on the cleaned-up
+ * text and the doc_footer callback are run in that order.
+ *
+ * Nothing is rendered when the private buffer cannot be allocated. The
+ * reference table is released before returning, and both work-buffer
+ * stacks are asserted to be empty again.
+ */
+void
+sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
+{
+#define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
+    /* raw bytes, hence uint8_t: these values do not fit in a signed char */
+    static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
+
+    struct buf *text;
+    size_t beg, end;
+
+    text = bufnew(64);
+    if (!text)
+        return;
+
+    /* Preallocate enough space for our buffer to avoid expanding while copying */
+    bufgrow(text, doc_size);
+
+    /* reset the references table */
+    memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
+
+    /* first pass: looking for references, copying everything else */
+    beg = 0;
+
+    /* Skip a possible UTF-8 BOM, even though the Unicode standard
+     * discourages having these in UTF-8 documents */
+    if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
+        beg += 3;
+
+    while (beg < doc_size) { /* iterating over lines */
+        if (is_ref(document, beg, doc_size, &end, md->refs)) {
+            beg = end;
+            continue;
+        }
+
+        /* skipping to the next line */
+        end = beg;
+        while (end < doc_size && document[end] != '\n' && document[end] != '\r')
+            end++;
+
+        /* adding the line body if present */
+        if (end > beg)
+            expand_tabs(text, document + beg, end - beg);
+
+        while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
+            /* add one \n per newline */
+            if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
+                bufputc(text, '\n');
+            end++;
+        }
+
+        beg = end;
+    }
+
+    /* pre-grow the output buffer to minimize allocations */
+    bufgrow(ob, MARKDOWN_GROW(text->size));
+
+    /* second pass: actual rendering */
+    if (md->cb.doc_header)
+        md->cb.doc_header(ob, md->opaque);
+
+    if (text->size) {
+        /* adding a final newline if not already present */
+        if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
+            bufputc(text, '\n');
+
+        parse_block(ob, md, text->data, text->size);
+    }
+
+    if (md->cb.doc_footer)
+        md->cb.doc_footer(ob, md->opaque);
+
+    /* clean-up */
+    bufrelease(text);
+    free_link_refs(md->refs);
+
+    assert(md->work_bufs[BUFFER_SPAN].size == 0);
+    assert(md->work_bufs[BUFFER_BLOCK].size == 0);
+}
+
+/*
+ * sd_markdown_free • releases a parser object created by sd_markdown_new().
+ *
+ * Every buffer slot of both work-buffer stacks is released (up to each
+ * stack's asize), then the stacks and the object itself are freed.
+ * Passing NULL is allowed and does nothing, like free(), so the result of
+ * a failed sd_markdown_new() can be handed over unconditionally.
+ */
+void
+sd_markdown_free(struct sd_markdown *md)
+{
+    size_t i;
+
+    if (md == NULL)
+        return;
+
+    for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
+        bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
+
+    for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
+        bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
+
+    stack_free(&md->work_bufs[BUFFER_SPAN]);
+    stack_free(&md->work_bufs[BUFFER_BLOCK]);
+
+    free(md);
+}
+
+/*
+ * sd_version • reports the library version through the three output
+ * pointers, all of which must be valid. The values are the
+ * SUNDOWN_VER_MAJOR, SUNDOWN_VER_MINOR and SUNDOWN_VER_REVISION constants.
+ */
+void
+sd_version(int *ver_major, int *ver_minor, int *ver_revision)
+{
+    const int major = SUNDOWN_VER_MAJOR;
+    const int minor = SUNDOWN_VER_MINOR;
+    const int revision = SUNDOWN_VER_REVISION;
+
+    *ver_major = major;
+    *ver_minor = minor;
+    *ver_revision = revision;
+}
+
+/* vim: set filetype=c: */