diff options
Diffstat (limited to 'src/buf_text.c')
-rw-r--r-- | src/buf_text.c | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/src/buf_text.c b/src/buf_text.c new file mode 100644 index 000000000..443454b5f --- /dev/null +++ b/src/buf_text.c @@ -0,0 +1,291 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "buf_text.h" + +int git_buf_text_puts_escaped( + git_buf *buf, + const char *string, + const char *esc_chars, + const char *esc_with) +{ + const char *scan; + size_t total = 0, esc_len = strlen(esc_with), count; + + if (!string) + return 0; + + for (scan = string; *scan; ) { + /* count run of non-escaped characters */ + count = strcspn(scan, esc_chars); + total += count; + scan += count; + /* count run of escaped characters */ + count = strspn(scan, esc_chars); + total += count * (esc_len + 1); + scan += count; + } + + if (git_buf_grow(buf, buf->size + total + 1) < 0) + return -1; + + for (scan = string; *scan; ) { + count = strcspn(scan, esc_chars); + + memmove(buf->ptr + buf->size, scan, count); + scan += count; + buf->size += count; + + for (count = strspn(scan, esc_chars); count > 0; --count) { + /* copy escape sequence */ + memmove(buf->ptr + buf->size, esc_with, esc_len); + buf->size += esc_len; + /* copy character to be escaped */ + buf->ptr[buf->size] = *scan; + buf->size++; + scan++; + } + } + + buf->ptr[buf->size] = '\0'; + + return 0; +} + +void git_buf_text_unescape(git_buf *buf) +{ + buf->size = git__unescape(buf->ptr); +} + +int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) +{ + const char *scan = src->ptr; + const char *scan_end = src->ptr + src->size; + const char *next = memchr(scan, '\r', src->size); + char *out; + + assert(tgt != src); + + if (!next) + return GIT_ENOTFOUND; + + /* reduce reallocs while in the loop */ + if (git_buf_grow(tgt, src->size) < 0) + return -1; + out = tgt->ptr; + tgt->size = 0; + + /* Find the next \r and copy whole chunk up to there to tgt */ + for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { + if (next > scan) { + size_t copylen = next - scan; + memcpy(out, scan, copylen); + out += copylen; + } + + /* Do not drop \r unless it is followed by \n */ + if (next[1] != '\n') + *out++ = '\r'; + } + + /* Copy remaining input into dest */ + memcpy(out, scan, scan_end - scan + 1); /* +1 for NUL byte */ + out += (scan_end - scan); + tgt->size = out - tgt->ptr; + + return 0; +} + +int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) +{ + const char *start = src->ptr; + const char *end = start + src->size; + const char *scan = start; + const char *next = memchr(scan, '\n', src->size); + + assert(tgt != src); + + if (!next) + return GIT_ENOTFOUND; + + /* attempt to reduce reallocs while in the loop */ + if (git_buf_grow(tgt, src->size + (src->size >> 4) + 1) < 0) + return -1; + tgt->size = 0; + + for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { + size_t copylen = next - scan; + /* don't convert existing \r\n to \r\r\n */ + size_t extralen = (next > start && next[-1] == '\r') ? 1 : 2; + size_t needsize = tgt->size + copylen + extralen + 1; + + if (tgt->asize < needsize && git_buf_grow(tgt, needsize) < 0) + return -1; + + if (next > scan) { + memcpy(tgt->ptr + tgt->size, scan, copylen); + tgt->size += copylen; + } + if (extralen == 2) + tgt->ptr[tgt->size++] = '\r'; + tgt->ptr[tgt->size++] = '\n'; + } + + return git_buf_put(tgt, scan, end - scan); +} + +int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) +{ + size_t i; + const char *str, *pfx; + + git_buf_clear(buf); + + if (!strings || !strings->count) + return 0; + + /* initialize common prefix to first string */ + if (git_buf_sets(buf, strings->strings[0]) < 0) + return -1; + + /* go through the rest of the strings, truncating to shared prefix */ + for (i = 1; i < strings->count; ++i) { + + for (str = strings->strings[i], pfx = buf->ptr; + *str && *str == *pfx; str++, pfx++) + /* scanning */; + + git_buf_truncate(buf, pfx - buf->ptr); + + if (!buf->size) + break; + } + + return 0; +} + +bool git_buf_text_is_binary(const git_buf *buf) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + int printable = 0, nonprintable = 0; + + while (scan < end) { + unsigned char c = *scan++; + + if (c > 0x1F && c < 0x7F) + printable++; + else if (c == '\0') + return true; + else if (!git__isspace(c)) + nonprintable++; + } + + return ((printable >> 7) < nonprintable); +} + +bool git_buf_text_contains_nul(const git_buf *buf) +{ + return (memchr(buf->ptr, '\0', buf->size) != NULL); +} + +int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset) +{ + const char *ptr; + size_t len; + + *bom = GIT_BOM_NONE; + /* need at least 2 bytes after offset to look for any BOM */ + if (buf->size < offset + 2) + return 0; + + ptr = buf->ptr + offset; + len = buf->size - offset; + + switch (*ptr++) { + case 0: + if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { + *bom = GIT_BOM_UTF32_BE; + return 4; + } + break; + case '\xEF': + if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { + *bom = GIT_BOM_UTF8; + return 3; + } + break; + case '\xFE': + if (*ptr == '\xFF') { + *bom = GIT_BOM_UTF16_BE; + return 2; + } + break; + case '\xFF': + if (*ptr != '\xFE') + break; + if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { + *bom = GIT_BOM_UTF32_LE; + return 4; + } else { + *bom = GIT_BOM_UTF16_LE; + return 2; + } + break; + default: + break; + } + + return 0; +} + +bool git_buf_text_gather_stats( + git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) +{ + const char *scan = buf->ptr, *end = buf->ptr + buf->size; + int skip; + + memset(stats, 0, sizeof(*stats)); + + /* BOM detection */ + skip = git_buf_text_detect_bom(&stats->bom, buf, 0); + if (skip_bom) + scan += skip; + + /* Ignore EOF character */ + if (buf->size > 0 && end[-1] == '\032') + end--; + + /* Counting loop */ + while (scan < end) { + unsigned char c = *scan++; + + if ((c > 0x1F && c < 0x7F) || c > 0x9f) + stats->printable++; + else switch (c) { + case '\0': + stats->nul++; + stats->nonprintable++; + break; + case '\n': + stats->lf++; + break; + case '\r': + stats->cr++; + if (scan < end && *scan == '\n') + stats->crlf++; + break; + case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ + stats->printable++; + break; + default: + stats->nonprintable++; + break; + } + } + + return (stats->nul > 0 || + ((stats->printable >> 7) < stats->nonprintable)); +} |