/**
 * Determine if an & character pointed to by \p str is a markup & entity or
 * part of the text
 *
 * Three forms are recognised as entities:
 *  - decimal character references such as `&#936;`
 *  - hexadecimal character references such as `&#x3a8;` (lowercase `x` only)
 *  - the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`
 *
 * NOTE(review): numeric references are only checked syntactically; the value
 * itself is not range-checked (e.g. `&#0;` is reported as an entity).
 *
 * @param str Pointer to the '&' to examine. Must not be NULL and must point
 *            into a NUL-terminated string.
 *
 * @return true if it's an entity otherwise false
 */
static bool markup_is_entity(const char *str)
{
        assert(str);
        assert(*str == '&');

        // Every entity is terminated by a semicolon
        const char *end = strchr(str, ';');
        if (!end)
                return false;

        // Parse (hexa)decimal entities with the format &#1234; or &#xABC;
        if (str[1] == '#') {
                const char *cur = str + 2;
                const bool is_hex = (*cur == 'x');

                if (is_hex)
                        cur++;

                // Reject &#; and &#x; (no digits at all)
                if (cur == end)
                        return false;

                // <ctype.h> classifiers require a value representable as
                // unsigned char; a plain (possibly negative) char is
                // undefined behaviour, e.g. for UTF-8 continuation bytes.
                while (cur < end) {
                        const unsigned char c = (unsigned char)*cur;

                        if (is_hex ? !isxdigit(c) : !isdigit(c))
                                break;
                        cur++;
                }

                // Only an entity if nothing but digits precede the semicolon
                return cur == end;
        }

        static const char *const supported_tags[] = {
                "&amp;", "&lt;", "&gt;", "&quot;", "&apos;",
        };
        const size_t tag_count = sizeof(supported_tags) / sizeof(*supported_tags);

        for (size_t i = 0; i < tag_count; i++) {
                const char *tag = supported_tags[i];

                if (strncmp(str, tag, strlen(tag)) == 0)
                        return true;
        }

        return false;
}
/**
 * Escape all unsupported and invalid &-entities in a string. If the resulting
 * string does not fit it will be reallocated.
 *
 * Each '&' for which markup_is_entity() returns false is replaced by the
 * literal text "&amp;". An '&' that starts a recognised entity is left
 * untouched.
 *
 * @param str The string to be transformed
 *
 * @return The transformed string, or NULL if \p str is NULL. Always continue
 *         with the returned pointer: it is whatever string_replace_at() hands
 *         back and may differ from \p str.
 */
static char *markup_escape_unsupported(char *str)
{
        if (!str)
                return NULL;

        static const char escaped_amp[] = "&amp;";

        char *match = str;
        while ((match = strchr(match, '&'))) {
                if (markup_is_entity(match)) {
                        // Valid entity: step over the '&' and keep searching
                        match++;
                        continue;
                }

                // Keep the offset rather than the pointer: `str` is reassigned
                // from string_replace_at() and `match` may no longer be valid.
                int pos = match - str;
                str = string_replace_at(str, pos, 1, escaped_amp);

                // Resume after the inserted text so its '&' is not rescanned
                match = str + pos + strlen(escaped_amp);
        }

        return str;
}