From acd8be51ab169debec751cb3da695c28b6295675 Mon Sep 17 00:00:00 2001 From: Benedikt Heine Date: Fri, 3 Nov 2017 02:02:44 +0100 Subject: [PATCH] Remove a and img tags from msg While the notification spec allows tags like <a href="...">...</a> and <img src="..." alt="..."/>, pango cannot parse these tags and therefore these tags should be removed before being passed to pango. Also, the function notification_extract_markup_urls is not needed anymore, as markup_strip_a can optionally return URLs. This implies that URL replacement is now indicated via show_indicators for URLs and the dmenu string is in the format of '[text between a tags] URL\n'. Images are handled in the same way. --- src/markup.c | 176 +++++++++++++++++++++++++++++++++++++++++++++ src/markup.h | 4 ++ src/notification.c | 65 +++++------------ src/notification.h | 2 +- test/markup.c | 96 +++++++++++++++++++++++++ 5 files changed, 295 insertions(+), 48 deletions(-) diff --git a/src/markup.c b/src/markup.c index cd91ff8..ad4484f 100644 --- a/src/markup.c +++ b/src/markup.c @@ -4,6 +4,8 @@ #include #include +#include +#include #include "settings.h" #include "utils.h" @@ -44,6 +46,178 @@ static char *markup_br2nl(char *str) return str; } +/* + * Remove HTML hyperlinks of a string. 
+ * + * @str: The string to replace a tags + * @urls: (nullable): If any href-attributes found, an '\n' concatenated + * string of the URLs in format '[] ' + */ +void markup_strip_a(char **str, char **urls) +{ + char *tag1 = NULL; + + if (urls) + *urls = NULL; + + while ((tag1 = strstr(*str, ""); + char *tag2 = strstr(tag1, ""); + + // the tag is broken, ignore it + if (!tag1_end) { + fprintf(stderr, + "WARNING: Given link is broken: '%s'\n", + tag1); + string_replace_at(*str, tag1-*str, strlen(tag1), ""); + break; + } + if (tag2 && tag2 < tag1_end) { + int repl_len = (tag2 - tag1) + strlen(""); + fprintf(stderr, + "WARNING: Given link is broken: '%.*s.'\n", + repl_len, tag1); + string_replace_at(*str, tag1-*str, repl_len, ""); + break; + } + + // search contents of href attribute + char *plain_url = NULL; + if (href && href < tag1_end) { + + // shift href to the actual begin of the value + href = href+6; + + const char *quote = strstr(href, "\""); + + if (quote && quote < tag1_end) { + plain_url = g_strndup(href, quote-href); + } + } + + // text between a tags + int text_len; + if (tag2) + text_len = tag2 - (tag1_end+1); + else + text_len = strlen(tag1_end+1); + + char *text = g_strndup(tag1_end+1, text_len); + + int repl_len = text_len + (tag1_end-tag1) + 1; + repl_len += tag2 ? strlen("") : 0; + + *str = string_replace_at(*str, tag1-*str, repl_len, text); + + // if there had been a href attribute, + // add it to the URLs + if (plain_url && urls) { + text = string_replace_all("]", "", text); + text = string_replace_all("[", "", text); + + char *url = g_strdup_printf("[%s] %s", text, plain_url); + + *urls = string_append(*urls, url, "\n"); + g_free(url); + } + + g_free(plain_url); + g_free(text); + } +} + +/* + * Remove img-tags of a string. If alt attribute given, use this as replacement. 
+ * + * @str: The string to replace img tags + * @urls: (nullable): If any src-attributes found, an '\n' concatenated string of + * the URLs in format '[] ' + */ +void markup_strip_img(char **str, char **urls) +{ + const char *start = *str; + + if (urls) + *urls = NULL; + + while ((start = strstr(*str, ""); + + // the tag is broken, ignore it + if (!end) { + fprintf(stderr, "WARNING: Given image is broken: '%s'\n", start); + string_replace_at(*str, start-*str, strlen(start), ""); + break; + } + + // use attribute=" as stated in the notification spec + const char *alt_s = strstr(start, "alt=\""); + const char *src_s = strstr(start, "src=\""); + + char *text_alt = NULL; + char *text_src = NULL; + + const char *src_e = NULL, *alt_e = NULL; + if (alt_s) + alt_e = strstr(alt_s + strlen("alt=\""), "\""); + if (src_s) + src_e = strstr(src_s + strlen("src=\""), "\""); + + // Move pointer to the actual start + alt_s = alt_s ? alt_s + strlen("alt=\"") : NULL; + src_s = src_s ? src_s + strlen("src=\"") : NULL; + + /* check if alt and src attribute are given + * If both given, check the alignment of all pointers */ + if ( alt_s && alt_e + && src_s && src_e + && ( (alt_s < src_s && alt_e < src_s-strlen("src=\"") && src_e < end) + ||(src_s < alt_s && src_e < alt_s-strlen("alt=\"") && alt_e < end)) ) { + + text_alt = g_strndup(alt_s, alt_e-alt_s); + text_src = g_strndup(src_s, src_e-src_s); + + /* check if single valid alt attribute is available */ + } else if (alt_s && alt_e && alt_e < end && (!src_s || src_s < alt_s || alt_e < src_s - strlen("src=\""))) { + text_alt = g_strndup(alt_s, alt_e-alt_s); + + /* check if single valid src attribute is available */ + } else if (src_s && src_e && src_e < end && (!alt_s || alt_s < src_s || src_e < alt_s - strlen("alt=\""))) { + text_src = g_strndup(src_s, src_e-src_s); + + } else { + fprintf(stderr, + "WARNING: Given image argument is broken: '%.*s'\n", + (int)(end-start), start); + } + + // replacement text for alt + int repl_len = end - 
start + 1; + + if (!text_alt) + text_alt = g_strdup("[image]"); + + *str = string_replace_at(*str, start-*str, repl_len, text_alt); + + // if there had been a href attribute, + // add it to the URLs + if (text_src && urls) { + text_alt = string_replace_all("]", "", text_alt); + text_alt = string_replace_all("[", "", text_alt); + + char *url = g_strdup_printf("[%s] %s", text_alt, text_src); + + *urls = string_append(*urls, url, "\n"); + g_free(url); + } + + g_free(text_src); + g_free(text_alt); + } +} + /* * Strip any markup from text; turn it in to plain text. * @@ -96,6 +270,8 @@ char *markup_transform(char *str, enum markup_mode markup_mode) break; case MARKUP_FULL: str = markup_br2nl(str); + markup_strip_a(&str, NULL); + markup_strip_img(&str, NULL); break; } diff --git a/src/markup.h b/src/markup.h index 8304e2d..9d5cda7 100644 --- a/src/markup.h +++ b/src/markup.h @@ -5,6 +5,10 @@ #include "settings.h" char *markup_strip(char *str); + +void markup_strip_a(char **str, char **urls); +void markup_strip_img(char **str, char **urls); + char *markup_transform(char *str, enum markup_mode markup_mode); #endif diff --git a/src/notification.c b/src/notification.c index 7981a5d..7ae2254 100644 --- a/src/notification.c +++ b/src/notification.c @@ -252,46 +252,6 @@ void notification_replace_single_field(char **haystack, g_free(input); } -char *notification_extract_markup_urls(char **str_ptr) -{ - char *start, *end, *replace_buf, *str, *urls = NULL, *url, *index_buf; - int linkno = 1; - - str = *str_ptr; - while ((start = strstr(str, ""); - if (end != NULL) { - replace_buf = g_strndup(start, end - start + 1); - url = extract_urls(replace_buf); - if (url != NULL) { - str = string_replace(replace_buf, "[", str); - - index_buf = g_strdup_printf("[#%d]", linkno++); - if (urls == NULL) { - urls = g_strconcat(index_buf, " ", url, NULL); - } else { - char *tmp = urls; - urls = g_strconcat(tmp, "\n", index_buf, " ", url, NULL); - g_free(tmp); - } - - index_buf[0] = ' '; - str = 
string_replace("", index_buf, str); - g_free(index_buf); - g_free(url); - } else { - str = string_replace(replace_buf, "", str); - str = string_replace("", "", str); - } - g_free(replace_buf); - } else { - break; - } - } - *str_ptr = str; - return urls; -} - /* * Create notification struct and initialise all fields with either * - the default (if it's not needed to be freed later) @@ -479,15 +439,26 @@ static void notification_format_message(notification *n) static void notification_extract_urls(notification *n) { - // DO markup urls processing here until we split this out correctly - n->urls = notification_extract_markup_urls(&(n->body)); + g_clear_pointer(&n->urls, g_free); - char *tmp = g_strconcat(n->summary, " ", n->body, NULL); + char *urls_in = string_append(g_strdup(n->summary), n->body, " "); - char *tmp_urls = extract_urls(tmp); - n->urls = string_append(n->urls, tmp_urls, "\n"); - g_free(tmp_urls); - g_free(tmp); + char *urls_a = NULL; + char *urls_img = NULL; + markup_strip_a(&urls_in, &urls_a); + markup_strip_img(&urls_in, &urls_img); + // remove links and images first to not confuse + // plain urls extraction + char *urls_text = extract_urls(urls_in); + + n->urls = string_append(n->urls, urls_a, "\n"); + n->urls = string_append(n->urls, urls_img, "\n"); + n->urls = string_append(n->urls, urls_text, "\n"); + + g_free(urls_in); + g_free(urls_a); + g_free(urls_img); + g_free(urls_text); } static void notification_dmenu_string(notification *n) diff --git a/src/notification.h b/src/notification.h index bffeec1..c83077a 100644 --- a/src/notification.h +++ b/src/notification.h @@ -72,7 +72,7 @@ typedef struct _notification { /* derived fields */ char *msg; /* formatted message */ char *text_to_render; /* formatted message (with age and action indicators) */ - char *urls; /* urllist */ + char *urls; /* urllist delimited by '\n' */ } notification; notification *notification_create(void); diff --git a/test/markup.c b/test/markup.c index cbd8bad..76f2b91 100644 
--- a/test/markup.c +++ b/test/markup.c @@ -45,12 +45,108 @@ TEST test_markup_transform(void) ASSERT_STR_EQ("foo bar baz", (ptr=markup_transform(g_strdup("foo
bar\nbaz"), MARKUP_FULL))); g_free(ptr); + // Test replacement of img and a tags, not renderable by pango + ASSERT_STR_EQ("foo bar bar baz", (ptr=markup_transform(g_strdup("\"foo
bar\nbaz"), MARKUP_FULL))); + g_free(ptr); + ASSERT_STR_EQ("test ", (ptr=markup_transform(g_strdup("test \"foo image"), MARKUP_FULL))); + g_free(ptr); + ASSERT_STR_EQ("bar baz", (ptr=markup_transform(g_strdup("bar baz"), MARKUP_FULL))); + g_free(ptr); + + PASS(); +} + +TEST helper_markup_strip_a (const char *in, const char *exp, const char *urls) +{ + // out_urls is a return parameter and the content should be ignored + char *out_urls = (char *)0x04; //Chosen by a fair dice roll + char *out = g_strdup(in); + char *msg = g_strconcat("url: ", in, NULL); + + markup_strip_a(&out, &out_urls); + + ASSERT_STR_EQm(msg, exp, out); + + if (urls) { + ASSERT_STR_EQm(msg, urls, out_urls); + } else { + ASSERT_EQm(msg, urls, out_urls); + } + + g_free(out_urls); + g_free(out); + g_free(msg); + + PASS(); +} + +TEST test_markup_strip_a(void) +{ + RUN_TESTp(helper_markup_strip_a, "valid link", "valid link", "[valid] https://url.com"); + RUN_TESTp(helper_markup_strip_a, "valid link", "valid link", "[valid] "); + RUN_TESTp(helper_markup_strip_a, "valid link", "valid link", NULL); + RUN_TESTp(helper_markup_strip_a, "valid link", "valid link", "[valid link] https://url.com"); + + RUN_TESTp(helper_markup_strip_a, " link", " link", NULL); + RUN_TESTp(helper_markup_strip_a, " link", " link", NULL); + + PASS(); +} + +TEST helper_markup_strip_img (const char *in, const char *exp, const char *urls) +{ + // out_urls is a return parameter and the content should be ignored + char *out_urls = (char *)0x04; //Chosen by a fair dice roll + char *out = g_strdup(in); + char *msg = g_strconcat("url: ", in, NULL); + + markup_strip_img(&out, &out_urls); + + ASSERT_STR_EQm(msg, exp, out); + + if (urls) { + ASSERT_STR_EQm(msg, urls, out_urls); + } else { + ASSERT_EQm(msg, urls, out_urls); + } + + g_free(out_urls); + g_free(out); + g_free(msg); + + PASS(); +} + +TEST test_markup_strip_img(void) +{ + RUN_TESTp(helper_markup_strip_img, "v img", "v [image] img", NULL); + RUN_TESTp(helper_markup_strip_img, "v 
\"valid\" img", "v valid img", NULL); + RUN_TESTp(helper_markup_strip_img, "v img", "v [image] img", "[image] url.com"); + + RUN_TESTp(helper_markup_strip_img, "v \"valid\" img", "v valid img", "[valid] url.com"); + RUN_TESTp(helper_markup_strip_img, "v \"valid\" img", "v valid img", "[valid] url.com"); + RUN_TESTp(helper_markup_strip_img, "v \"valid\" img", "v valid img", "[valid] url.com"); + + RUN_TESTp(helper_markup_strip_img, "i \"invalid img", "i [image] img", "[image] https://url.com"); + RUN_TESTp(helper_markup_strip_img, "i \"broken\" img", "i broken img", NULL); + RUN_TESTp(helper_markup_strip_img, "i \"invalid img", "i [image] img", NULL); + + RUN_TESTp(helper_markup_strip_img, "i \"broken\" img", "i broken img", NULL); + RUN_TESTp(helper_markup_strip_img, "i \"invalid img", "i [image] img", "[image] url.com"); + RUN_TESTp(helper_markup_strip_img, "i \"invalid img", "i [image] img", NULL); + + RUN_TESTp(helper_markup_strip_img, "i