Remove a and img tags from msg
While the notification spec allows tags like <a href="...">...</a> and <img src="..." alt="...">, pango cannot parse these tags, so they must be removed before the text is passed to pango. In addition, the function notification_extract_markup_urls is no longer needed, because markup_strip_a can optionally return the URLs. As a consequence, URL replacement is now indicated via show_indicators for URLs, and the dmenu string has the format '[text between a tags] URL\n'. Images are handled the same way.
This commit is contained in:
parent
4bfae81f18
commit
acd8be51ab
176
src/markup.c
176
src/markup.c
@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "settings.h"
|
#include "settings.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@ -44,6 +46,178 @@ static char *markup_br2nl(char *str)
|
|||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Remove HTML hyperlinks of a string.
 *
 * Every '<a ...>text</a>' is replaced by 'text'. An opening tag without a
 * matching '</a>' swallows the rest of the string as its text. Processing
 * stops at the first broken tag, after printing a warning to stderr:
 *   - no '>' after '<a': everything from '<a' to the end is removed
 *   - '</a>' found before the '>' of the opening tag: everything from '<a'
 *     up to and including that '</a>' is removed
 *
 * @str: The string to replace a tags. *str may be reassigned, because the
 *       buffer returned by string_replace_at() is stored back into it.
 * @urls: (nullable): If any href-attributes found, an '\n' concatenated
 *        string of the URLs in format '[<text between tags>] <href>'.
 *        It is reset to NULL on entry, so its previous content is ignored.
 */
void markup_strip_a(char **str, char **urls)
{
        if (urls)
                *urls = NULL;

        // nothing to scan; strstr() must not be handed a NULL haystack
        if (!str || !*str)
                return;

        char *tag1 = NULL;

        while ((tag1 = strstr(*str, "<a"))) {
                // use href=" as stated in the notification spec
                char *href = strstr(tag1, "href=\"");
                char *tag1_end = strstr(tag1, ">");
                char *tag2 = strstr(tag1, "</a>");

                // the tag is broken, ignore it
                if (!tag1_end) {
                        fprintf(stderr,
                                "WARNING: Given link is broken: '%s'\n",
                                tag1);
                        // store the returned buffer, exactly like the
                        // regular replacement further down does
                        *str = string_replace_at(*str, tag1-*str, strlen(tag1), "");
                        break;
                }
                if (tag2 && tag2 < tag1_end) {
                        int repl_len = (tag2 - tag1) + strlen("</a>");
                        fprintf(stderr,
                                "WARNING: Given link is broken: '%.*s.'\n",
                                repl_len, tag1);
                        *str = string_replace_at(*str, tag1-*str, repl_len, "");
                        break;
                }

                // search contents of href attribute
                char *plain_url = NULL;
                if (href && href < tag1_end) {

                        // shift href to the actual begin of the value
                        href = href+6;

                        const char *quote = strstr(href, "\"");

                        // only accept a value that is closed inside the tag
                        if (quote && quote < tag1_end) {
                                plain_url = g_strndup(href, quote-href);
                        }
                }

                // text between a tags
                int text_len;
                if (tag2)
                        text_len = tag2 - (tag1_end+1);
                else
                        text_len = strlen(tag1_end+1);

                char *text = g_strndup(tag1_end+1, text_len);

                // opening tag + text (+ closing tag, if there is one)
                int repl_len = text_len + (tag1_end-tag1) + 1;
                repl_len += tag2 ? strlen("</a>") : 0;

                *str = string_replace_at(*str, tag1-*str, repl_len, text);

                // if there had been a href attribute,
                // add it to the URLs
                if (plain_url && urls) {
                        // brackets delimit the label in the URL list,
                        // so they are dropped from the label itself
                        text = string_replace_all("]", "", text);
                        text = string_replace_all("[", "", text);

                        char *url = g_strdup_printf("[%s] %s", text, plain_url);

                        *urls = string_append(*urls, url, "\n");
                        g_free(url);
                }

                g_free(plain_url);
                g_free(text);
        }
}
|
||||||
|
|
||||||
|
/*
 * Remove img-tags of a string. If alt attribute given, use this as replacement.
 *
 * A tag without a usable alt attribute is replaced by the literal text
 * '[image]'. If an '<img' has no closing '>', a warning is printed to stderr,
 * everything from '<img' to the end of the string is removed and processing
 * stops.
 *
 * @str: The string to replace img tags. *str may be reassigned, because the
 *       buffer returned by string_replace_at() is stored back into it.
 * @urls: (nullable): If any src-attributes found, an '\n' concatenated string of
 *        the URLs in format '[<alt>] <src>'. It is reset to NULL on entry,
 *        so its previous content is ignored.
 */
void markup_strip_img(char **str, char **urls)
{
        if (urls)
                *urls = NULL;

        // nothing to scan; strstr() must not be handed a NULL haystack
        if (!str || !*str)
                return;

        const char *start = NULL;

        while ((start = strstr(*str, "<img"))) {
                const char *end = strstr(start, ">");

                // the tag is broken, ignore it
                if (!end) {
                        fprintf(stderr, "WARNING: Given image is broken: '%s'\n", start);
                        // store the returned buffer, exactly like the
                        // regular replacement further down does
                        *str = string_replace_at(*str, start-*str, strlen(start), "");
                        break;
                }

                // use attribute=" as stated in the notification spec
                const char *alt_s = strstr(start, "alt=\"");
                const char *src_s = strstr(start, "src=\"");

                char *text_alt = NULL;
                char *text_src = NULL;

                // closing quotes of the attribute values, if any
                const char *src_e = NULL, *alt_e = NULL;
                if (alt_s)
                        alt_e = strstr(alt_s + strlen("alt=\""), "\"");
                if (src_s)
                        src_e = strstr(src_s + strlen("src=\""), "\"");

                // Move pointer to the actual start
                alt_s = alt_s ? alt_s + strlen("alt=\"") : NULL;
                src_s = src_s ? src_s + strlen("src=\"") : NULL;

                /* check if alt and src attribute are given
                 * If both given, check the alignment of all pointers */
                if (   alt_s && alt_e
                    && src_s && src_e
                    && (  (alt_s < src_s && alt_e < src_s-strlen("src=\"") && src_e < end)
                        ||(src_s < alt_s && src_e < alt_s-strlen("alt=\"") && alt_e < end)) ) {

                        text_alt = g_strndup(alt_s, alt_e-alt_s);
                        text_src = g_strndup(src_s, src_e-src_s);

                /* check if single valid alt attribute is available */
                } else if (alt_s && alt_e && alt_e < end && (!src_s || src_s < alt_s || alt_e < src_s - strlen("src=\""))) {
                        text_alt = g_strndup(alt_s, alt_e-alt_s);

                /* check if single valid src attribute is available */
                } else if (src_s && src_e && src_e < end && (!alt_s || alt_s < src_s || src_e < alt_s - strlen("alt=\""))) {
                        text_src = g_strndup(src_s, src_e-src_s);

                } else {
                        fprintf(stderr,
                                "WARNING: Given image argument is broken: '%.*s'\n",
                                (int)(end-start), start);
                }

                // the whole tag, including the closing '>', gets replaced
                int repl_len = end - start + 1;

                // fallback replacement if no alt text could be extracted
                if (!text_alt)
                        text_alt = g_strdup("[image]");

                *str = string_replace_at(*str, start-*str, repl_len, text_alt);

                // if there had been a src attribute,
                // add it to the URLs
                if (text_src && urls) {
                        // brackets delimit the label in the URL list,
                        // so they are dropped from the label itself
                        text_alt = string_replace_all("]", "", text_alt);
                        text_alt = string_replace_all("[", "", text_alt);

                        char *url = g_strdup_printf("[%s] %s", text_alt, text_src);

                        *urls = string_append(*urls, url, "\n");
                        g_free(url);
                }

                g_free(text_src);
                g_free(text_alt);
        }
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Strip any markup from text; turn it in to plain text.
|
* Strip any markup from text; turn it in to plain text.
|
||||||
*
|
*
|
||||||
@ -96,6 +270,8 @@ char *markup_transform(char *str, enum markup_mode markup_mode)
|
|||||||
break;
|
break;
|
||||||
case MARKUP_FULL:
|
case MARKUP_FULL:
|
||||||
str = markup_br2nl(str);
|
str = markup_br2nl(str);
|
||||||
|
markup_strip_a(&str, NULL);
|
||||||
|
markup_strip_img(&str, NULL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,10 @@
|
|||||||
#include "settings.h"
|
#include "settings.h"
|
||||||
|
|
||||||
char *markup_strip(char *str);
|
char *markup_strip(char *str);
|
||||||
|
|
||||||
|
void markup_strip_a(char **str, char **urls);
|
||||||
|
void markup_strip_img(char **str, char **urls);
|
||||||
|
|
||||||
char *markup_transform(char *str, enum markup_mode markup_mode);
|
char *markup_transform(char *str, enum markup_mode markup_mode);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -252,46 +252,6 @@ void notification_replace_single_field(char **haystack,
|
|||||||
g_free(input);
|
g_free(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *notification_extract_markup_urls(char **str_ptr)
|
|
||||||
{
|
|
||||||
char *start, *end, *replace_buf, *str, *urls = NULL, *url, *index_buf;
|
|
||||||
int linkno = 1;
|
|
||||||
|
|
||||||
str = *str_ptr;
|
|
||||||
while ((start = strstr(str, "<a href")) != NULL) {
|
|
||||||
end = strstr(start, ">");
|
|
||||||
if (end != NULL) {
|
|
||||||
replace_buf = g_strndup(start, end - start + 1);
|
|
||||||
url = extract_urls(replace_buf);
|
|
||||||
if (url != NULL) {
|
|
||||||
str = string_replace(replace_buf, "[", str);
|
|
||||||
|
|
||||||
index_buf = g_strdup_printf("[#%d]", linkno++);
|
|
||||||
if (urls == NULL) {
|
|
||||||
urls = g_strconcat(index_buf, " ", url, NULL);
|
|
||||||
} else {
|
|
||||||
char *tmp = urls;
|
|
||||||
urls = g_strconcat(tmp, "\n", index_buf, " ", url, NULL);
|
|
||||||
g_free(tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
index_buf[0] = ' ';
|
|
||||||
str = string_replace("</a>", index_buf, str);
|
|
||||||
g_free(index_buf);
|
|
||||||
g_free(url);
|
|
||||||
} else {
|
|
||||||
str = string_replace(replace_buf, "", str);
|
|
||||||
str = string_replace("</a>", "", str);
|
|
||||||
}
|
|
||||||
g_free(replace_buf);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*str_ptr = str;
|
|
||||||
return urls;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create notification struct and initialise all fields with either
|
* Create notification struct and initialise all fields with either
|
||||||
* - the default (if it's not needed to be freed later)
|
* - the default (if it's not needed to be freed later)
|
||||||
@ -479,15 +439,26 @@ static void notification_format_message(notification *n)
|
|||||||
|
|
||||||
static void notification_extract_urls(notification *n)
|
static void notification_extract_urls(notification *n)
|
||||||
{
|
{
|
||||||
// DO markup urls processing here until we split this out correctly
|
g_clear_pointer(&n->urls, g_free);
|
||||||
n->urls = notification_extract_markup_urls(&(n->body));
|
|
||||||
|
|
||||||
char *tmp = g_strconcat(n->summary, " ", n->body, NULL);
|
char *urls_in = string_append(g_strdup(n->summary), n->body, " ");
|
||||||
|
|
||||||
char *tmp_urls = extract_urls(tmp);
|
char *urls_a = NULL;
|
||||||
n->urls = string_append(n->urls, tmp_urls, "\n");
|
char *urls_img = NULL;
|
||||||
g_free(tmp_urls);
|
markup_strip_a(&urls_in, &urls_a);
|
||||||
g_free(tmp);
|
markup_strip_img(&urls_in, &urls_img);
|
||||||
|
// remove links and images first to not confuse
|
||||||
|
// plain urls extraction
|
||||||
|
char *urls_text = extract_urls(urls_in);
|
||||||
|
|
||||||
|
n->urls = string_append(n->urls, urls_a, "\n");
|
||||||
|
n->urls = string_append(n->urls, urls_img, "\n");
|
||||||
|
n->urls = string_append(n->urls, urls_text, "\n");
|
||||||
|
|
||||||
|
g_free(urls_in);
|
||||||
|
g_free(urls_a);
|
||||||
|
g_free(urls_img);
|
||||||
|
g_free(urls_text);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void notification_dmenu_string(notification *n)
|
static void notification_dmenu_string(notification *n)
|
||||||
|
@ -72,7 +72,7 @@ typedef struct _notification {
|
|||||||
/* derived fields */
|
/* derived fields */
|
||||||
char *msg; /* formatted message */
|
char *msg; /* formatted message */
|
||||||
char *text_to_render; /* formatted message (with age and action indicators) */
|
char *text_to_render; /* formatted message (with age and action indicators) */
|
||||||
char *urls; /* urllist */
|
char *urls; /* urllist delimited by '\n' */
|
||||||
} notification;
|
} notification;
|
||||||
|
|
||||||
notification *notification_create(void);
|
notification *notification_create(void);
|
||||||
|
@ -45,12 +45,108 @@ TEST test_markup_transform(void)
|
|||||||
ASSERT_STR_EQ("<i>foo</i> bar baz", (ptr=markup_transform(g_strdup("<i>foo</i><br>bar\nbaz"), MARKUP_FULL)));
|
ASSERT_STR_EQ("<i>foo</i> bar baz", (ptr=markup_transform(g_strdup("<i>foo</i><br>bar\nbaz"), MARKUP_FULL)));
|
||||||
g_free(ptr);
|
g_free(ptr);
|
||||||
|
|
||||||
|
// Test replacement of img and a tags, not renderable by pango
|
||||||
|
ASSERT_STR_EQ("foo bar bar baz", (ptr=markup_transform(g_strdup("<img alt=\"foo bar\"><br>bar\nbaz"), MARKUP_FULL)));
|
||||||
|
g_free(ptr);
|
||||||
|
ASSERT_STR_EQ("test ", (ptr=markup_transform(g_strdup("test <img alt=\"foo bar\""), MARKUP_FULL)));
|
||||||
|
g_free(ptr);
|
||||||
|
ASSERT_STR_EQ("test [image] image", (ptr=markup_transform(g_strdup("test <img src=\"nothing.jpg\"> image"), MARKUP_FULL)));
|
||||||
|
g_free(ptr);
|
||||||
|
ASSERT_STR_EQ("bar baz", (ptr=markup_transform(g_strdup("<a href=\"asdf\">bar</a> baz"), MARKUP_FULL)));
|
||||||
|
g_free(ptr);
|
||||||
|
|
||||||
|
PASS();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST helper_markup_strip_a (const char *in, const char *exp, const char *urls)
|
||||||
|
{
|
||||||
|
// out_urls is a return parameter and the content should be ignored
|
||||||
|
char *out_urls = (char *)0x04; //Chosen by a fair dice roll
|
||||||
|
char *out = g_strdup(in);
|
||||||
|
char *msg = g_strconcat("url: ", in, NULL);
|
||||||
|
|
||||||
|
markup_strip_a(&out, &out_urls);
|
||||||
|
|
||||||
|
ASSERT_STR_EQm(msg, exp, out);
|
||||||
|
|
||||||
|
if (urls) {
|
||||||
|
ASSERT_STR_EQm(msg, urls, out_urls);
|
||||||
|
} else {
|
||||||
|
ASSERT_EQm(msg, urls, out_urls);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free(out_urls);
|
||||||
|
g_free(out);
|
||||||
|
g_free(msg);
|
||||||
|
|
||||||
|
PASS();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table of markup_strip_a cases; each row is (input, expected text, expected URL list or NULL).
TEST test_markup_strip_a(void)
|
||||||
|
{
|
||||||
|
// well-formed anchors, plus an anchor that is never closed with </a>
RUN_TESTp(helper_markup_strip_a, "<a href=\"https://url.com\">valid</a> link", "valid link", "[valid] https://url.com");
|
||||||
|
RUN_TESTp(helper_markup_strip_a, "<a href=\"\">valid</a> link", "valid link", "[valid] ");
|
||||||
|
RUN_TESTp(helper_markup_strip_a, "<a>valid</a> link", "valid link", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_a, "<a href=\"https://url.com\">valid link", "valid link", "[valid link] https://url.com");
|
||||||
|
|
||||||
|
// opening tag lacks its own '>': the broken span is dropped and no URL is reported
RUN_TESTp(helper_markup_strip_a, "<a href=\"https://url.com\" invalid</a> link", " link", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_a, "<a invalid</a> link", " link", NULL);
|
||||||
|
|
||||||
|
PASS();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST helper_markup_strip_img (const char *in, const char *exp, const char *urls)
|
||||||
|
{
|
||||||
|
// out_urls is a return parameter and the content should be ignored
|
||||||
|
char *out_urls = (char *)0x04; //Chosen by a fair dice roll
|
||||||
|
char *out = g_strdup(in);
|
||||||
|
char *msg = g_strconcat("url: ", in, NULL);
|
||||||
|
|
||||||
|
markup_strip_img(&out, &out_urls);
|
||||||
|
|
||||||
|
ASSERT_STR_EQm(msg, exp, out);
|
||||||
|
|
||||||
|
if (urls) {
|
||||||
|
ASSERT_STR_EQm(msg, urls, out_urls);
|
||||||
|
} else {
|
||||||
|
ASSERT_EQm(msg, urls, out_urls);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free(out_urls);
|
||||||
|
g_free(out);
|
||||||
|
g_free(msg);
|
||||||
|
|
||||||
|
PASS();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table of markup_strip_img cases; each row is (input, expected text, expected URL list or NULL).
TEST test_markup_strip_img(void)
|
||||||
|
{
|
||||||
|
// only one of alt/src present (or neither): "[image]" is the fallback text
RUN_TESTp(helper_markup_strip_img, "v <img> img", "v [image] img", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "v <img alt=\"valid\" alt=\"invalid\"> img", "v valid img", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "v <img src=\"url.com\"> img", "v [image] img", "[image] url.com");
|
||||||
|
|
||||||
|
// alt and src both well-formed, in either order
RUN_TESTp(helper_markup_strip_img, "v <img alt=\"valid\" src=\"url.com\"> img", "v valid img", "[valid] url.com");
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "v <img src=\"url.com\" alt=\"valid\"> img", "v valid img", "[valid] url.com");
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "v <img src=\"url.com\" alt=\"valid\" alt=\"i\"> img", "v valid img", "[valid] url.com");
|
||||||
|
|
||||||
|
// alt first, with one or both attribute values missing their closing quote
RUN_TESTp(helper_markup_strip_img, "i <img alt=\"invalid src=\"https://url.com\"> img", "i [image] img", "[image] https://url.com");
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "i <img alt=\"broken\" src=\"https://url.com > img", "i broken img", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "i <img alt=\"invalid src=\"https://url.com > img", "i [image] img", NULL);
|
||||||
|
|
||||||
|
// src first, with one or both attribute values missing their closing quote
RUN_TESTp(helper_markup_strip_img, "i <img src=\"url.com alt=\"broken\"> img", "i broken img", NULL);
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "i <img src=\"url.com\" alt=\"invalid > img", "i [image] img", "[image] url.com");
|
||||||
|
RUN_TESTp(helper_markup_strip_img, "i <img src=\"url.com alt=\"invalid > img", "i [image] img", NULL);
|
||||||
|
|
||||||
|
// tag never closed with '>': everything from "<img" onwards is dropped
RUN_TESTp(helper_markup_strip_img, "i <img src=\"url.com\" alt=\"invalid\" img", "i ", NULL);
|
||||||
|
|
||||||
PASS();
|
PASS();
|
||||||
}
|
}
|
||||||
|
|
||||||
SUITE(suite_markup)
|
SUITE(suite_markup)
|
||||||
{
|
{
|
||||||
RUN_TEST(test_markup_strip);
|
RUN_TEST(test_markup_strip);
|
||||||
|
RUN_TEST(test_markup_strip_a);
|
||||||
|
RUN_TEST(test_markup_strip_img);
|
||||||
RUN_TEST(test_markup_transform);
|
RUN_TEST(test_markup_transform);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user