Merge pull request #558 from tsipinakis/feature/markup-escape
Implement smarter markup escaping
This commit is contained in:
		
						commit
						b805273fb9
					
				
							
								
								
									
										76
									
								
								src/markup.c
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								src/markup.c
									
									
									
									
									
								
							| @ -5,6 +5,7 @@ | ||||
| #include <assert.h> | ||||
| #include <stdbool.h> | ||||
| #include <string.h> | ||||
| #include <ctype.h> | ||||
| #include <stdio.h> | ||||
| 
 | ||||
| #include "log.h" | ||||
| @ -242,6 +243,80 @@ char *markup_strip(char *str) | ||||
|         return str; | ||||
| } | ||||
| 
 | ||||
/**
 * Determine if an & character pointed to by \p str is a markup & entity or
 * part of the text
 *
 * Two kinds of entities are recognised:
 *  - numeric character references, decimal (`&#1234;`) or hexadecimal
 *    (`&#xABC;`), which need at least one digit before the `;`
 *  - the five predefined named entities `&amp;`, `&lt;`, `&gt;`, `&quot;`
 *    and `&apos;`
 *
 * @param str Pointer to the `&` to examine. Must be non-NULL and must point
 *            at a `&` character (both are asserted).
 *
 * @return true if it's an entity otherwise false
 */
static bool markup_is_entity(const char *str)
{
        assert(str);
        assert(*str == '&');

        // Without a terminating ';' this can never be an entity
        const char *end = strchr(str, ';');
        if (!end)
                return false;

        // Parse (hexa)decimal entities with the format &#1234; or &#xABC;
        if (str[1] == '#') {
                const char *cur = str + 2;
                const bool hex = (*cur == 'x');

                if (hex)
                        cur++;

                // Reject &#; and &#x; (no digits at all)
                if (cur == end)
                        return false;

                // The <ctype.h> classifiers require a value representable as
                // unsigned char; a plain (possibly negative) char is undefined
                // behaviour, e.g. for UTF-8 bytes.
                while (cur < end) {
                        const unsigned char c = (unsigned char)*cur;
                        if (hex ? !isxdigit(c) : !isdigit(c))
                                break;
                        cur++;
                }

                // Valid only if nothing but digits preceded the ';'
                return cur == end;
        }

        static const char *const supported_tags[] = {
                "&amp;", "&lt;", "&gt;", "&quot;", "&apos;"
        };
        const size_t count = sizeof(supported_tags) / sizeof(*supported_tags);

        for (size_t i = 0; i < count; i++) {
                const char *tag = supported_tags[i];
                if (strncmp(str, tag, strlen(tag)) == 0)
                        return true;
        }

        return false;
}
| 
 | ||||
/**
 * Escape all unsupported and invalid &-entities in a string. If the resulting
 * string does not fit it will be reallocated.
 *
 * Every `&` that markup_is_entity() does not recognise as the start of an
 * entity is replaced by `&amp;`; recognised entities are left untouched.
 *
 * @param str The string to be transformed (may be NULL)
 *
 * @return The transformed string, or NULL if \p str was NULL. The returned
 *         pointer is whatever string_replace_at() hands back and must be used
 *         instead of \p str afterwards.
 */
static char *markup_escape_unsupported(char *str)
{
        if (!str)
                return NULL;

        static const char escaped_amp[] = "&amp;";

        char *match = str;
        while ((match = strchr(match, '&'))) {
                if (markup_is_entity(match)) {
                        match++;
                        continue;
                }

                // Remember the offset, not the pointer: str may be replaced
                // by the call below, which would leave match dangling.
                int pos = match - str;
                str = string_replace_at(str, pos, 1, escaped_amp);

                // Resume after the inserted text so its '&' is not rescanned
                match = str + pos + strlen(escaped_amp);
        }

        return str;
}
| 
 | ||||
| /*
 | ||||
|  * Transform the string in accordance with `markup_mode` and | ||||
|  * `settings.ignore_newline` | ||||
| @ -265,6 +340,7 @@ char *markup_transform(char *str, enum markup_mode markup_mode) | ||||
|                 str = markup_quote(str); | ||||
|                 break; | ||||
|         case MARKUP_FULL: | ||||
|                 str = markup_escape_unsupported(str); | ||||
|                 str = markup_br2nl(str); | ||||
|                 markup_strip_a(&str, NULL); | ||||
|                 markup_strip_img(&str, NULL); | ||||
|  | ||||
| @ -55,6 +55,17 @@ TEST test_markup_transform(void) | ||||
|         ASSERT_STR_EQ("bar baz",            (ptr=markup_transform(g_strdup("<a href=\"asdf\">bar</a> baz"), MARKUP_FULL))); | ||||
|         g_free(ptr); | ||||
| 
 | ||||
|         ASSERT_STR_EQ("Ψ", (ptr=markup_transform(g_strdup("Ψ"), MARKUP_FULL))); | ||||
|         free(ptr); | ||||
|         ASSERT_STR_EQ("Ψ Ψ", (ptr=markup_transform(g_strdup("Ψ Ψ"), MARKUP_FULL))); | ||||
|         free(ptr); | ||||
|         ASSERT_STR_EQ("> <", (ptr=markup_transform(g_strdup("> <"), MARKUP_FULL))); | ||||
|         free(ptr); | ||||
|         ASSERT_STR_EQ("&invalid; &#abc; &#xG;", (ptr=markup_transform(g_strdup("&invalid; &#abc; &#xG;"), MARKUP_FULL))); | ||||
|         free(ptr); | ||||
|         ASSERT_STR_EQ("&; &#; &#x;", (ptr=markup_transform(g_strdup("&; &#; &#x;"), MARKUP_FULL))); | ||||
|         free(ptr); | ||||
| 
 | ||||
|         PASS(); | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Nikos Tsipinakis
						Nikos Tsipinakis