From 82fa79c78687f183e16a84ce2edee27e6e6b801c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 21 Feb 2017 15:00:36 -0500 Subject: [PATCH] option_parser.c: Allow comments on lines with quoted strings. The current behavior is - If the value contains a double-quote: - 1. Verify that it must contains at least two quotes. - 2. If one of the quotes is the first character, trim it. - 3. If one of the quotes is the last character, trim it. - Else: - 1. Trim a trailing comment from the value. This has the effect that `key = "value" # comment` => `value" #comment` This is surprising and almost certainly not what the user wants. However, it allows simple nested quotes like: `key = "A string "with quotes""` => `A string "with quotes"` Fix the brokenness of the first example at the expense of breaking the second. A user seeking that value will now have to type: key = "A string \"with quotes\"" Do this by treating double-quote as a toggle that simply changes whether `;` and `#` start comments (not too different than Bash using it to toggle field separation). In order to have strings that contain a literal double-quote, add rudimentary support for backslash-escaping. For now, only recognize double-quote and backslash-itself; anything else is undefined; and the program is free to do whatever it likes with them; for now, silently treat the backslash as an ordinary character. Note that this formulation of quoting implies that backslash-escaping works identically both inside and outside of quotes. --- src/option_parser.c | 76 ++++++++++++++++++++++++++------------------ test/data/test-ini | 10 +++++- test/option_parser.c | 18 +++++++++++ 3 files changed, 72 insertions(+), 32 deletions(-) diff --git a/src/option_parser.c b/src/option_parser.c index 861fdcf..63ba22d 100644 --- a/src/option_parser.c +++ b/src/option_parser.c @@ -29,7 +29,7 @@ static section_t *new_section(char *name); static section_t *get_section(char *name); static void add_entry(char *section_name, char *key, char *value); static char *get_value(char *section, char *key); -static char *clean_value(char *value); +static char *clean_value(char *value, int line_num); static int cmdline_argc; static char **cmdline_argv; @@ -91,7 +91,7 @@ void add_entry(char *section_name, char *key, char *value) int len = s->entry_count; s->entries = g_realloc(s->entries, sizeof(entry_t) * len); s->entries[s->entry_count - 1].key = g_strdup(key); - s->entries[s->entry_count - 1].value = clean_value(value); + s->entries[s->entry_count - 1].value = g_strdup(value); } char *get_value(char *section, char *key) @@ -186,20 +186,50 @@ int ini_get_bool(char *section, char *key, int def) } } -char *clean_value(char *value) +char *clean_value(char *value, int line_num) { - char *s; + char *unparsed = value; - if (value[0] == '"') - s = g_strdup(value + 1); - else - s = g_strdup(value); - - if (s[strlen(s) - 1] == '"') - s[strlen(s) - 1] = '\0'; - - return s; + bool in_quote = false; + while ((unparsed = strpbrk(unparsed, "\"\\#;")) != NULL) { + switch (*unparsed) { + case '"': + memmove(unparsed, unparsed + 1, strlen(unparsed)); + in_quote = !in_quote; + break; + case '\\': + switch (unparsed[1]) { + case '\\': + case '"': + memmove(unparsed, unparsed + 1, strlen(unparsed)); + unparsed++; + break; + default: + // Unrecognized backslash sequence; + // treat the backslash as an ordinary character. + // Consider issuing an error or warning here instead. + unparsed++; + break; + } + break; + case '#': + case ';': + if (in_quote) + unparsed++; + else + *unparsed = '\0'; + break; + } + } + if (in_quote) { + fprintf(stderr, + "Warning: invalid config file at line %d\n", + line_num); + fprintf(stderr, "Missing '\"'\n"); + return NULL; + } + return g_strstrip(value); } int load_ini_file(FILE * fp) @@ -250,24 +280,8 @@ int load_ini_file(FILE * fp) *equal = '\0'; char *key = g_strstrip(start); - char *value = g_strstrip(equal + 1); - - char *quote = strchr(value, '"'); - if (quote) { - char *closing_quote = strchr(quote + 1, '"'); - if (!closing_quote) { - fprintf(stderr, - "Warning: invalid config file at line %d\n", - line_num); - fprintf(stderr, "Missing '\"'\n"); - continue; - } - } else { - char *comment = strpbrk(value, "#;"); - if (comment) - *comment = '\0'; - } - value = g_strstrip(value); + char *value = clean_value(equal + 1, line_num); + if (!value) continue; if (!current_section) { fprintf(stderr, diff --git a/test/data/test-ini b/test/data/test-ini index 4afb1d2..4326b0a 100644 --- a/test/data/test-ini +++ b/test/data/test-ini @@ -20,8 +20,16 @@ [string] simple = A simple string + simple_with_hcomment = A simple string # a comment + simple_with_scomment = A simple string ; a comment quoted = "A quoted string" - quoted_with_quotes = "A string "with quotes"" + quoted_with_hcomment = "A quoted string" # a comment + quoted_with_scomment = "A quoted string" ; a comment + quoted_with_quotes = "A string \"with quotes\"" + quoted_with_escapes = "A string \\\"with escapes\\" + quoted_with_cchar = "A string; with #comment characters" # a comment + quoted_in_middle = A string"; with #comment" characters # a comment + escaped_quotes = String \"with quotes\" [int] simple = 5 diff --git a/test/option_parser.c b/test/option_parser.c index 8147136..173f16e 100644 --- a/test/option_parser.c +++ b/test/option_parser.c @@ -45,13 +45,31 @@ TEST test_ini_get_string(void) { char *string_section = "string"; char *ptr; + ASSERT_STR_EQ("A simple string", (ptr = ini_get_string(string_section, "simple", ""))); free(ptr); + ASSERT_STR_EQ("A simple string", (ptr = ini_get_string(string_section, "simple_with_hcomment", ""))); + free(ptr); + ASSERT_STR_EQ("A simple string", (ptr = ini_get_string(string_section, "simple_with_scomment", ""))); + free(ptr); ASSERT_STR_EQ("A quoted string", (ptr = ini_get_string(string_section, "quoted", ""))); free(ptr); + ASSERT_STR_EQ("A quoted string", (ptr = ini_get_string(string_section, "quoted_with_hcomment", ""))); + free(ptr); + ASSERT_STR_EQ("A quoted string", (ptr = ini_get_string(string_section, "quoted_with_scomment", ""))); + free(ptr); ASSERT_STR_EQ("A string \"with quotes\"", (ptr = ini_get_string(string_section, "quoted_with_quotes", ""))); free(ptr); + ASSERT_STR_EQ("A string \\\"with escapes\\", (ptr = ini_get_string(string_section, "quoted_with_escapes", ""))); + free(ptr); + ASSERT_STR_EQ("A string; with #comment characters", (ptr = ini_get_string(string_section, "quoted_with_cchar", ""))); + free(ptr); + ASSERT_STR_EQ("A string; with #comment characters", (ptr = ini_get_string(string_section, "quoted_in_middle", ""))); + free(ptr); + + ASSERT_STR_EQ("String \"with quotes\"", (ptr = ini_get_string(string_section, "escaped_quotes", ""))); + free(ptr); ASSERT_STR_EQ("default value", (ptr = ini_get_string(string_section, "nonexistent", "default value"))); free(ptr);