From a3c565b2435919c701f1db8095ca372794734579 Mon Sep 17 00:00:00 2001 From: George Murray Date: Thu, 24 Sep 2020 07:31:10 -0700 Subject: [PATCH 1/4] Add to_search_normalized to search for characters with accents --- library/MiscUtils.cpp | 39 +++++++++++++++++++++++++++++++++++++ library/include/MiscUtils.h | 1 + plugins/search.cpp | 4 ++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/library/MiscUtils.cpp b/library/MiscUtils.cpp index 84dddbbd6..018ddd6c3 100644 --- a/library/MiscUtils.cpp +++ b/library/MiscUtils.cpp @@ -128,6 +128,45 @@ std::string toLower(const std::string &str) return rv; } +static const char *normalized_table[256] = { + //.0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 0. + NULL, NULL, NULL, NULL, NULL, "S", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 1. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 2. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 3. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 4. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 5. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 6. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 7. + "C", "u", "e", "a", "a", "a", "a", "c", "e", "e", "e", "i", "i", "i", "A", "A", // 8. + "E", "ae", "Ae", "o", "o", "o", "u", "u", "y", "O", "U", "c", "L", "Y", NULL, "f", // 9. + "a", "i", "o", "u", "n", "N", "a", "o", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // A. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // B. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // C. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // D. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // E. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // F. +}; + +std::string to_search_normalized(const std::string &str) +{ + std::stringbuf result; + for (char c : str) + { + const char *mapped = normalized_table[(uint8_t)c]; + if (mapped == NULL) + result.sputc(tolower(c)); + else + while (*mapped != '\0') + { + result.sputc(tolower(c)); + ++mapped; + } + } + + return result.str(); +} + bool word_wrap(std::vector *out, const std::string &str, size_t line_length) { out->clear(); diff --git a/library/include/MiscUtils.h b/library/include/MiscUtils.h index 35f8be73b..4249abeef 100644 --- a/library/include/MiscUtils.h +++ b/library/include/MiscUtils.h @@ -366,6 +366,7 @@ DFHACK_EXPORT std::string join_strings(const std::string &separator, const std:: DFHACK_EXPORT std::string toUpper(const std::string &str); DFHACK_EXPORT std::string toLower(const std::string &str); +DFHACK_EXPORT std::string to_search_normalized(const std::string &str); DFHACK_EXPORT bool word_wrap(std::vector *out, const std::string &str, diff --git a/plugins/search.cpp b/plugins/search.cpp index 6e0b49e16..f053967d8 100644 --- a/plugins/search.cpp +++ b/plugins/search.cpp @@ -396,7 +396,7 @@ protected: clear_viewscreen_vectors(); - string search_string_l = toLower(search_string); + string search_string_l = to_search_normalized(search_string); for (size_t i = 0; i < saved_list1.size(); i++ ) { if (force_in_search(i)) @@ -409,7 +409,7 @@ protected: continue; T element = saved_list1[i]; - string desc = toLower(get_element_description(element)); + string desc = to_search_normalized(get_element_description(element)); if (desc.find(search_string_l) != string::npos) { add_to_filtered_list(i); From 38c9c2825012b5700a5e477f3e9e2e38e9a1c7ef Mon Sep 17 00:00:00 2001 From: George Murray Date: Thu, 24 Sep 2020 10:52:03 -0700 Subject: [PATCH 2/4] Fix wrong variable usage in to_search_normalized --- library/MiscUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/MiscUtils.cpp b/library/MiscUtils.cpp index 018ddd6c3..893c27b2c 100644 --- a/library/MiscUtils.cpp +++ b/library/MiscUtils.cpp @@ -159,7 +159,7 @@ std::string to_search_normalized(const std::string &str) else while (*mapped != '\0') { - result.sputc(tolower(c)); + result.sputc(tolower(*mapped)); ++mapped; } } From f5c3712778bb9021c2b3792cd88417ec6bbdb357 Mon Sep 17 00:00:00 2001 From: George Murray Date: Thu, 24 Sep 2020 14:32:58 -0700 Subject: [PATCH 3/4] Nicer formatting for the normalized character mapping table --- library/MiscUtils.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/library/MiscUtils.cpp b/library/MiscUtils.cpp index 893c27b2c..2659d5d43 100644 --- a/library/MiscUtils.cpp +++ b/library/MiscUtils.cpp @@ -129,23 +129,23 @@ std::string toLower(const std::string &str) } static const char *normalized_table[256] = { - //.0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 0. - NULL, NULL, NULL, NULL, NULL, "S", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 1. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 2. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 3. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 4. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 5. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 6. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 7. - "C", "u", "e", "a", "a", "a", "a", "c", "e", "e", "e", "i", "i", "i", "A", "A", // 8. - "E", "ae", "Ae", "o", "o", "o", "u", "u", "y", "O", "U", "c", "L", "Y", NULL, "f", // 9. - "a", "i", "o", "u", "n", "N", "a", "o", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // A. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // B. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // C. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // D. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // E. - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // F. + //.0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 0. + NULL, NULL, NULL, NULL, NULL, "S", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 1. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 2. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 3. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 4. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 5. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 6. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 7. + "C", "u", "e", "a", "a", "a", "a", "c", "e", "e", "e", "i", "i", "i", "A", "A", // 8. + "E", "ae", "Ae", "o", "o", "o", "u", "u", "y", "O", "U", "c", "L", "Y", NULL, "f", // 9. + "a", "i", "o", "u", "n", "N", "a", "o", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // A. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // B. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // C. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // D. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // E. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // F. }; std::string to_search_normalized(const std::string &str) From 74a3e0eddf360534a028b29d7076cf65212b940b Mon Sep 17 00:00:00 2001 From: George Murray Date: Thu, 24 Sep 2020 15:22:58 -0700 Subject: [PATCH 4/4] Use a string and .reserve for normalizing instead of stringbuf --- library/MiscUtils.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/library/MiscUtils.cpp b/library/MiscUtils.cpp index 2659d5d43..56af85afe 100644 --- a/library/MiscUtils.cpp +++ b/library/MiscUtils.cpp @@ -150,21 +150,22 @@ static const char *normalized_table[256] = { std::string to_search_normalized(const std::string &str) { - std::stringbuf result; + std::string result; + result.reserve(str.size()); for (char c : str) { const char *mapped = normalized_table[(uint8_t)c]; if (mapped == NULL) - result.sputc(tolower(c)); + result += tolower(c); else while (*mapped != '\0') { - result.sputc(tolower(*mapped)); + result += tolower(*mapped); ++mapped; } } - return result.str(); + return result; } bool word_wrap(std::vector *out, const std::string &str, size_t line_length)