From b1ee80d55ed65bff17566f9f2b2bafbc76550550 Mon Sep 17 00:00:00 2001
From: Christian Dywan
Date: Mon, 28 Dec 2009 23:44:16 +0100
Subject: [PATCH] Implement katze_utf8_stristr, katze_collfold and use in completion

Evidently normalizing any whole string is too slow for
completion, so we need to even out case and composition while
iterating through the strings.

The decomposing version of katze_utf8_stristr is disabled,
since it is too slow, and an ascii only version is used, for now.

A unit test 'compare' is added that solely measures performance
of katze_collfold and katze_utf8_stristr.
---
 katze/katze-utils.c            | 96 ++++++++++++++++++++++++++++++++++
 katze/katze-utils.h            |  7 +++
 midori/midori-locationaction.c | 40 +--------------
 tests/completion.c             | 33 ++++++++++++
 4 files changed, 138 insertions(+), 38 deletions(-)

diff --git a/katze/katze-utils.c b/katze/katze-utils.c
index 60329eb1..86a7f90f 100644
--- a/katze/katze-utils.c
+++ b/katze/katze-utils.c
@@ -1482,3 +1482,99 @@ katze_load_cached_icon (const gchar* uri,
     return icon || !widget ? icon : gtk_widget_render_icon (widget,
         GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL);
 }
+
+/**
+ * katze_collfold:
+ * @str: a non-NULL UTF-8 string
+ *
+ * Computes a lowercase, canonically decomposed copy of
+ * @str so it can be used for comparison.
+ *
+ * Return value: a newly allocated normalized string
+ *
+ * Since: 0.2.3
+ **/
+gchar*
+katze_collfold (const gchar* str)
+{
+    GString* result = g_string_new (NULL);
+    const gchar* p = str;
+
+    while (*p)
+    {
+        gunichar ch = g_unichar_tolower (g_utf8_get_char (p));
+        gsize len;
+        gunichar* sch = g_unicode_canonical_decomposition (ch, &len);
+        guint i = 0;
+        while (i < len)
+            g_string_append_unichar (result, sch[i++]);
+        /* The decomposition is allocated anew for every character */
+        g_free (sch);
+
+        p = g_utf8_next_char (p);
+    }
+
+    return g_string_free (result, FALSE);
+}
+
+/**
+ * katze_utf8_stristr:
+ * @haystack: a non-NULL UTF-8 string
+ * @needle: a normalized non-NULL UTF-8 string
+ *
+ * Determines whether @needle is in @haystack, disregarding
+ * differences in case. Only ASCII letters are folded for now,
+ * all other bytes must match exactly. An empty @haystack never matches.
+ *
+ * Return value: %TRUE if @needle is found in @haystack
+ *
+ * Since: 0.2.3
+ **/
+gboolean
+katze_utf8_stristr (const gchar* haystack,
+                    const gchar* needle)
+{
+    #if 0 /* 0,000159 seconds */
+    /* Too slow for use in completion */
+    gchar* nhaystack = g_utf8_normalize (haystack, -1, G_NORMALIZE_DEFAULT);
+    const gchar *p = nhaystack;
+    gsize len = strlen (needle);
+    gsize i;
+
+    while (*p)
+    {
+        for (i = 0; i < len; i++)
+            if (g_unichar_tolower (g_utf8_get_char (p + i))
+             != g_unichar_tolower (g_utf8_get_char (needle + i)))
+                goto next;
+
+        g_free (nhaystack);
+        return TRUE;
+
+        next:
+        p = g_utf8_next_char (p);
+    }
+
+    g_free (nhaystack);
+    return FALSE;
+    #else /* 0,000044 seconds */
+    /* No unicode matching */
+    const gchar *p = haystack;
+    gsize len = strlen (needle);
+    gsize i;
+
+    while (*p)
+    {
+        for (i = 0; i < len; i++)
+            if (g_ascii_tolower (p[i]) != g_ascii_tolower (needle[i]))
+                goto next;
+
+        return TRUE;
+
+        next:
+        p++;
+    }
+
+    return FALSE;
+    #endif
+}
diff --git a/katze/katze-utils.h b/katze/katze-utils.h
index c8648ff2..6a5843f6 100644
--- a/katze/katze-utils.h
+++ b/katze/katze-utils.h
@@ -151,6 +151,13 @@ GdkPixbuf*
 katze_load_cached_icon (const gchar* uri,
                         GtkWidget*   widget);
 
+gchar*
+katze_collfold (const gchar* str);
+
+gboolean
+katze_utf8_stristr (const gchar* haystack,
+                    const gchar* needle);
+
 G_END_DECLS
 
 #endif /* __KATZE_UTILS_H__ */
diff --git a/midori/midori-locationaction.c b/midori/midori-locationaction.c
index f05a9d10..4939b567 100644
--- a/midori/midori-locationaction.c
+++ b/midori/midori-locationaction.c
@@ -755,47 +755,11 @@ midori_location_entry_completion_match_cb (GtkEntryCompletion* completion,
     match = FALSE;
     if (G_LIKELY (uri))
     {
-        gchar* nkey;
-        gchar* fkey;
-        gchar* nuri;
-        gchar* furi;
-
-        if ((nkey = g_utf8_normalize (key, -1, G_NORMALIZE_ALL)))
-        {
-            fkey = g_utf8_casefold (nkey, -1);
-            g_free (nkey);
-        }
-        else
-            fkey = g_utf8_casefold (key, -1);
-        if ((nuri = g_utf8_normalize (uri, -1, G_NORMALIZE_ALL)))
-        {
-            furi = g_utf8_casefold (nuri, -1);
-            g_free (nuri);
-        }
-        else
-            furi = g_utf8_casefold (uri, -1);
+        match = katze_utf8_stristr (uri, key);
         g_free (uri);
-        match = strstr (furi, fkey) != NULL;
-        g_free (furi);
 
         if (!match && G_LIKELY (title))
-        {
-            gchar* ntitle;
-            gchar* ftitle;
-
-            if ((ntitle = g_utf8_normalize (title, -1, G_NORMALIZE_ALL)))
-            {
-                ftitle = g_utf8_casefold (ntitle, -1);
-                g_free (ntitle);
-            }
-            else
-                ftitle = g_utf8_casefold (title, -1);
-
-            match = strstr (ftitle, fkey) != NULL;
-            g_free (ftitle);
-        }
-
-        g_free (fkey);
+            match = katze_utf8_stristr (title, key);
     }
 
     g_free (title);
diff --git a/tests/completion.c b/tests/completion.c
index ff802bc1..26614dba 100644
--- a/tests/completion.c
+++ b/tests/completion.c
@@ -19,6 +19,38 @@
 GtkWidget*
 midori_location_action_entry_for_proxy (GtkWidget* proxy);
 
+static const gchar* compare_urls[] = {
+    "http://en.wikipedia.org/wiki/Foul",
+    "http://de.wikipedia.org/wiki/Düsseldorf",
+    "http://de.wikipedia.org/wiki/Düsseldorf",
+    "http://ja.wikipedia.org/wiki/若井はんじ・けんじ",
+    "http://www.johannkönig.com",
+    "http://şøñđëřżēıċħęŋđőmæîņĭśŧşũþėŗ.de",
+ };
+
+static void
+completion_compare (void)
+{
+    const guint runs = 10000;
+    guint t;
+    gdouble elapsed = 0.0;
+
+    for (t = 0; t < runs; t++)
+    {
+        guint i, j;
+        g_test_timer_start ();
+        for (i = 0; i < G_N_ELEMENTS (compare_urls); i++)
+        {
+            gchar* url = katze_collfold (compare_urls[i]);
+            for (j = 0; j < G_N_ELEMENTS (compare_urls); j++)
+                katze_utf8_stristr (compare_urls[j], url);
+            g_free (url);
+        }
+        elapsed += g_test_timer_elapsed ();
+    }
+    g_print ("%f seconds for comparison\n", elapsed / runs);
+}
+
 typedef struct
 {
     const gchar* uri;
@@ -287,6 +319,7 @@ main (int argc,
     g_test_init (&argc, &argv, NULL);
     gtk_init_check (&argc, &argv);
 
+    g_test_add_func ("/completion/compare", completion_compare);
     g_test_add_func ("/completion/count", completion_count);
     g_test_add_func ("/completion/fill", completion_fill);
     g_test_add_func ("/completion/match", completion_match);