Implement katze_utf8_stristr, katze_collfold and use in completion
Evidently, normalizing any whole string is too slow for completion, so we need to even out case and composition while iterating through the strings. The decomposing version of katze_utf8_stristr is disabled, since it is too slow, and an ASCII-only version is used for now. A unit test 'compare' is added that solely measures the performance of katze_collfold and katze_utf8_stristr.
This commit is contained in:
parent
613f47627e
commit
b1ee80d55e
4 changed files with 135 additions and 38 deletions
|
@ -1482,3 +1482,96 @@ katze_load_cached_icon (const gchar* uri,
|
||||||
return icon || !widget ? icon : gtk_widget_render_icon (widget,
|
return icon || !widget ? icon : gtk_widget_render_icon (widget,
|
||||||
GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL);
|
GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* katze_collfold:
|
||||||
|
* @str: a non-NULL UTF-8 string
|
||||||
|
*
|
||||||
|
* Computes a string without case and decomposited so
|
||||||
|
* it can be used for comparison.
|
||||||
|
*
|
||||||
|
* Return value: a normalized string
|
||||||
|
*
|
||||||
|
* Since: 0.2.3
|
||||||
|
**/
|
||||||
|
gchar*
|
||||||
|
katze_collfold (const gchar* str)
|
||||||
|
{
|
||||||
|
GString* result = g_string_new (NULL);
|
||||||
|
const gchar* p = str;
|
||||||
|
|
||||||
|
while (*p)
|
||||||
|
{
|
||||||
|
gunichar ch = g_unichar_tolower (g_utf8_get_char (p));
|
||||||
|
gsize len;
|
||||||
|
gunichar* sch = g_unicode_canonical_decomposition (ch, &len);
|
||||||
|
guint i = 0;
|
||||||
|
while (i < len)
|
||||||
|
g_string_append_unichar (result, sch[i++]);
|
||||||
|
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return g_string_free (result, FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* katze_utf8_stristr:
|
||||||
|
* @haystack: a non-NULL UTF-8 string
|
||||||
|
* @needle: a normalized non-NULL UTF-8 string
|
||||||
|
*
|
||||||
|
* Determines whether @needle is in @haystack, disregarding
|
||||||
|
* differences in case.
|
||||||
|
*
|
||||||
|
* Return value: %TRUE if @needle is found in @haystack
|
||||||
|
*
|
||||||
|
* Since: 0.2.3
|
||||||
|
**/
|
||||||
|
gboolean
|
||||||
|
katze_utf8_stristr (const gchar* haystack,
|
||||||
|
const gchar* needle)
|
||||||
|
{
|
||||||
|
#if 0 /* 0,000159 seconds */
|
||||||
|
/* Too slow for use in completion */
|
||||||
|
gchar* nhaystack = g_utf8_normalize (haystack, -1, G_NORMALIZE_DEFAULT);
|
||||||
|
const gchar *p = nhaystack;
|
||||||
|
gsize len = strlen (needle);
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
while (*p)
|
||||||
|
{
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
if (g_unichar_tolower (g_utf8_get_char (p + i))
|
||||||
|
!= g_unichar_tolower (g_utf8_get_char (needle + i)))
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
g_free (nhaystack);
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
next:
|
||||||
|
p = g_utf8_next_char (p);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free (nhaystack);
|
||||||
|
return FALSE;
|
||||||
|
#else /* 0,000044 seconds */
|
||||||
|
/* No unicode matching */
|
||||||
|
const gchar *p = haystack;
|
||||||
|
gsize len = strlen (needle);
|
||||||
|
gsize i;
|
||||||
|
|
||||||
|
while (*p)
|
||||||
|
{
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
if (g_ascii_tolower (p[i]) != g_ascii_tolower (needle[i]))
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
next:
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
|
@ -151,6 +151,13 @@ GdkPixbuf*
|
||||||
katze_load_cached_icon (const gchar* uri,
|
katze_load_cached_icon (const gchar* uri,
|
||||||
GtkWidget* widget);
|
GtkWidget* widget);
|
||||||
|
|
||||||
|
gchar*
|
||||||
|
katze_collfold (const gchar* str);
|
||||||
|
|
||||||
|
gboolean
|
||||||
|
katze_utf8_stristr (const gchar* haystack,
|
||||||
|
const gchar* needle);
|
||||||
|
|
||||||
G_END_DECLS
|
G_END_DECLS
|
||||||
|
|
||||||
#endif /* __KATZE_UTILS_H__ */
|
#endif /* __KATZE_UTILS_H__ */
|
||||||
|
|
|
@ -755,47 +755,11 @@ midori_location_entry_completion_match_cb (GtkEntryCompletion* completion,
|
||||||
match = FALSE;
|
match = FALSE;
|
||||||
if (G_LIKELY (uri))
|
if (G_LIKELY (uri))
|
||||||
{
|
{
|
||||||
gchar* nkey;
|
match = katze_utf8_stristr (uri, key);
|
||||||
gchar* fkey;
|
|
||||||
gchar* nuri;
|
|
||||||
gchar* furi;
|
|
||||||
|
|
||||||
if ((nkey = g_utf8_normalize (key, -1, G_NORMALIZE_ALL)))
|
|
||||||
{
|
|
||||||
fkey = g_utf8_casefold (nkey, -1);
|
|
||||||
g_free (nkey);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
fkey = g_utf8_casefold (key, -1);
|
|
||||||
if ((nuri = g_utf8_normalize (uri, -1, G_NORMALIZE_ALL)))
|
|
||||||
{
|
|
||||||
furi = g_utf8_casefold (nuri, -1);
|
|
||||||
g_free (nuri);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
furi = g_utf8_casefold (uri, -1);
|
|
||||||
g_free (uri);
|
g_free (uri);
|
||||||
match = strstr (furi, fkey) != NULL;
|
|
||||||
g_free (furi);
|
|
||||||
|
|
||||||
if (!match && G_LIKELY (title))
|
if (!match && G_LIKELY (title))
|
||||||
{
|
match = katze_utf8_stristr (title, key);
|
||||||
gchar* ntitle;
|
|
||||||
gchar* ftitle;
|
|
||||||
|
|
||||||
if ((ntitle = g_utf8_normalize (title, -1, G_NORMALIZE_ALL)))
|
|
||||||
{
|
|
||||||
ftitle = g_utf8_casefold (ntitle, -1);
|
|
||||||
g_free (ntitle);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
ftitle = g_utf8_casefold (title, -1);
|
|
||||||
|
|
||||||
match = strstr (ftitle, fkey) != NULL;
|
|
||||||
g_free (ftitle);
|
|
||||||
}
|
|
||||||
|
|
||||||
g_free (fkey);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
g_free (title);
|
g_free (title);
|
||||||
|
|
|
@ -19,6 +19,38 @@
|
||||||
GtkWidget*
|
GtkWidget*
|
||||||
midori_location_action_entry_for_proxy (GtkWidget* proxy);
|
midori_location_action_entry_for_proxy (GtkWidget* proxy);
|
||||||
|
|
||||||
|
static const gchar* compare_urls[] = {
|
||||||
|
"http://en.wikipedia.org/wiki/Foul",
|
||||||
|
"http://de.wikipedia.org/wiki/Düsseldorf",
|
||||||
|
"http://de.wikipedia.org/wiki/Düsseldorf",
|
||||||
|
"http://ja.wikipedia.org/wiki/若井はんじ・けんじ",
|
||||||
|
"http://www.johannkönig.com",
|
||||||
|
"http://şøñđëřżēıċħęŋđőmæîņĭśŧşũþėŗ.de",
|
||||||
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
completion_compare (void)
|
||||||
|
{
|
||||||
|
const guint runs = 10000;
|
||||||
|
guint t;
|
||||||
|
gdouble elapsed = 0.0;
|
||||||
|
|
||||||
|
for (t = 0; t < runs; t++)
|
||||||
|
{
|
||||||
|
g_test_timer_start ();
|
||||||
|
guint i, j;
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (compare_urls); i++)
|
||||||
|
{
|
||||||
|
gchar* url = katze_collfold (compare_urls[i]);
|
||||||
|
for (j = 0; j < G_N_ELEMENTS (compare_urls); j++)
|
||||||
|
katze_utf8_stristr (compare_urls[i], url);
|
||||||
|
g_free (url);
|
||||||
|
}
|
||||||
|
elapsed += g_test_timer_elapsed ();
|
||||||
|
}
|
||||||
|
g_print ("%f seconds for comparison\n", elapsed / runs);
|
||||||
|
}
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
const gchar* uri;
|
const gchar* uri;
|
||||||
|
@ -287,6 +319,7 @@ main (int argc,
|
||||||
g_test_init (&argc, &argv, NULL);
|
g_test_init (&argc, &argv, NULL);
|
||||||
gtk_init_check (&argc, &argv);
|
gtk_init_check (&argc, &argv);
|
||||||
|
|
||||||
|
g_test_add_func ("/completion/compare", completion_compare);
|
||||||
g_test_add_func ("/completion/count", completion_count);
|
g_test_add_func ("/completion/count", completion_count);
|
||||||
g_test_add_func ("/completion/fill", completion_fill);
|
g_test_add_func ("/completion/fill", completion_fill);
|
||||||
g_test_add_func ("/completion/match", completion_match);
|
g_test_add_func ("/completion/match", completion_match);
|
||||||
|
|
Loading…
Reference in a new issue