Implement katze_utf8_stristr, katze_collfold and use in completion

Evidently normalizing any whole string is too slow for completion,
so we need to even out case and composition while iterating through
the strings.

The decomposing version of katze_utf8_stristr is disabled, since
it is too slow, and an ASCII-only version is used for now.

A unit test 'compare' is added that solely measures performance of
katze_collfold and katze_utf8_stristr.
This commit is contained in:
Christian Dywan 2009-12-28 23:44:16 +01:00
parent 613f47627e
commit b1ee80d55e
4 changed files with 135 additions and 38 deletions

View file

@ -1482,3 +1482,96 @@ katze_load_cached_icon (const gchar* uri,
return icon || !widget ? icon : gtk_widget_render_icon (widget, return icon || !widget ? icon : gtk_widget_render_icon (widget,
GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL); GTK_STOCK_FILE, GTK_ICON_SIZE_MENU, NULL);
} }
/**
 * katze_collfold:
 * @str: a non-NULL UTF-8 string
 *
 * Computes a lowercased and canonically decomposed copy of @str
 * so it can be used for comparison.
 *
 * The string is processed one character at a time: each character is
 * lowercased with g_unichar_tolower() and then decomposed, so the
 * string as a whole is never normalized in one go.
 *
 * Return value: a newly allocated normalized string, free with g_free()
 *
 * Since: 0.2.3
 **/
gchar*
katze_collfold (const gchar* str)
{
    GString* result = g_string_new (NULL);
    const gchar* p = str;

    while (*p)
    {
        gunichar ch = g_unichar_tolower (g_utf8_get_char (p));
        gsize len;
        gunichar* sch = g_unicode_canonical_decomposition (ch, &len);
        gsize i;

        for (i = 0; i < len; i++)
            g_string_append_unichar (result, sch[i]);
        /* The decomposition is newly allocated for every character,
           so it has to be released here or it leaks per iteration. */
        g_free (sch);
        p = g_utf8_next_char (p);
    }
    /* Hand the character data over to the caller, drop only the wrapper */
    return g_string_free (result, FALSE);
}
/**
 * katze_utf8_stristr:
 * @haystack: a non-NULL UTF-8 string
 * @needle: a normalized non-NULL UTF-8 string
 *
 * Determines whether @needle is in @haystack, disregarding
 * differences in case.
 *
 * The implementation currently compiled in compares byte by byte and
 * only evens out the case of ASCII letters. An empty @needle is found
 * in any non-empty @haystack, an empty @haystack never matches.
 *
 * Return value: %TRUE if @needle is found in @haystack
 *
 * Since: 0.2.3
 **/
gboolean
katze_utf8_stristr (const gchar* haystack,
                    const gchar* needle)
{
#if 0 /* 0,000159 seconds */
    /* Too slow for use in completion */
    /* NOTE(review): i counts bytes here while g_utf8_get_char decodes whole
       characters, so multi-byte sequences are decoded at offsets inside a
       character; revisit before enabling this variant. */
    gchar* nhaystack = g_utf8_normalize (haystack, -1, G_NORMALIZE_DEFAULT);
    const gchar *p = nhaystack;
    gsize len = strlen (needle);
    gsize i;
    while (*p)
    {
        for (i = 0; i < len; i++)
            if (g_unichar_tolower (g_utf8_get_char (p + i))
             != g_unichar_tolower (g_utf8_get_char (needle + i)))
                goto next;
        g_free (nhaystack);
        return TRUE;
        next:
        p = g_utf8_next_char (p);
    }
    g_free (nhaystack);
    return FALSE;
#else /* 0,000044 seconds */
    /* No unicode matching */
    const gsize needle_len = strlen (needle);
    const gchar* start;

    /* Try every byte offset of the haystack as a candidate start */
    for (start = haystack; *start != '\0'; start++)
    {
        gsize matched = 0;

        /* The terminating NUL of the haystack never equals a needle byte,
           so the scan stops before it could run past the end. */
        while (matched < needle_len
            && g_ascii_tolower (start[matched])
            == g_ascii_tolower (needle[matched]))
            matched++;

        if (matched == needle_len)
            return TRUE;
    }
    return FALSE;
#endif
}

View file

@ -151,6 +151,13 @@ GdkPixbuf*
katze_load_cached_icon (const gchar* uri, katze_load_cached_icon (const gchar* uri,
GtkWidget* widget); GtkWidget* widget);
/* Returns a newly allocated, lowercased and decomposed copy of str */
gchar*
katze_collfold (const gchar* str);
/* Returns TRUE if needle occurs in haystack, disregarding case */
gboolean
katze_utf8_stristr (const gchar* haystack,
const gchar* needle);
G_END_DECLS G_END_DECLS
#endif /* __KATZE_UTILS_H__ */ #endif /* __KATZE_UTILS_H__ */

View file

@ -755,47 +755,11 @@ midori_location_entry_completion_match_cb (GtkEntryCompletion* completion,
match = FALSE; match = FALSE;
if (G_LIKELY (uri)) if (G_LIKELY (uri))
{ {
gchar* nkey; match = katze_utf8_stristr (uri, key);
gchar* fkey;
gchar* nuri;
gchar* furi;
if ((nkey = g_utf8_normalize (key, -1, G_NORMALIZE_ALL)))
{
fkey = g_utf8_casefold (nkey, -1);
g_free (nkey);
}
else
fkey = g_utf8_casefold (key, -1);
if ((nuri = g_utf8_normalize (uri, -1, G_NORMALIZE_ALL)))
{
furi = g_utf8_casefold (nuri, -1);
g_free (nuri);
}
else
furi = g_utf8_casefold (uri, -1);
g_free (uri); g_free (uri);
match = strstr (furi, fkey) != NULL;
g_free (furi);
if (!match && G_LIKELY (title)) if (!match && G_LIKELY (title))
{ match = katze_utf8_stristr (title, key);
gchar* ntitle;
gchar* ftitle;
if ((ntitle = g_utf8_normalize (title, -1, G_NORMALIZE_ALL)))
{
ftitle = g_utf8_casefold (ntitle, -1);
g_free (ntitle);
}
else
ftitle = g_utf8_casefold (title, -1);
match = strstr (ftitle, fkey) != NULL;
g_free (ftitle);
}
g_free (fkey);
} }
g_free (title); g_free (title);

View file

@ -19,6 +19,38 @@
GtkWidget* GtkWidget*
midori_location_action_entry_for_proxy (GtkWidget* proxy); midori_location_action_entry_for_proxy (GtkWidget* proxy);
/* Sample URLs fed to the comparison benchmark: plain ASCII, Latin letters
   with diacritics, Japanese, and a host name of mostly non-ASCII letters.
   NOTE(review): the Düsseldorf URL is listed twice; presumably the two
   entries differ in Unicode composition - verify the actual bytes. */
static const gchar* compare_urls[] = {
"http://en.wikipedia.org/wiki/Foul",
"http://de.wikipedia.org/wiki/Düsseldorf",
"http://de.wikipedia.org/wiki/Düsseldorf",
"http://ja.wikipedia.org/wiki/若井はんじ・けんじ",
"http://www.johannkönig.com",
"http://şøñđëřżēıċħęŋđőmæîņĭśŧşũþėŗ.de",
};
/* Measures how long it takes to fold every URL in compare_urls with
   katze_collfold and to search for the folded form in every URL with
   katze_utf8_stristr. The average over all runs is printed; nothing is
   asserted, so this test only reports performance. */
static void
completion_compare (void)
{
    const guint runs = 10000;
    guint t;
    gdouble elapsed = 0.0;

    for (t = 0; t < runs; t++)
    {
        guint i, j;

        g_test_timer_start ();
        for (i = 0; i < G_N_ELEMENTS (compare_urls); i++)
        {
            gchar* url = katze_collfold (compare_urls[i]);

            /* Search each haystack in turn; indexing with i here would
               merely repeat the same comparison for every j. */
            for (j = 0; j < G_N_ELEMENTS (compare_urls); j++)
                katze_utf8_stristr (compare_urls[j], url);
            g_free (url);
        }
        elapsed += g_test_timer_elapsed ();
    }
    g_print ("%f seconds for comparison\n", elapsed / runs);
}
typedef struct typedef struct
{ {
const gchar* uri; const gchar* uri;
@ -287,6 +319,7 @@ main (int argc,
g_test_init (&argc, &argv, NULL); g_test_init (&argc, &argv, NULL);
gtk_init_check (&argc, &argv); gtk_init_check (&argc, &argv);
g_test_add_func ("/completion/compare", completion_compare);
g_test_add_func ("/completion/count", completion_count); g_test_add_func ("/completion/count", completion_count);
g_test_add_func ("/completion/fill", completion_fill); g_test_add_func ("/completion/fill", completion_fill);
g_test_add_func ("/completion/match", completion_match); g_test_add_func ("/completion/match", completion_match);