Speedup adblock with blacklists and reorganize unit tests

1. Blacklist for pattern matching. One regexp can have more than one
signature. If the first match against a regexp failed, there is no need
to match the same regexp a second time when another of its signatures
matches. This gives us a 20% speedup.

2. Cleanup for performance tests.

3. Tests reorganized. Now we are checking pattern validation
plus performance timing. Simplifies testing.

4. Bump version to 0.5. The next patch will add option matching
so we will bump it to 1.0 once it is tested.
This commit is contained in:
Alexander Butenko 2010-01-10 22:52:06 +01:00 committed by Christian Dywan
parent 96a03e91bf
commit 089315bcaa

View file

@ -589,6 +589,7 @@ adblock_is_matched_by_key (const gchar* opts,
gchar* uri; gchar* uri;
gint len; gint len;
int pos = 0; int pos = 0;
GList* regex_bl = NULL;
uri = adblock_fixup_regexp ((gchar*)req_uri); uri = adblock_fixup_regexp ((gchar*)req_uri);
len = strlen (uri); len = strlen (uri);
@ -596,24 +597,30 @@ adblock_is_matched_by_key (const gchar* opts,
{ {
gchar* sig = g_strndup (uri + pos, SIGNATURE_SIZE); gchar* sig = g_strndup (uri + pos, SIGNATURE_SIZE);
GRegex* regex = g_hash_table_lookup (keys, sig); GRegex* regex = g_hash_table_lookup (keys, sig);
if (regex) if (regex && !g_list_find (regex_bl, regex))
{ {
if (g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL)) if (g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL))
{ {
g_free (uri); g_free (uri);
g_free (sig); g_free (sig);
if (opts && adblock_check_filter_options (regex, opts, req_uri, page_uri)) if (opts && adblock_check_filter_options (regex, opts, req_uri, page_uri))
{
g_list_free (regex_bl);
return FALSE; return FALSE;
}
else else
{ {
/* g_debug("blocked by key sig=%s regexp=%s -- %s", sig, g_regex_get_pattern (regex), req_uri); */ /* g_debug("blocked by key sig=%s regexp=%s -- %s", sig, g_regex_get_pattern (regex), req_uri); */
g_list_free (regex_bl);
return TRUE; return TRUE;
} }
} }
regex_bl = g_list_prepend (regex_bl, regex);
} }
g_free (sig); g_free (sig);
} }
g_free (uri); g_free (uri);
g_list_free (regex_bl);
return FALSE; return FALSE;
} }
@ -660,14 +667,11 @@ adblock_resource_request_starting_cb (WebKitWebView* web_view,
if (!page_uri || !strcmp (page_uri, "about:blank")) if (!page_uri || !strcmp (page_uri, "about:blank"))
page_uri = req_uri; page_uri = req_uri;
/* gdouble elapsed = 0.0; /* g_test_timer_start (); */
g_test_timer_start (); */
/* TODO: opts should be defined */ /* TODO: opts should be defined */
if (adblock_is_matched (NULL, req_uri, page_uri)) if (adblock_is_matched (NULL, req_uri, page_uri))
webkit_network_request_set_uri (request, "about:blank"); webkit_network_request_set_uri (request, "about:blank");
/* elapsed += g_test_timer_elapsed (); /* g_debug ("%f", g_test_timer_elapsed ()); */
g_debug ("%f", elapsed); */
} }
#else #else
@ -864,9 +868,9 @@ adblock_fixup_regexp (gchar* src)
case '*': case '*':
g_string_append (str, ".*"); g_string_append (str, ".*");
break; break;
case '.': /*case '.':
g_string_append (str, "\\."); g_string_append (str, "\\.");
break; break;*/
case '?': case '?':
g_string_append (str, "\\?"); g_string_append (str, "\\?");
break; break;
@ -1206,8 +1210,8 @@ test_adblock_parse (void)
g_assert_cmpstr (adblock_parse_line ("?foo"), ==, "\\?foo"); g_assert_cmpstr (adblock_parse_line ("?foo"), ==, "\\?foo");
g_assert_cmpstr (adblock_parse_line ("foo?"), ==, "foo\\?"); g_assert_cmpstr (adblock_parse_line ("foo?"), ==, "foo\\?");
g_assert_cmpstr (adblock_parse_line (".*foo/bar"), ==, "\\..*foo/bar"); g_assert_cmpstr (adblock_parse_line (".*foo/bar"), ==, "..*foo/bar");
g_assert_cmpstr (adblock_parse_line ("http://bla.blub/*"), ==, "http://bla\\.blub/"); g_assert_cmpstr (adblock_parse_line ("http://bla.blub/*"), ==, "http://bla.blub/");
} }
static void static void
@ -1235,6 +1239,7 @@ test_adblock_pattern (void)
adblock_parse_file (filename); adblock_parse_file (filename);
g_test_timer_start ();
g_assert (adblock_is_matched (NULL, "http://test.dom/test?var=1", "")); g_assert (adblock_is_matched (NULL, "http://test.dom/test?var=1", ""));
g_assert (adblock_is_matched (NULL, "http://ads.foo.bar/teddy", "")); g_assert (adblock_is_matched (NULL, "http://ads.foo.bar/teddy", ""));
g_assert (!adblock_is_matched (NULL, "http://ads.fuu.bar/teddy", "")); g_assert (!adblock_is_matched (NULL, "http://ads.fuu.bar/teddy", ""));
@ -1252,6 +1257,12 @@ test_adblock_pattern (void)
g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", "")); g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", ""));
g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", "")); g_assert (adblock_is_matched (NULL, "http://qq.videostrip.com/sub/admatcherclient.php", ""));
g_assert (adblock_is_matched (NULL, "http://br.gcl.ru/cgi-bin/br/test", "")); g_assert (adblock_is_matched (NULL, "http://br.gcl.ru/cgi-bin/br/test", ""));
g_assert (!adblock_is_matched (NULL, "https://bugs.webkit.org/buglist.cgi?query_format=advanced&short_desc_type=allwordssubstr&short_desc=&long_desc_type=substring&long_desc=&bug_file_loc_type=allwordssubstr&bug_file_loc=&keywords_type=allwords&keywords=&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&emailassigned_to1=1&emailtype1=substring&email1=&emailassigned_to2=1&emailreporter2=1&emailcc2=1&emailtype2=substring&email2=&bugidtype=include&bug_id=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&query_based_on=gtkport&field0-0-0=keywords&type0-0-0=anywordssubstr&value0-0-0=Gtk%20Cairo%20soup&field0-0-1=short_desc&type0-0-1=anywordssubstr&value0-0-1=Gtk%20Cairo%20soup%20autoconf%20automake%20autotool&field0-0-2=component&type0-0-2=equals&value0-0-2=WebKit%20Gtk", ""));
g_assert (!adblock_is_matched (NULL, "http://www.engadget.com/2009/09/24/google-hits-android-rom-modder-with-a-cease-and-desist-letter/", ""));
g_assert (!adblock_is_matched (NULL, "http://karibik-invest.com/es/bienes_raices/search.php?sqT=19&sqN=&sqMp=&sqL=0&qR=1&sqMb=&searchMode=1&action=B%FAsqueda", ""));
g_assert (!adblock_is_matched (NULL, "http://google.com", ""));
g_print ("Search took %f seconds\n", g_test_timer_elapsed ());
close (temp); close (temp);
g_unlink (filename); g_unlink (filename);
@ -1259,42 +1270,11 @@ test_adblock_pattern (void)
g_hash_table_destroy (pattern); g_hash_table_destroy (pattern);
} }
static void
test_adblock_count (void)
{
adblock_init_db ();
gchar* urls[6] = {
"https://bugs.webkit.org/buglist.cgi?query_format=advanced&short_desc_type=allwordssubstr&short_desc=&long_desc_type=substring&long_desc=&bug_file_loc_type=allwordssubstr&bug_file_loc=&keywords_type=allwords&keywords=&bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&emailassigned_to1=1&emailtype1=substring&email1=&emailassigned_to2=1&emailreporter2=1&emailcc2=1&emailtype2=substring&email2=&bugidtype=include&bug_id=&votes=&chfieldfrom=&chfieldto=Now&chfieldvalue=&query_based_on=gtkport&field0-0-0=keywords&type0-0-0=anywordssubstr&value0-0-0=Gtk%20Cairo%20soup&field0-0-1=short_desc&type0-0-1=anywordssubstr&value0-0-1=Gtk%20Cairo%20soup%20autoconf%20automake%20autotool&field0-0-2=component&type0-0-2=equals&value0-0-2=WebKit%20Gtk",
"http://www.engadget.com/2009/09/24/google-hits-android-rom-modder-with-a-cease-and-desist-letter/",
"http://karibik-invest.com/es/bienes_raices/search.php?sqT=19&sqN=&sqMp=&sqL=0&qR=1&sqMb=&searchMode=1&action=B%FAsqueda",
"http://google.com",
"http://ya.ru",
"http://google.com"
};
/* FIXME */
gchar* filename = "/home/avb/.cache/midori/adblock/bb6cd38a4579b3605946b1228fa65297";
gdouble elapsed = 0.0;
gchar* str;
int i;
adblock_parse_file (filename);
for (i = 0; i < 6; i++)
{
str = urls[i];
g_test_timer_start ();
adblock_is_matched (NULL, str, "");
elapsed += g_test_timer_elapsed ();
}
g_print ("Search took %f seconds\n", elapsed);
g_hash_table_destroy (pattern);
}
void void
extension_test (void) extension_test (void)
{ {
g_test_add_func ("/extensions/adblock/parse", test_adblock_parse); g_test_add_func ("/extensions/adblock/parse", test_adblock_parse);
g_test_add_func ("/extensions/adblock/pattern", test_adblock_pattern); g_test_add_func ("/extensions/adblock/pattern", test_adblock_pattern);
g_test_add_func ("/extensions/adblock/count", test_adblock_count);
} }
#endif #endif
@ -1304,7 +1284,7 @@ extension_init (void)
MidoriExtension* extension = g_object_new (MIDORI_TYPE_EXTENSION, MidoriExtension* extension = g_object_new (MIDORI_TYPE_EXTENSION,
"name", _("Advertisement blocker"), "name", _("Advertisement blocker"),
"description", _("Block advertisements according to a filter list"), "description", _("Block advertisements according to a filter list"),
"version", "0.1", "version", "0.5",
"authors", "Christian Dywan <christian@twotoasts.de>", "authors", "Christian Dywan <christian@twotoasts.de>",
NULL); NULL);
midori_extension_install_string_list (extension, "filters", NULL, G_MAXSIZE); midori_extension_install_string_list (extension, "filters", NULL, G_MAXSIZE);