404 lines
15 KiB
Vala
404 lines
15 KiB
Vala
/*
|
|
Copyright (C) 2009-2014 Christian Dywan <christian@twotoasts.de>
|
|
Copyright (C) 2009-2012 Alexander Butenko <a.butenka@gmail.com>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
See the file COPYING for the full license text.
|
|
*/
|
|
|
|
namespace Adblock {
|
|
public abstract class Feature : GLib.Object {
|
|
public virtual bool header (string key, string value) {
|
|
return false;
|
|
}
|
|
public virtual bool parsed (File file) {
|
|
return true;
|
|
}
|
|
public virtual Directive? match (string request_uri, string page_uri) throws Error {
|
|
return null;
|
|
}
|
|
public virtual void clear () {
|
|
}
|
|
}
|
|
|
|
public class Subscription : GLib.Object {
|
|
public string? path;
|
|
bool debug_parse;
|
|
public string uri { get; set; default = null; }
|
|
public string title { get; set; default = null; }
|
|
public bool active { get; set; default = true; }
|
|
public bool mutable { get; set; default = true; }
|
|
public bool valid { get; private set; default = true; }
|
|
HashTable<string, Directive?> cache;
|
|
List<Feature> features;
|
|
public Pattern pattern;
|
|
public Keys keys;
|
|
public Options optslist;
|
|
public Whitelist whitelist;
|
|
public Element element;
|
|
#if !HAVE_WEBKIT2
|
|
WebKit.Download? download;
|
|
#endif
|
|
|
|
public Subscription (string uri) {
|
|
debug_parse = "adblock:parse" in (Environment.get_variable ("MIDORI_DEBUG") ?? "");
|
|
|
|
this.uri = uri;
|
|
|
|
this.optslist = new Options ();
|
|
this.whitelist = new Whitelist (optslist);
|
|
add_feature (this.whitelist);
|
|
this.keys = new Keys (optslist);
|
|
add_feature (this.keys);
|
|
this.pattern = new Pattern (optslist);
|
|
add_feature (this.pattern);
|
|
this.element = new Element ();
|
|
add_feature (this.element);
|
|
clear ();
|
|
}
|
|
|
|
public void add_feature (Feature feature) {
|
|
features.append (feature);
|
|
size++;
|
|
}
|
|
|
|
/* foreach support */
|
|
public new unowned Feature? get (uint index) {
|
|
return features.nth_data (index);
|
|
}
|
|
public uint size { get; private set; }
|
|
|
|
public void clear () {
|
|
cache = new HashTable<string, Directive?> (str_hash, str_equal);
|
|
foreach (unowned Feature feature in features)
|
|
feature.clear ();
|
|
optslist.clear ();
|
|
}
|
|
|
|
internal void parse_line (string? line) throws Error {
|
|
if (line.has_prefix ("@@")) {
|
|
if (line.contains("$") && line.contains ("domain"))
|
|
return;
|
|
if (line.has_prefix ("@@||"))
|
|
add_url_pattern ("^", "whitelist", line.offset (4));
|
|
else if (line.has_prefix ("@@|"))
|
|
add_url_pattern ("^", "whitelist", line.offset (3));
|
|
else
|
|
add_url_pattern ("", "whitelist", line.offset (2));
|
|
return;
|
|
}
|
|
/* TODO: [include] [exclude] */
|
|
if (line[0] == '[')
|
|
return;
|
|
|
|
/* CSS block hider */
|
|
if (line.has_prefix ("##")) {
|
|
/* TODO */
|
|
return;
|
|
}
|
|
if (line[0] == '#')
|
|
return;
|
|
|
|
/* TODO: CSS hider whitelist */
|
|
if ("#@#" in line)
|
|
return;
|
|
|
|
/* Per domain CSS hider rule */
|
|
if ("##" in line) {
|
|
frame_add_private (line, "##");
|
|
return;
|
|
}
|
|
if ("#" in line) {
|
|
frame_add_private (line, "#");
|
|
return;
|
|
}
|
|
|
|
/* URL blocker rule */
|
|
if (line.has_prefix ("|")) {
|
|
/* TODO: handle options and domains excludes */
|
|
if (line.contains("$"))
|
|
return;
|
|
|
|
if (line.has_prefix ("||"))
|
|
add_url_pattern ("", "fulluri", line.offset (2));
|
|
else
|
|
add_url_pattern ("^", "fulluri", line.offset (1));
|
|
return /* add_url_pattern */;
|
|
}
|
|
|
|
add_url_pattern ("", "uri", line);
|
|
return /* add_url_pattern */;
|
|
}
|
|
|
|
void frame_add_private (string line, string sep) {
|
|
string[] data = line.split (sep, 2);
|
|
if (!(data[1] != null && data[1] != "")
|
|
|| data[1].chr (-1, '\'') != null
|
|
|| (data[1].chr (-1, ':') != null
|
|
&& !Regex.match_simple (".*\\[.*:.*\\].*", data[1],
|
|
RegexCompileFlags.CASELESS, RegexMatchFlags.NOTEMPTY))) {
|
|
return;
|
|
}
|
|
|
|
if (data[0].chr (-1, ',') != null) {
|
|
string[] domains = data[0].split (",", -1);
|
|
|
|
foreach (unowned string domain in domains) {
|
|
/* Ignore Firefox-specific option */
|
|
if (domain == "~pregecko2")
|
|
continue;
|
|
string stripped = domain.strip ();
|
|
/* FIXME: ~ should negate match */
|
|
if (stripped[0] == '~')
|
|
stripped = stripped.substring (1, -1);
|
|
update_css_hash (stripped, data[1]);
|
|
}
|
|
}
|
|
else {
|
|
update_css_hash (data[0], data[1]);
|
|
}
|
|
}
|
|
|
|
bool css_element_seems_valid (string element) {
|
|
bool is_valid = true;
|
|
string[] valid_elements = { "::after", "::before", "a", "abbr", "address", "article", "aside",
|
|
"b", "blockquote", "caption", "center", "cite", "code", "div", "dl", "dt", "dd", "em",
|
|
"feed", "fieldset", "figcaption", "figure", "font", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6",
|
|
"header", "hgroup", "i", "iframe", "iframe html *", "img", "kbd", "label", "legend", "li",
|
|
"m", "main", "marquee", "menu", "nav", "ol", "option", "p", "pre", "q", "samp", "section",
|
|
"small", "span", "strong", "summary", "table", "tr", "tbody", "td", "th", "thead", "tt", "ul" };
|
|
|
|
if (!element.has_prefix (".") && !element.has_prefix ("#")
|
|
&& !(element.split("[")[0] in valid_elements))
|
|
is_valid = false;
|
|
|
|
|
|
bool debug_selectors = "adblock:css" in (Environment.get_variable ("MIDORI_DEBUG") ?? "");
|
|
if (debug_selectors)
|
|
stdout.printf ("Adblock '%s' %s: %s\n",
|
|
this.title, is_valid ? "selector" : "INVALID?", element);
|
|
|
|
return is_valid;
|
|
}
|
|
|
|
void update_css_hash (string domain, string value) {
|
|
if (css_element_seems_valid (value)) {
|
|
string? olddata = element.lookup (domain);
|
|
if (olddata != null) {
|
|
string newdata = olddata + " , " + value;
|
|
element.insert (domain, newdata);
|
|
} else {
|
|
element.insert (domain, value);
|
|
}
|
|
}
|
|
}
|
|
|
|
void add_url_pattern (string prefix, string type, string line) throws Error {
|
|
string[]? data = line.split ("$", 2);
|
|
if (data == null || data[0] == null)
|
|
return;
|
|
|
|
string patt, opts;
|
|
patt = data[0];
|
|
opts = type;
|
|
|
|
if (data[1] != null)
|
|
opts = type + "," + data[1];
|
|
|
|
if (Regex.match_simple ("subdocument", opts,
|
|
RegexCompileFlags.CASELESS, RegexMatchFlags.NOTEMPTY))
|
|
return;
|
|
|
|
string format_patt = fixup_regex (prefix, patt);
|
|
if (debug_parse)
|
|
stdout.printf ("got: %s opts %s\n", format_patt, opts);
|
|
compile_regexp (format_patt, opts);
|
|
/* return format_patt */
|
|
}
|
|
|
|
bool compile_regexp (string? patt, string opts) throws Error {
|
|
if (patt == null)
|
|
return false;
|
|
try {
|
|
var regex = new Regex (patt, RegexCompileFlags.OPTIMIZE, RegexMatchFlags.NOTEMPTY);
|
|
/* is pattern is already a regex? */
|
|
if (Regex.match_simple ("^/.*[\\^\\$\\*].*/$", patt,
|
|
RegexCompileFlags.UNGREEDY, RegexMatchFlags.NOTEMPTY)
|
|
|| (opts != null && opts.contains ("whitelist"))) {
|
|
if (debug_parse)
|
|
stdout.printf ("patt: %s\n", patt);
|
|
if (opts.contains ("whitelist"))
|
|
this.whitelist.insert (patt, regex);
|
|
else
|
|
this.pattern.insert (patt, regex);
|
|
this.optslist.insert (patt, opts);
|
|
return false;
|
|
} else { /* nope, no regex */
|
|
int pos = 0, len;
|
|
int signature_size = 8;
|
|
string sig;
|
|
len = patt.length;
|
|
|
|
/* chop up pattern into substrings for faster matching */
|
|
for (pos = len - signature_size; pos>=0; pos--)
|
|
{
|
|
sig = patt.offset (pos).ndup (signature_size);
|
|
/* we don't have a * nor \\, does not look like regex, save chunk as "key" */
|
|
if (!Regex.match_simple ("[\\*]", sig, RegexCompileFlags.UNGREEDY, RegexMatchFlags.NOTEMPTY) && keys.lookup (sig) == null) {
|
|
this.keys.insert (sig, regex);
|
|
this.optslist.insert (sig, opts);
|
|
} else {
|
|
/* starts with * or \\ - save as regex */
|
|
if ((sig.has_prefix ("*") || sig.has_prefix("\\")) && this.pattern.lookup (sig) == null) {
|
|
this.pattern.insert (sig, regex);
|
|
this.optslist.insert (sig, opts);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
catch (Error error) {
|
|
warning ("Adblock compile regexp: %s", error.message);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
public void parse_header (string header) throws Error {
|
|
/* Headers come in two forms
|
|
! Foo: Bar
|
|
! Some freeform text
|
|
*/
|
|
string key = header;
|
|
string value = "";
|
|
if (header.contains (":")) {
|
|
string[] parts = header.split (":", 2);
|
|
if (parts[0] != null && parts[0] != ""
|
|
&& parts[1] != null && parts[1] != "") {
|
|
key = parts[0].substring (2, -1);
|
|
value = parts[1].substring (1, -1);
|
|
}
|
|
}
|
|
debug ("Header '%s' says '%s'", key, value);
|
|
if (key == "Title")
|
|
title = value;
|
|
foreach (unowned Feature feature in features) {
|
|
if (feature.header (key, value))
|
|
break;
|
|
}
|
|
}
|
|
|
|
#if !HAVE_WEBKIT2
|
|
void download_status (ParamSpec pspec) {
|
|
if (download.get_status () != WebKit.DownloadStatus.FINISHED)
|
|
return;
|
|
|
|
download = null;
|
|
try {
|
|
parse ();
|
|
} catch (Error error) {
|
|
warning ("Error parsing %s: %s", uri, error.message);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
public void parse () throws Error
|
|
{
|
|
if (!active)
|
|
return;
|
|
|
|
debug ("Parsing %s (%s)", uri, path);
|
|
|
|
clear ();
|
|
|
|
if (uri.has_prefix ("file://"))
|
|
path = Filename.from_uri (uri);
|
|
else {
|
|
string cache_dir = GLib.Path.build_filename (GLib.Environment.get_user_cache_dir (), PACKAGE_NAME, "adblock");
|
|
Midori.Paths.mkdir_with_parents (cache_dir);
|
|
string filename = Checksum.compute_for_string (ChecksumType.MD5, this.uri, -1);
|
|
path = GLib.Path.build_filename (cache_dir, filename);
|
|
}
|
|
|
|
File filter_file = File.new_for_path (path);
|
|
DataInputStream stream;
|
|
try {
|
|
stream = new DataInputStream (filter_file.read ());
|
|
} catch (IOError.NOT_FOUND exist_error) {
|
|
#if HAVE_WEBKIT2
|
|
/* TODO */
|
|
#else
|
|
/* Don't bother trying to download local files */
|
|
if (!uri.has_prefix ("file://")) {
|
|
if (download != null)
|
|
return;
|
|
|
|
string destination_uri = Filename.to_uri (path, null);
|
|
debug ("Fetching %s to %s now", uri, destination_uri);
|
|
download = new WebKit.Download (new WebKit.NetworkRequest (uri));
|
|
if (!Midori.Download.has_enough_space (download, destination_uri, true))
|
|
throw new FileError.EXIST ("Can't download to \"%s\"", path);
|
|
download.destination_uri = destination_uri;
|
|
download.notify["status"].connect (download_status);
|
|
download.start ();
|
|
}
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
valid = false;
|
|
string? line;
|
|
while ((line = stream.read_line (null)) != null) {
|
|
if (line == null)
|
|
continue;
|
|
string chomped = line.chomp ();
|
|
if (chomped == "")
|
|
continue;
|
|
if (line[0] == '!')
|
|
parse_header (chomped);
|
|
else
|
|
parse_line (chomped);
|
|
/* The file isn't completely empty */
|
|
valid = true;
|
|
}
|
|
|
|
foreach (unowned Feature feature in features) {
|
|
if (!feature.parsed (filter_file))
|
|
valid = false;
|
|
}
|
|
}
|
|
|
|
public Directive? get_directive (string request_uri, string page_uri) {
|
|
try {
|
|
Directive? directive = cache.lookup (request_uri);
|
|
if (directive != null)
|
|
return directive;
|
|
foreach (unowned Feature feature in features) {
|
|
directive = feature.match (request_uri, page_uri);
|
|
if (directive != null) {
|
|
debug ("%s gave %s for %s (%s)\n",
|
|
feature.get_type ().name (), directive.to_string (), request_uri, page_uri);
|
|
return directive;
|
|
}
|
|
}
|
|
} catch (Error error) {
|
|
warning ("Adblock match error: %s\n", error.message);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public void add_rule (string rule) {
|
|
try {
|
|
var file = File.new_for_uri (uri);
|
|
file.append_to (FileCreateFlags.NONE).write (("%s\n".printf (rule)).data);
|
|
parse ();
|
|
} catch (Error error) {
|
|
warning ("Failed to add custom rule: %s", error.message);
|
|
}
|
|
}
|
|
}
|
|
}
|