8ef3eeaa89
With WebKit 1.1.11 and newer, we can sniff the content type which is needed in some cases to load for instance Flash videos. We are writing chunks into temporary files and commit completed files to the cache, so that we don't end up serving partial files. Old cache entries are removed before updating. Length detection is improved. File creation moved from got-chunk to got-headers.
594 lines
19 KiB
C
594 lines
19 KiB
C
/*
|
|
Copyright (C) 2009 Christian Dywan <christian@twotoasts.de>
|
|
Copyright (C) 2009 Alexander Butenko <a.butenka@gmail.com>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
See the file COPYING for the full license text.
|
|
*/
|
|
|
|
#include <midori/midori.h>
|
|
|
|
#include <midori/sokoke.h>
|
|
#include "config.h"
|
|
|
|
#include <glib/gstdio.h>
|
|
#include <stdlib.h>
|
|
#if HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
static gboolean offline_mode = FALSE;
|
|
#define HAVE_WEBKIT_RESOURCE_REQUEST WEBKIT_CHECK_VERSION (1, 1, 14)
|
|
#define MAXLENGTH 1024 * 1024
|
|
|
|
static gchar*
|
|
web_cache_get_cached_path (MidoriExtension* extension,
|
|
const gchar* uri)
|
|
{
|
|
static const gchar* cache_path = NULL;
|
|
gchar* checksum;
|
|
gchar* folder;
|
|
gchar* sub_path;
|
|
gchar* encoded;
|
|
gchar* ext;
|
|
gchar* cached_filename;
|
|
gchar* cached_path;
|
|
|
|
if (!cache_path)
|
|
cache_path = midori_extension_get_string (extension, "path");
|
|
checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, uri, -1);
|
|
folder = g_strdup_printf ("%c%c", checksum[0], checksum[1]);
|
|
sub_path = g_build_path (G_DIR_SEPARATOR_S, cache_path, folder, NULL);
|
|
g_mkdir (sub_path, 0700);
|
|
g_free (folder);
|
|
|
|
encoded = soup_uri_encode (uri, "/");
|
|
ext = g_strdup (g_strrstr (encoded, "."));
|
|
/* Make sure ext isn't becoming too long */
|
|
if (ext && ext[0] && ext[1] && ext[2] && ext[3] && ext[4])
|
|
ext[4] = '\0';
|
|
cached_filename = g_strdup_printf ("%s%s", checksum, ext ? ext : "");
|
|
g_free (ext);
|
|
g_free (encoded);
|
|
g_free (checksum);
|
|
cached_path = g_build_filename (sub_path, cached_filename, NULL);
|
|
g_free (cached_filename);
|
|
return cached_path;
|
|
}
|
|
|
|
static gboolean
|
|
web_cache_replace_frame_uri (MidoriExtension* extension,
|
|
const gchar* uri,
|
|
WebKitWebFrame* web_frame)
|
|
{
|
|
gchar* filename;
|
|
gboolean handled = FALSE;
|
|
|
|
filename = web_cache_get_cached_path (extension, uri);
|
|
|
|
/* g_debug ("cache lookup: %s => %s", uri, filename); */
|
|
|
|
if (g_file_test (filename, G_FILE_TEST_EXISTS))
|
|
{
|
|
gchar* data;
|
|
g_file_get_contents (filename, &data, NULL, NULL);
|
|
webkit_web_frame_load_alternate_string (web_frame, data, NULL, uri);
|
|
g_free (data);
|
|
handled = TRUE;
|
|
}
|
|
|
|
g_free (filename);
|
|
return handled;
|
|
}
|
|
|
|
static gboolean
|
|
web_cache_navigation_decision_cb (WebKitWebView* web_view,
|
|
WebKitWebFrame* web_frame,
|
|
WebKitNetworkRequest* request,
|
|
WebKitWebNavigationAction* action,
|
|
WebKitWebPolicyDecision* decision,
|
|
MidoriExtension* extension)
|
|
{
|
|
const gchar* uri = webkit_network_request_get_uri (request);
|
|
if (!(uri && g_str_has_prefix (uri, "http://")))
|
|
return FALSE;
|
|
if (offline_mode == FALSE)
|
|
return FALSE;
|
|
|
|
return web_cache_replace_frame_uri (extension, uri, web_frame);
|
|
}
|
|
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 6)
|
|
static gboolean
|
|
web_cache_load_error_cb (WebKitWebView* web_view,
|
|
WebKitWebFrame* web_frame,
|
|
const gchar* uri,
|
|
GError* error,
|
|
MidoriExtension* extension)
|
|
{
|
|
if (offline_mode == FALSE)
|
|
return FALSE;
|
|
if (!(uri && g_str_has_prefix (uri, "http://")))
|
|
return FALSE;
|
|
|
|
return web_cache_replace_frame_uri (extension, uri, web_frame);
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
web_cache_save_headers (SoupMessage* msg,
|
|
gchar* filename)
|
|
{
|
|
gchar* dsc_filename = g_strdup_printf ("%s.dsc.tmp", filename);
|
|
SoupMessageHeaders* hdrs = msg->response_headers;
|
|
SoupMessageHeadersIter iter;
|
|
const gchar* name, *value;
|
|
FILE* dscfd;
|
|
|
|
soup_message_headers_iter_init (&iter, hdrs);
|
|
dscfd = g_fopen (dsc_filename, "w");
|
|
while (soup_message_headers_iter_next (&iter, &name, &value))
|
|
g_fprintf (dscfd, "%s: %s\n", name, value);
|
|
fclose (dscfd);
|
|
|
|
g_free (dsc_filename);
|
|
}
|
|
|
|
GHashTable*
|
|
web_cache_get_headers (gchar* filename)
|
|
{
|
|
GHashTable* headers;
|
|
FILE* file;
|
|
gchar* dsc_filename;
|
|
|
|
headers = g_hash_table_new_full (g_str_hash, g_str_equal,
|
|
(GDestroyNotify)g_free,
|
|
(GDestroyNotify)g_free);
|
|
|
|
if (!filename)
|
|
return headers;
|
|
if (!g_file_test (filename, G_FILE_TEST_EXISTS))
|
|
return headers;
|
|
|
|
dsc_filename = g_strdup_printf ("%s.dsc", filename);
|
|
if (!g_file_test (dsc_filename, G_FILE_TEST_EXISTS))
|
|
{
|
|
g_free (dsc_filename);
|
|
return headers;
|
|
}
|
|
if ((file = g_fopen (dsc_filename, "r")))
|
|
{
|
|
gchar line[128];
|
|
while (fgets (line, 128, file))
|
|
{
|
|
if (line==NULL)
|
|
continue;
|
|
g_strchomp (line);
|
|
gchar** data;
|
|
data = g_strsplit (line, ":", 2);
|
|
if (data[0] && data[1])
|
|
g_hash_table_insert (headers, g_strdup (data[0]),
|
|
g_strdup (g_strchug (data[1])));
|
|
g_strfreev (data);
|
|
}
|
|
}
|
|
fclose (file);
|
|
/* g_hash_table_destroy (headers); */
|
|
g_free (dsc_filename);
|
|
return headers;
|
|
}
|
|
|
|
static void
|
|
web_cache_tmp_prepare (gchar* filename)
|
|
{
|
|
gchar* tmp_filename = g_strdup_printf ("%s.tmp", filename);
|
|
|
|
/* If load was interruped we are ending up with a partical cache file
|
|
FIXME: What if a page asks to download the same file more than once?
|
|
Seems then we are ending up with a broken cache again */
|
|
if (g_file_test (tmp_filename, G_FILE_TEST_EXISTS))
|
|
g_unlink (tmp_filename);
|
|
g_file_set_contents (tmp_filename, "", -1, NULL);
|
|
g_free (tmp_filename);
|
|
}
|
|
|
|
static void
|
|
web_cache_set_content_type (SoupMessage* msg,
|
|
SoupBuffer* buffer)
|
|
{
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 15)
|
|
const char *ct;
|
|
SoupContentSniffer* sniffer = soup_content_sniffer_new ();
|
|
ct = soup_content_sniffer_sniff (sniffer, msg, buffer, NULL);
|
|
if (!ct)
|
|
ct = soup_message_headers_get_one (msg->response_headers, "Content-Type");
|
|
if (ct)
|
|
g_signal_emit_by_name (msg, "content-sniffed", ct, NULL);
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
web_cache_message_finished_cb (SoupMessage* msg,
|
|
gchar* filename)
|
|
{
|
|
gchar* headers;
|
|
gchar* tmp_headers;
|
|
gchar* tmp_data;
|
|
|
|
headers = g_strdup_printf ("%s.dsc", filename);
|
|
tmp_headers = g_strdup_printf ("%s.dsc.tmp", filename);
|
|
tmp_data = g_strdup_printf ("%s.tmp", filename);
|
|
|
|
g_rename (tmp_data, filename);
|
|
g_rename (tmp_headers, headers);
|
|
|
|
g_free (headers);
|
|
g_free (tmp_headers);
|
|
g_free (tmp_data);
|
|
}
|
|
|
|
static void
|
|
web_cache_message_got_chunk_cb (SoupMessage* msg,
|
|
SoupBuffer* chunk,
|
|
gchar* filename)
|
|
{
|
|
GFile *file;
|
|
GOutputStream *stream;
|
|
gchar *tmp_filename;
|
|
|
|
if (!chunk->data || !chunk->length)
|
|
return;
|
|
|
|
tmp_filename = g_strdup_printf ("%s.tmp", filename);
|
|
file = g_file_new_for_path (tmp_filename);
|
|
if ((stream = (GOutputStream*)g_file_append_to (file, 0, NULL, NULL)))
|
|
{
|
|
g_output_stream_write (stream, chunk->data, chunk->length, NULL, NULL);
|
|
g_object_unref (stream);
|
|
}
|
|
g_object_unref (file);
|
|
g_free (tmp_filename);
|
|
}
|
|
|
|
static void
|
|
web_cache_message_rewrite (SoupMessage* msg,
|
|
gchar* filename)
|
|
{
|
|
GHashTable* cache_headers = web_cache_get_headers (filename);
|
|
GHashTableIter iter;
|
|
SoupBuffer *buffer;
|
|
gpointer key, value;
|
|
char *data;
|
|
gsize length;
|
|
|
|
soup_message_set_status (msg, SOUP_STATUS_OK);
|
|
g_hash_table_iter_init (&iter, cache_headers);
|
|
while (g_hash_table_iter_next (&iter, &key, &value))
|
|
soup_message_headers_replace (msg->response_headers, key, value);
|
|
g_signal_emit_by_name (msg, "got-headers", NULL);
|
|
|
|
msg->response_body = soup_message_body_new ();
|
|
g_file_get_contents (filename, &data, &length, NULL);
|
|
if (data && length)
|
|
{
|
|
buffer = soup_buffer_new (SOUP_MEMORY_TEMPORARY, data, length);
|
|
web_cache_set_content_type (msg, buffer);
|
|
soup_message_body_append_buffer (msg->response_body, buffer);
|
|
g_signal_emit_by_name (msg, "got-chunk", buffer, NULL);
|
|
soup_buffer_free (buffer);
|
|
}
|
|
soup_message_got_body (msg);
|
|
g_free (data);
|
|
|
|
#if 0
|
|
if (offline_mode == TRUE)
|
|
{
|
|
/* Workaroung for offline mode
|
|
FIXME: libsoup-CRITICAL **: queue_message: assertion `item != NULL' failed */
|
|
SoupSession *session = webkit_get_default_session ();
|
|
soup_session_requeue_message (session, msg);
|
|
}
|
|
soup_message_finished (msg);
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
web_cache_mesage_got_headers_cb (SoupMessage* msg,
|
|
MidoriExtension* extension)
|
|
{
|
|
SoupURI* soup_uri = soup_message_get_uri (msg);
|
|
gchar* uri;
|
|
gchar* filename;
|
|
const gchar* nocache;
|
|
SoupMessageHeaders *hdrs = msg->response_headers;
|
|
|
|
/* Skip files downloaded by the user */
|
|
if (g_object_get_data (G_OBJECT (msg), "midori-web-cache-download"))
|
|
return;
|
|
|
|
/* Skip big files */
|
|
const char* cl = soup_message_headers_get_one (hdrs, "Content-Length");
|
|
if (cl && atoi (cl) > MAXLENGTH)
|
|
return;
|
|
|
|
nocache = soup_message_headers_get_one (hdrs, "Pragma");
|
|
if (!nocache)
|
|
nocache = soup_message_headers_get_one (hdrs, "Cache-Control");
|
|
if (nocache && g_regex_match_simple ("no-cache|no-store", nocache,
|
|
G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY))
|
|
{
|
|
return;
|
|
}
|
|
|
|
uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup ("");
|
|
filename = web_cache_get_cached_path (extension, uri);
|
|
if (msg->status_code == SOUP_STATUS_NOT_MODIFIED)
|
|
{
|
|
/* g_debug ("loading from cache: %s -> %s", uri, filename); */
|
|
g_signal_handlers_disconnect_by_func (msg,
|
|
web_cache_mesage_got_headers_cb, extension);
|
|
web_cache_message_rewrite (msg, filename);
|
|
g_free (filename);
|
|
}
|
|
else if (msg->status_code == SOUP_STATUS_OK)
|
|
{
|
|
/* g_debug ("updating cache: %s -> %s", uri, filename); */
|
|
web_cache_tmp_prepare (filename);
|
|
web_cache_save_headers (msg, filename);
|
|
g_signal_connect_data (msg, "got-chunk",
|
|
G_CALLBACK (web_cache_message_got_chunk_cb),
|
|
filename, (GClosureNotify)g_free, 0);
|
|
g_signal_connect (msg, "finished",
|
|
G_CALLBACK (web_cache_message_finished_cb), filename);
|
|
}
|
|
g_free (uri);
|
|
}
|
|
|
|
#if HAVE_WEBKIT_RESOURCE_REQUEST
|
|
static void
|
|
web_cache_resource_request_starting_cb (WebKitWebView* web_view,
|
|
WebKitWebFrame* web_frame,
|
|
WebKitWebResource* web_resource,
|
|
WebKitNetworkRequest* request,
|
|
WebKitNetworkResponse* response,
|
|
MidoriExtension* extension)
|
|
{
|
|
const gchar* uri;
|
|
gchar* filename;
|
|
/* TODO: Good place to check are we offline */
|
|
uri = webkit_network_request_get_uri (request);
|
|
if (!(uri && g_str_has_prefix (uri, "http://")))
|
|
return;
|
|
|
|
if (offline_mode == FALSE)
|
|
return;
|
|
|
|
filename = web_cache_get_cached_path (extension, uri);
|
|
/* g_debug ("loading %s -> %s",uri, filename); */
|
|
if (!g_file_test (filename, G_FILE_TEST_EXISTS))
|
|
{
|
|
g_free (filename);
|
|
return;
|
|
}
|
|
|
|
if (!(g_strcmp0 (uri, webkit_web_frame_get_uri (web_frame))
|
|
&& g_strcmp0 (webkit_web_data_source_get_unreachable_uri (
|
|
webkit_web_frame_get_data_source (web_frame)), uri)))
|
|
{
|
|
web_cache_replace_frame_uri (extension, uri, web_frame);
|
|
g_free (filename);
|
|
return;
|
|
}
|
|
|
|
gchar* file_uri = g_filename_to_uri (filename, NULL, NULL);
|
|
webkit_network_request_set_uri (request, file_uri);
|
|
|
|
g_free (file_uri);
|
|
g_free (filename);
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
web_cache_session_request_queued_cb (SoupSession* session,
|
|
SoupMessage* msg,
|
|
MidoriExtension* extension)
|
|
{
|
|
/*FIXME: Should we need to free soupuri? */
|
|
SoupURI* soup_uri = soup_message_get_uri (msg);
|
|
gchar* uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup ("");
|
|
|
|
/* For now we are handling only online mode here */
|
|
if (offline_mode == TRUE)
|
|
return;
|
|
|
|
if (g_str_has_prefix (uri, "http") && !g_strcmp0 (msg->method, "GET"))
|
|
{
|
|
gchar* filename = web_cache_get_cached_path (extension, uri);
|
|
if (offline_mode == FALSE)
|
|
{
|
|
GHashTable* cache_headers;
|
|
gchar* etag;
|
|
gchar* last_modified;
|
|
|
|
cache_headers = web_cache_get_headers (filename);
|
|
etag = g_hash_table_lookup (cache_headers, "ETag");
|
|
last_modified = g_hash_table_lookup (cache_headers, "Last-Modified");
|
|
if (etag)
|
|
soup_message_headers_append (msg->request_headers,
|
|
"If-None-Match", etag);
|
|
if (last_modified)
|
|
soup_message_headers_replace (msg->request_headers,
|
|
"If-Modified-Since", last_modified);
|
|
/* FIXME: Do we need to disconnect signal after we are in unqueue? */
|
|
g_signal_connect (msg, "got-headers",
|
|
G_CALLBACK (web_cache_mesage_got_headers_cb), extension);
|
|
|
|
g_free (etag);
|
|
g_free (last_modified);
|
|
g_free (filename);
|
|
/* FIXME: uncoment this is leading to a crash
|
|
g_hash_table_destroy (cache_headers); */
|
|
return;
|
|
}
|
|
/*
|
|
else
|
|
{
|
|
g_debug("queued in offline mode: %s -> %s", uri, filename);
|
|
if (g_file_test (filename, G_FILE_TEST_EXISTS))
|
|
{
|
|
soup_message_set_status (msg, SOUP_STATUS_NOT_MODIFIED);
|
|
web_cache_message_rewrite (msg, filename);
|
|
}
|
|
}
|
|
*/
|
|
g_free (filename);
|
|
}
|
|
g_free (uri);
|
|
}
|
|
|
|
static void
|
|
web_cache_add_tab_cb (MidoriBrowser* browser,
|
|
MidoriView* view,
|
|
MidoriExtension* extension)
|
|
{
|
|
GtkWidget* web_view = gtk_bin_get_child (GTK_BIN (view));
|
|
g_signal_connect (web_view, "navigation-policy-decision-requested",
|
|
G_CALLBACK (web_cache_navigation_decision_cb), extension);
|
|
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 6)
|
|
g_signal_connect (web_view, "load-error",
|
|
G_CALLBACK (web_cache_load_error_cb), extension);
|
|
#endif
|
|
|
|
#if HAVE_WEBKIT_RESOURCE_REQUEST
|
|
g_signal_connect (web_view, "resource-request-starting",
|
|
G_CALLBACK (web_cache_resource_request_starting_cb), extension);
|
|
#endif
|
|
}
|
|
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 3)
|
|
static void
|
|
web_cache_add_download_cb (MidoriBrowser* browser,
|
|
WebKitDownload* download,
|
|
MidoriExtension* extension)
|
|
{
|
|
WebKitNetworkRequest* request = webkit_download_get_network_request (download);
|
|
SoupMessage* msg = webkit_network_request_get_message (request);
|
|
if (msg)
|
|
g_object_set_data (G_OBJECT (msg), "midori-web-cache-download",
|
|
(gpointer)0xdeadbeef);
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
web_cache_deactivate_cb (MidoriExtension* extension,
|
|
MidoriBrowser* browser);
|
|
|
|
static void
|
|
web_cache_add_tab_foreach_cb (MidoriView* view,
|
|
MidoriBrowser* browser,
|
|
MidoriExtension* extension)
|
|
{
|
|
web_cache_add_tab_cb (browser, view, extension);
|
|
}
|
|
|
|
static void
|
|
web_cache_app_add_browser_cb (MidoriApp* app,
|
|
MidoriBrowser* browser,
|
|
MidoriExtension* extension)
|
|
{
|
|
midori_browser_foreach (browser,
|
|
(GtkCallback)web_cache_add_tab_foreach_cb, extension);
|
|
g_signal_connect (browser, "add-tab",
|
|
G_CALLBACK (web_cache_add_tab_cb), extension);
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 3)
|
|
g_signal_connect (browser, "add-download",
|
|
G_CALLBACK (web_cache_add_download_cb), extension);
|
|
#endif
|
|
g_signal_connect (extension, "deactivate",
|
|
G_CALLBACK (web_cache_deactivate_cb), browser);
|
|
}
|
|
|
|
static void
|
|
web_cache_deactivate_tabs (MidoriView* view,
|
|
MidoriExtension* extension)
|
|
{
|
|
GtkWidget* web_view = gtk_bin_get_child (GTK_BIN (view));
|
|
|
|
#if HAVE_WEBKIT_RESOURCE_REQUEST
|
|
g_signal_handlers_disconnect_by_func (
|
|
web_view, web_cache_resource_request_starting_cb, extension);
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
web_cache_deactivate_cb (MidoriExtension* extension,
|
|
MidoriBrowser* browser)
|
|
{
|
|
MidoriApp* app = midori_extension_get_app (extension);
|
|
SoupSession* session = webkit_get_default_session ();
|
|
|
|
g_signal_handlers_disconnect_by_func (
|
|
session, web_cache_session_request_queued_cb, extension);
|
|
g_signal_handlers_disconnect_by_func (
|
|
extension, web_cache_deactivate_cb, browser);
|
|
g_signal_handlers_disconnect_by_func (
|
|
app, web_cache_app_add_browser_cb, extension);
|
|
g_signal_handlers_disconnect_by_func (
|
|
browser, web_cache_add_tab_cb, extension);
|
|
#if WEBKIT_CHECK_VERSION (1, 1, 3)
|
|
g_signal_handlers_disconnect_by_func (
|
|
browser, web_cache_add_download_cb, extension);
|
|
#endif
|
|
midori_browser_foreach (browser, (GtkCallback)web_cache_deactivate_tabs, extension);
|
|
}
|
|
|
|
static void
|
|
web_cache_activate_cb (MidoriExtension* extension,
|
|
MidoriApp* app)
|
|
{
|
|
const gchar* cache_path = midori_extension_get_string (extension, "path");
|
|
KatzeArray* browsers;
|
|
MidoriBrowser* browser;
|
|
guint i;
|
|
SoupSession* session = webkit_get_default_session ();
|
|
|
|
katze_mkdir_with_parents (cache_path, 0700);
|
|
g_signal_connect (session, "request-queued",
|
|
G_CALLBACK (web_cache_session_request_queued_cb), extension);
|
|
|
|
browsers = katze_object_get_object (app, "browsers");
|
|
i = 0;
|
|
while ((browser = katze_array_get_nth_item (browsers, i++)))
|
|
web_cache_app_add_browser_cb (app, browser, extension);
|
|
g_signal_connect (app, "add-browser",
|
|
G_CALLBACK (web_cache_app_add_browser_cb), extension);
|
|
|
|
g_object_unref (browsers);
|
|
}
|
|
|
|
MidoriExtension*
|
|
extension_init (void)
|
|
{
|
|
gchar* cache_path = g_build_filename (g_get_user_cache_dir (),
|
|
PACKAGE_NAME, "web", NULL);
|
|
MidoriExtension* extension = g_object_new (MIDORI_TYPE_EXTENSION,
|
|
"name", _("Web Cache"),
|
|
"description", _("Cache HTTP communication on disk"),
|
|
"version", "0.1",
|
|
"authors", "Christian Dywan <christian@twotoasts.de>",
|
|
NULL);
|
|
midori_extension_install_string (extension, "path", cache_path);
|
|
midori_extension_install_integer (extension, "size", 50);
|
|
|
|
g_free (cache_path);
|
|
|
|
g_signal_connect (extension, "activate",
|
|
G_CALLBACK (web_cache_activate_cb), NULL);
|
|
|
|
return extension;
|
|
}
|