midori/extensions/web-cache.c
Alexander Butenko 8ef3eeaa89 Set proper content types and work with temporary cache file fragments
With WebKit 1.1.11 and newer, we can sniff the content type which
is needed in some cases to load for instance Flash videos.

We are writing chunks into temporary files and commit completed
files to the cache, so that we don't end up serving partial files.

Old cache entries are removed before updating.

Length detection is improved.

File creation moved from got-chunk to got-headers.
2009-10-27 20:03:22 +01:00

594 lines
19 KiB
C

/*
Copyright (C) 2009 Christian Dywan <christian@twotoasts.de>
Copyright (C) 2009 Alexander Butenko <a.butenka@gmail.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
See the file COPYING for the full license text.
*/
#include <midori/midori.h>
#include <midori/sokoke.h>
#include "config.h"
#include <glib/gstdio.h>
#include <stdlib.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
/* File-wide switch selecting whether requests are served from the cache
   instead of the network. Nothing in the code visible here sets it to TRUE. */
static gboolean offline_mode = FALSE;
/* Expands to a version check for WebKit 1.1.14; used with #if to guard the
   "resource-request-starting" handler */
#define HAVE_WEBKIT_RESOURCE_REQUEST WEBKIT_CHECK_VERSION (1, 1, 14)
/* Size limit in bytes (1 MiB) compared against the Content-Length of a
   response. Parenthesized so that the macro expands to a single value
   when used inside a larger expression, e.g. x / MAXLENGTH. */
#define MAXLENGTH (1024 * 1024)
/*
 * web_cache_get_cached_path:
 * @extension: the extension, queried once for its "path" setting
 * @uri: the URI to map to a cache file
 *
 * Maps @uri to a file name inside the cache folder. The layout is
 *   <path>/<first two characters of the MD5 checksum>/<checksum><ext>
 * where <ext> is at most the first four characters of whatever follows
 * the last '.' in the encoded URI (including the dot itself).
 * The sub folder is created as a side effect.
 *
 * Return value: a newly allocated path, free with g_free()
 */
static gchar*
web_cache_get_cached_path (MidoriExtension* extension,
                           const gchar*     uri)
{
    /* Looked up on first use and kept for all later calls */
    static const gchar* cache_path = NULL;
    gchar* checksum;
    gchar* folder;
    gchar* sub_path;
    gchar* encoded;
    gchar* ext;
    gchar* cached_filename;
    gchar* cached_path;

    if (!cache_path)
        cache_path = midori_extension_get_string (extension, "path");

    checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, uri, -1);
    folder = g_strdup_printf ("%c%c", checksum[0], checksum[1]);
    sub_path = g_build_path (G_DIR_SEPARATOR_S, cache_path, folder, NULL);
    /* Best effort: the result is deliberately ignored, a later attempt
       to write into the folder will fail if it could not be created */
    g_mkdir (sub_path, 0700);
    g_free (folder);

    encoded = soup_uri_encode (uri, "/");
    ext = g_strdup (g_strrstr (encoded, "."));
    /* Make sure ext isn't becoming too long */
    if (ext && ext[0] && ext[1] && ext[2] && ext[3] && ext[4])
        ext[4] = '\0';
    cached_filename = g_strdup_printf ("%s%s", checksum, ext ? ext : "");
    g_free (ext);
    g_free (encoded);
    g_free (checksum);

    cached_path = g_build_filename (sub_path, cached_filename, NULL);
    g_free (cached_filename);
    /* sub_path is owned by this function and must not outlive it */
    g_free (sub_path);
    return cached_path;
}
/*
 * web_cache_replace_frame_uri:
 * @extension: the extension, used to resolve the cache path
 * @uri: the URI that was requested
 * @web_frame: the frame to load the cached content into
 *
 * Loads the cached copy of @uri into @web_frame as an alternate string.
 *
 * Return value: %TRUE if cached content was read and handed to the frame,
 *     %FALSE if there is no readable cache file for @uri
 */
static gboolean
web_cache_replace_frame_uri (MidoriExtension* extension,
                             const gchar*     uri,
                             WebKitWebFrame*  web_frame)
{
    gchar* filename = web_cache_get_cached_path (extension, uri);
    gchar* data = NULL;
    gboolean handled = FALSE;

    /* g_debug ("cache lookup: %s => %s", uri, filename); */
    /* Only report the request as handled when the file could actually be
       read; a missing or unreadable file makes g_file_get_contents fail */
    if (g_file_get_contents (filename, &data, NULL, NULL))
    {
        webkit_web_frame_load_alternate_string (web_frame, data, NULL, uri);
        handled = TRUE;
    }
    g_free (data);
    g_free (filename);
    return handled;
}
/*
 * Handler for "navigation-policy-decision-requested" on a web view.
 * In offline mode, plain http:// navigations are answered from the cache.
 *
 * Return value: %TRUE if the cached page was loaded into @web_frame,
 *     %FALSE to leave the decision to other handlers
 */
static gboolean
web_cache_navigation_decision_cb (WebKitWebView*             web_view,
                                  WebKitWebFrame*            web_frame,
                                  WebKitNetworkRequest*      request,
                                  WebKitWebNavigationAction* action,
                                  WebKitWebPolicyDecision*   decision,
                                  MidoriExtension*           extension)
{
    const gchar* uri = webkit_network_request_get_uri (request);
    gboolean is_http = uri != NULL && g_str_has_prefix (uri, "http://");

    if (is_http && offline_mode != FALSE)
        return web_cache_replace_frame_uri (extension, uri, web_frame);
    return FALSE;
}
#if WEBKIT_CHECK_VERSION (1, 1, 6)
/*
 * Handler for "load-error" on a web view.
 * In offline mode, a failed http:// load is replaced by the cached copy.
 *
 * Return value: %TRUE if the cached page was loaded into @web_frame,
 *     %FALSE otherwise
 */
static gboolean
web_cache_load_error_cb (WebKitWebView*   web_view,
                         WebKitWebFrame*  web_frame,
                         const gchar*     uri,
                         GError*          error,
                         MidoriExtension* extension)
{
    if (offline_mode != FALSE
     && uri != NULL
     && g_str_has_prefix (uri, "http://"))
    {
        return web_cache_replace_frame_uri (extension, uri, web_frame);
    }
    return FALSE;
}
#endif
/*
 * web_cache_save_headers:
 * @msg: the message whose response headers are stored
 * @filename: path of the cache file the headers belong to
 *
 * Writes all response headers of @msg as "Name: value" lines into the
 * temporary description file "<filename>.dsc.tmp". If that file cannot
 * be opened for writing nothing is stored.
 */
static void
web_cache_save_headers (SoupMessage* msg,
                        gchar*       filename)
{
    gchar* dsc_filename = g_strdup_printf ("%s.dsc.tmp", filename);
    FILE* dscfd = g_fopen (dsc_filename, "w");

    /* g_fopen returns NULL on failure, which must not reach
       g_fprintf or fclose */
    if (dscfd)
    {
        SoupMessageHeadersIter iter;
        const gchar* name;
        const gchar* value;

        soup_message_headers_iter_init (&iter, msg->response_headers);
        while (soup_message_headers_iter_next (&iter, &name, &value))
            g_fprintf (dscfd, "%s: %s\n", name, value);
        fclose (dscfd);
    }
    g_free (dsc_filename);
}
/*
 * web_cache_get_headers:
 * @filename: path of a cache file, or %NULL
 *
 * Reads the headers stored next to @filename in "<filename>.dsc", one
 * "Name: value" pair per line. Lines without a ':' are skipped.
 *
 * Return value: a new hash table mapping header names to values, both
 *     owned by the table. The table is empty if @filename is %NULL, the
 *     cache file does not exist or the description file cannot be read.
 *     Release it with g_hash_table_destroy().
 */
GHashTable*
web_cache_get_headers (gchar* filename)
{
    GHashTable* headers;
    gchar* dsc_filename;
    gchar* contents = NULL;

    headers = g_hash_table_new_full (g_str_hash, g_str_equal,
                                     (GDestroyNotify)g_free,
                                     (GDestroyNotify)g_free);

    /* Headers are only meaningful if the cached body itself exists */
    if (!filename || !g_file_test (filename, G_FILE_TEST_EXISTS))
        return headers;

    dsc_filename = g_strdup_printf ("%s.dsc", filename);
    /* Read the file as a whole so that header lines of any length stay
       intact; a missing or unreadable file simply yields no headers */
    if (g_file_get_contents (dsc_filename, &contents, NULL, NULL))
    {
        gchar** lines = g_strsplit (contents, "\n", -1);
        guint i;

        for (i = 0; lines[i] != NULL; i++)
        {
            gchar** data;

            g_strchomp (lines[i]);
            /* Split at the first ':' only, values may contain colons */
            data = g_strsplit (lines[i], ":", 2);
            if (data[0] && data[1])
                g_hash_table_insert (headers, g_strdup (data[0]),
                                     g_strdup (g_strchug (data[1])));
            g_strfreev (data);
        }
        g_strfreev (lines);
    }
    g_free (contents);
    g_free (dsc_filename);
    return headers;
}
/*
 * web_cache_tmp_prepare:
 * @filename: path of the cache file about to be updated
 *
 * Makes sure "<filename>.tmp" exists and is empty before chunks are
 * appended to it.
 */
static void
web_cache_tmp_prepare (gchar* filename)
{
    gchar* partial = g_strdup_printf ("%s.tmp", filename);
    gboolean leftover = g_file_test (partial, G_FILE_TEST_EXISTS);

    /* An interrupted load leaves a partial temporary file behind.
       FIXME: What if a page asks to download the same file more than once?
       It seems we would end up with a broken cache entry again. */
    if (leftover)
        g_unlink (partial);

    g_file_set_contents (partial, "", -1, NULL);
    g_free (partial);
}
/*
 * web_cache_set_content_type:
 * @msg: the message being answered from the cache
 * @buffer: the cached body data used for sniffing
 *
 * Determines a content type for @buffer, falling back to the
 * "Content-Type" response header, and announces it by emitting
 * "content-sniffed" on @msg. Does nothing when built against a WebKit
 * older than 1.1.15.
 */
static void
web_cache_set_content_type (SoupMessage* msg,
                            SoupBuffer*  buffer)
{
    #if WEBKIT_CHECK_VERSION (1, 1, 15)
    SoupContentSniffer* sniffer = soup_content_sniffer_new ();
    /* The sniffed type is a newly allocated string according to the
       libsoup API reference, so it is kept separately to be freed */
    gchar* sniffed = soup_content_sniffer_sniff (sniffer, msg, buffer, NULL);
    const gchar* ct = sniffed;

    if (!ct)
        ct = soup_message_headers_get_one (msg->response_headers, "Content-Type");
    if (ct)
        g_signal_emit_by_name (msg, "content-sniffed", ct, NULL);

    g_free (sniffed);
    /* The sniffer was created here and is not needed any further */
    g_object_unref (sniffer);
    #endif
}
/*
 * web_cache_message_finished_cb:
 * @msg: the message that finished
 * @filename: path of the final cache file; not freed by this callback
 *
 * Handler for "finished". Commits the temporary body and header files
 * ("<filename>.tmp", "<filename>.dsc.tmp") to their final names when the
 * message completed with status 200. For any other final status, for
 * example after an error or a cancellation, the temporary files are
 * removed so that a partial download never becomes a cache entry.
 */
static void
web_cache_message_finished_cb (SoupMessage* msg,
                               gchar*       filename)
{
    gchar* headers = g_strdup_printf ("%s.dsc", filename);
    gchar* tmp_headers = g_strdup_printf ("%s.dsc.tmp", filename);
    gchar* tmp_data = g_strdup_printf ("%s.tmp", filename);

    if (msg->status_code == SOUP_STATUS_OK)
    {
        g_rename (tmp_data, filename);
        g_rename (tmp_headers, headers);
    }
    else
    {
        g_unlink (tmp_data);
        g_unlink (tmp_headers);
    }

    g_free (headers);
    g_free (tmp_headers);
    g_free (tmp_data);
}
/*
 * web_cache_message_got_chunk_cb:
 * @msg: the message being received
 * @chunk: the piece of the response body that just arrived
 * @filename: path of the final cache file
 *
 * Handler for "got-chunk". Appends @chunk to "<filename>.tmp".
 * Empty chunks are ignored; I/O errors are ignored as well.
 */
static void
web_cache_message_got_chunk_cb (SoupMessage* msg,
                                SoupBuffer*  chunk,
                                gchar*       filename)
{
    GFile* file;
    GOutputStream* stream;
    gchar* tmp_filename;

    if (!chunk->data || !chunk->length)
        return;

    tmp_filename = g_strdup_printf ("%s.tmp", filename);
    file = g_file_new_for_path (tmp_filename);
    stream = (GOutputStream*)g_file_append_to (file, G_FILE_CREATE_NONE,
                                               NULL, NULL);
    if (stream)
    {
        /* A plain write may stop short of chunk->length; write_all keeps
           going until the whole chunk is written or an error occurs */
        g_output_stream_write_all (stream, chunk->data, chunk->length,
                                   NULL, NULL, NULL);
        g_object_unref (stream);
    }
    g_object_unref (file);
    g_free (tmp_filename);
}
/*
 * web_cache_message_rewrite:
 * @msg: the message to turn into a cached response
 * @filename: path of the cache file holding the body
 *
 * Rewrites @msg so that it looks like a complete 200 response served
 * from the cache: the status is set to OK, the stored headers replace
 * the response headers, "got-headers" is emitted, a fresh response body
 * is filled from @filename and announced via "got-chunk", and finally
 * the body is reported as complete.
 */
static void
web_cache_message_rewrite (SoupMessage* msg,
                           gchar*       filename)
{
    GHashTable* cache_headers = web_cache_get_headers (filename);
    GHashTableIter iter;
    SoupBuffer* buffer;
    gpointer key, value;
    /* Initialized so that a failed read is seen as "no data" below */
    char* data = NULL;
    gsize length = 0;

    soup_message_set_status (msg, SOUP_STATUS_OK);
    g_hash_table_iter_init (&iter, cache_headers);
    while (g_hash_table_iter_next (&iter, &key, &value))
        soup_message_headers_replace (msg->response_headers, key, value);
    /* The table was only needed to fill in the response headers */
    g_hash_table_destroy (cache_headers);
    g_signal_emit_by_name (msg, "got-headers", NULL);

    msg->response_body = soup_message_body_new ();
    g_file_get_contents (filename, &data, &length, NULL);
    if (data && length)
    {
        /* The buffer only borrows data, which is freed further down
           after the buffer itself has been released */
        buffer = soup_buffer_new (SOUP_MEMORY_TEMPORARY, data, length);
        web_cache_set_content_type (msg, buffer);
        soup_message_body_append_buffer (msg->response_body, buffer);
        g_signal_emit_by_name (msg, "got-chunk", buffer, NULL);
        soup_buffer_free (buffer);
    }
    soup_message_got_body (msg);
    g_free (data);

    #if 0
    if (offline_mode == TRUE)
    {
        /* Workaround for offline mode
           FIXME: libsoup-CRITICAL **: queue_message: assertion `item != NULL' failed */
        SoupSession *session = webkit_get_default_session ();
        soup_session_requeue_message (session, msg);
    }
    soup_message_finished (msg);
    #endif
}
/*
 * web_cache_mesage_got_headers_cb:
 * @msg: the message whose response headers arrived
 * @extension: the extension, used to resolve the cache path
 *
 * Handler for "got-headers". Decides what to do with the response:
 *  - user downloads, responses announcing more than MAXLENGTH bytes and
 *    responses marked no-cache/no-store are left alone;
 *  - a 304 response is rewritten into a full response from the cache;
 *  - a 200 response is recorded: the temporary files are prepared, the
 *    headers are saved and handlers are connected that store the body
 *    chunks and commit the entry once the message has finished;
 *  - any other status is ignored.
 */
static void
web_cache_mesage_got_headers_cb (SoupMessage*     msg,
                                 MidoriExtension* extension)
{
    SoupURI* soup_uri = soup_message_get_uri (msg);
    gchar* uri;
    gchar* filename;
    const gchar* nocache;
    SoupMessageHeaders *hdrs = msg->response_headers;

    /* Skip files downloaded by the user */
    if (g_object_get_data (G_OBJECT (msg), "midori-web-cache-download"))
        return;

    /* Skip big files */
    const char* cl = soup_message_headers_get_one (hdrs, "Content-Length");
    if (cl && atoi (cl) > MAXLENGTH)
        return;

    nocache = soup_message_headers_get_one (hdrs, "Pragma");
    if (!nocache)
        nocache = soup_message_headers_get_one (hdrs, "Cache-Control");
    if (nocache && g_regex_match_simple ("no-cache|no-store", nocache,
                                         G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY))
    {
        return;
    }

    uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup ("");
    filename = web_cache_get_cached_path (extension, uri);
    if (msg->status_code == SOUP_STATUS_NOT_MODIFIED)
    {
        /* g_debug ("loading from cache: %s -> %s", uri, filename); */
        /* The rewrite emits "got-headers" itself, so this handler is
           disconnected first */
        g_signal_handlers_disconnect_by_func (msg,
            web_cache_mesage_got_headers_cb, extension);
        web_cache_message_rewrite (msg, filename);
        g_free (filename);
    }
    else if (msg->status_code == SOUP_STATUS_OK)
    {
        /* g_debug ("updating cache: %s -> %s", uri, filename); */
        web_cache_tmp_prepare (filename);
        web_cache_save_headers (msg, filename);
        /* Ownership of filename moves to the "got-chunk" connection,
           which frees it when the handler is disconnected; the
           "finished" handler merely borrows the same string */
        g_signal_connect_data (msg, "got-chunk",
            G_CALLBACK (web_cache_message_got_chunk_cb),
            filename, (GClosureNotify)g_free, 0);
        g_signal_connect (msg, "finished",
            G_CALLBACK (web_cache_message_finished_cb), filename);
    }
    else
    {
        /* Nothing took ownership of filename for other status codes */
        g_free (filename);
    }
    g_free (uri);
}
#if HAVE_WEBKIT_RESOURCE_REQUEST
/*
 * Handler for "resource-request-starting" on a web view.
 * In offline mode, an http:// request with an existing cache file is
 * answered locally: if the request is for the frame's own URI (or its
 * unreachable URI) the cached page is loaded into the frame, otherwise
 * the request is redirected to the cache file via a file URI.
 */
static void
web_cache_resource_request_starting_cb (WebKitWebView*         web_view,
                                        WebKitWebFrame*        web_frame,
                                        WebKitWebResource*     web_resource,
                                        WebKitNetworkRequest*  request,
                                        WebKitNetworkResponse* response,
                                        MidoriExtension*       extension)
{
    const gchar* uri;
    gchar* filename;

    /* TODO: Good place to check are we offline */
    uri = webkit_network_request_get_uri (request);
    if (uri == NULL || !g_str_has_prefix (uri, "http://"))
        return;
    if (offline_mode == FALSE)
        return;

    filename = web_cache_get_cached_path (extension, uri);
    /* g_debug ("loading %s -> %s",uri, filename); */
    if (g_file_test (filename, G_FILE_TEST_EXISTS))
    {
        if (g_strcmp0 (uri, webkit_web_frame_get_uri (web_frame)) == 0
         || g_strcmp0 (webkit_web_data_source_get_unreachable_uri (
                webkit_web_frame_get_data_source (web_frame)), uri) == 0)
        {
            /* The request is for the document of the frame itself */
            web_cache_replace_frame_uri (extension, uri, web_frame);
        }
        else
        {
            /* Any other resource: point the request at the local file */
            gchar* file_uri = g_filename_to_uri (filename, NULL, NULL);
            webkit_network_request_set_uri (request, file_uri);
            g_free (file_uri);
        }
    }
    g_free (filename);
}
#endif
/*
 * web_cache_session_request_queued_cb:
 * @session: the session the message was queued on
 * @msg: the queued message
 * @extension: the extension, used to resolve the cache path
 *
 * Handler for "request-queued". For GET requests whose URI starts with
 * "http", the validators stored with a cached copy (ETag, Last-Modified)
 * are added as conditional request headers and a "got-headers" handler
 * is connected that processes the response. Offline mode is not
 * handled here.
 */
static void
web_cache_session_request_queued_cb (SoupSession*     session,
                                     SoupMessage*     msg,
                                     MidoriExtension* extension)
{
    SoupURI* soup_uri;
    gchar* uri;

    /* For now we are handling only online mode here */
    if (offline_mode == TRUE)
        return;

    /* Only the string copy made below is owned by this function */
    soup_uri = soup_message_get_uri (msg);
    uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup ("");
    if (g_str_has_prefix (uri, "http") && !g_strcmp0 (msg->method, "GET"))
    {
        gchar* filename = web_cache_get_cached_path (extension, uri);
        GHashTable* cache_headers = web_cache_get_headers (filename);
        /* Both values belong to cache_headers and must not be freed
           individually; they go away with the table */
        const gchar* etag = g_hash_table_lookup (cache_headers, "ETag");
        const gchar* last_modified = g_hash_table_lookup (cache_headers,
                                                          "Last-Modified");

        if (etag)
            soup_message_headers_append (msg->request_headers,
                                         "If-None-Match", etag);
        if (last_modified)
            soup_message_headers_replace (msg->request_headers,
                                          "If-Modified-Since", last_modified);
        /* FIXME: Do we need to disconnect signal after we are in unqueue? */
        g_signal_connect (msg, "got-headers",
            G_CALLBACK (web_cache_mesage_got_headers_cb), extension);

        g_hash_table_destroy (cache_headers);
        g_free (filename);
    }
    g_free (uri);
}
/*
 * Handler for "add-tab" on a browser.
 * Hooks the cache handlers up to the web view contained in @view.
 * Which handlers are connected depends on the WebKit version.
 */
static void
web_cache_add_tab_cb (MidoriBrowser*   browser,
                      MidoriView*      view,
                      MidoriExtension* extension)
{
    GtkWidget* child = gtk_bin_get_child (GTK_BIN (view));

    g_signal_connect (child, "navigation-policy-decision-requested",
                      G_CALLBACK (web_cache_navigation_decision_cb),
                      extension);
    #if WEBKIT_CHECK_VERSION (1, 1, 6)
    g_signal_connect (child, "load-error",
                      G_CALLBACK (web_cache_load_error_cb),
                      extension);
    #endif
    #if HAVE_WEBKIT_RESOURCE_REQUEST
    g_signal_connect (child, "resource-request-starting",
                      G_CALLBACK (web_cache_resource_request_starting_cb),
                      extension);
    #endif
}
#if WEBKIT_CHECK_VERSION (1, 1, 3)
/*
 * Handler for "add-download" on a browser.
 * Tags the message behind @download with "midori-web-cache-download",
 * the marker that is looked up when response headers arrive in order
 * to skip files downloaded by the user.
 */
static void
web_cache_add_download_cb (MidoriBrowser*   browser,
                           WebKitDownload*  download,
                           MidoriExtension* extension)
{
    WebKitNetworkRequest* request;
    SoupMessage* msg;

    request = webkit_download_get_network_request (download);
    msg = webkit_network_request_get_message (request);
    if (!msg)
        return;

    /* Only the presence of the key matters, the value is a dummy */
    g_object_set_data (G_OBJECT (msg), "midori-web-cache-download",
                       (gpointer)0xdeadbeef);
}
#endif
/* Forward declaration: the handler is connected in
   web_cache_app_add_browser_cb before its definition further down */
static void
web_cache_deactivate_cb (MidoriExtension* extension,
MidoriBrowser* browser);
/*
 * web_cache_add_tab_foreach_cb:
 * @view: a view of the browser being iterated
 * @extension: the user data handed to midori_browser_foreach
 *
 * Adapter used with midori_browser_foreach. It is invoked through a
 * GtkCallback cast, i.e. with exactly two arguments (widget, user data),
 * so it must not declare a third parameter: reading one would yield an
 * indeterminate value. The browser argument of web_cache_add_tab_cb is
 * not used by that function, hence NULL is passed.
 */
static void
web_cache_add_tab_foreach_cb (MidoriView*      view,
                              MidoriExtension* extension)
{
    web_cache_add_tab_cb (NULL, view, extension);
}
/*
 * Handler for "add-browser" on the application, also called directly
 * for browsers that exist already when the extension is activated.
 * Sets up the existing tabs of @browser, watches for new tabs and
 * downloads, and arranges for cleanup when @extension is deactivated.
 */
static void
web_cache_app_add_browser_cb (MidoriApp*       app,
                              MidoriBrowser*   browser,
                              MidoriExtension* extension)
{
    /* Tabs that are open already */
    midori_browser_foreach (browser,
                            (GtkCallback)web_cache_add_tab_foreach_cb,
                            extension);

    /* Tabs and downloads added later on */
    g_signal_connect (browser, "add-tab",
                      G_CALLBACK (web_cache_add_tab_cb), extension);
    #if WEBKIT_CHECK_VERSION (1, 1, 3)
    g_signal_connect (browser, "add-download",
                      G_CALLBACK (web_cache_add_download_cb), extension);
    #endif

    /* Note the swapped roles: the browser is the user data here */
    g_signal_connect (extension, "deactivate",
                      G_CALLBACK (web_cache_deactivate_cb), browser);
}
static void
web_cache_deactivate_tabs (MidoriView* view,
MidoriExtension* extension)
{
GtkWidget* web_view = gtk_bin_get_child (GTK_BIN (view));
#if HAVE_WEBKIT_RESOURCE_REQUEST
g_signal_handlers_disconnect_by_func (
web_view, web_cache_resource_request_starting_cb, extension);
#endif
}
/*
 * Handler for "deactivate" on the extension, connected once per browser.
 * Disconnects the handlers on the session, the extension, the
 * application and @browser, then cleans up every tab of @browser.
 */
static void
web_cache_deactivate_cb (MidoriExtension* extension,
                         MidoriBrowser*   browser)
{
    MidoriApp* app;
    SoupSession* session;

    app = midori_extension_get_app (extension);
    session = webkit_get_default_session ();

    /* Session wide request hook */
    g_signal_handlers_disconnect_by_func (session,
        web_cache_session_request_queued_cb, extension);
    /* This very handler */
    g_signal_handlers_disconnect_by_func (extension,
        web_cache_deactivate_cb, browser);
    /* Application and browser level hooks */
    g_signal_handlers_disconnect_by_func (app,
        web_cache_app_add_browser_cb, extension);
    g_signal_handlers_disconnect_by_func (browser,
        web_cache_add_tab_cb, extension);
    #if WEBKIT_CHECK_VERSION (1, 1, 3)
    g_signal_handlers_disconnect_by_func (browser,
        web_cache_add_download_cb, extension);
    #endif

    midori_browser_foreach (browser,
                            (GtkCallback)web_cache_deactivate_tabs,
                            extension);
}
/*
 * Handler for "activate" on the extension.
 * Creates the cache folder, starts watching requests queued on the
 * default session and sets up all existing as well as future browsers.
 */
static void
web_cache_activate_cb (MidoriExtension* extension,
                       MidoriApp*       app)
{
    const gchar* cache_path;
    SoupSession* session;
    KatzeArray* browsers;
    MidoriBrowser* browser;
    guint i;

    cache_path = midori_extension_get_string (extension, "path");
    session = webkit_get_default_session ();

    katze_mkdir_with_parents (cache_path, 0700);
    g_signal_connect (session, "request-queued",
                      G_CALLBACK (web_cache_session_request_queued_cb),
                      extension);

    /* Browsers that are open already */
    browsers = katze_object_get_object (app, "browsers");
    for (i = 0; (browser = katze_array_get_nth_item (browsers, i)); i++)
        web_cache_app_add_browser_cb (app, browser, extension);

    /* Browsers opened later on */
    g_signal_connect (app, "add-browser",
                      G_CALLBACK (web_cache_app_add_browser_cb), extension);
    g_object_unref (browsers);
}
/*
 * extension_init:
 *
 * Entry point of the extension. Creates the extension object, installs
 * its settings ("path", defaulting to <user cache dir>/PACKAGE_NAME/web,
 * and "size", defaulting to 50) and connects the activation handler.
 *
 * Return value: the new extension
 */
MidoriExtension*
extension_init (void)
{
    MidoriExtension* extension;
    gchar* default_path;

    default_path = g_build_filename (g_get_user_cache_dir (),
                                     PACKAGE_NAME, "web", NULL);
    extension = g_object_new (MIDORI_TYPE_EXTENSION,
        "name", _("Web Cache"),
        "description", _("Cache HTTP communication on disk"),
        "version", "0.1",
        "authors", "Christian Dywan <christian@twotoasts.de>",
        NULL);

    midori_extension_install_string (extension, "path", default_path);
    midori_extension_install_integer (extension, "size", 50);
    g_free (default_path);

    g_signal_connect (extension, "activate",
                      G_CALLBACK (web_cache_activate_cb), NULL);
    return extension;
}