Set proper content types and work with temporary cache file fragments

With WebKit 1.1.11 and newer, we can sniff the content type, which
is needed in some cases, for instance to load Flash videos.

We are writing chunks into temporary files and commit completed
files to the cache, so that we don't end up serving partial files.

Old cache entries are removed before updating.

Length detection is improved.

File creation moved from got-chunk to got-headers.
This commit is contained in:
Alexander Butenko 2009-10-27 20:03:22 +01:00 committed by Christian Dywan
parent f0e7b93964
commit 8ef3eeaa89

View file

@ -16,6 +16,7 @@
#include "config.h" #include "config.h"
#include <glib/gstdio.h> #include <glib/gstdio.h>
#include <stdlib.h>
#if HAVE_UNISTD_H #if HAVE_UNISTD_H
#include <unistd.h> #include <unistd.h>
#endif #endif
@ -122,14 +123,14 @@ static void
web_cache_save_headers (SoupMessage* msg, web_cache_save_headers (SoupMessage* msg,
gchar* filename) gchar* filename)
{ {
gchar* dsc_filename = g_strdup_printf ("%s.dsc", filename); gchar* dsc_filename = g_strdup_printf ("%s.dsc.tmp", filename);
SoupMessageHeaders* hdrs = msg->response_headers; SoupMessageHeaders* hdrs = msg->response_headers;
SoupMessageHeadersIter iter; SoupMessageHeadersIter iter;
const gchar* name, *value; const gchar* name, *value;
FILE* dscfd; FILE* dscfd;
soup_message_headers_iter_init (&iter, hdrs); soup_message_headers_iter_init (&iter, hdrs);
dscfd = g_fopen (dsc_filename,"w+"); dscfd = g_fopen (dsc_filename, "w");
while (soup_message_headers_iter_next (&iter, &name, &value)) while (soup_message_headers_iter_next (&iter, &name, &value))
g_fprintf (dscfd, "%s: %s\n", name, value); g_fprintf (dscfd, "%s: %s\n", name, value);
fclose (dscfd); fclose (dscfd);
@ -181,6 +182,55 @@ web_cache_get_headers (gchar* filename)
return headers; return headers;
} }
/* Starts a fresh, empty "<filename>.tmp" fragment for a new download.
   If a load was interrupted we end up with a partial fragment of that
   name, so an existing one is deleted before the empty file is written.
   FIXME: What if a page asks to download the same file more than once?
   It seems we would then end up with a broken cache again. */
static void
web_cache_tmp_prepare (gchar* filename)
{
    gchar* fragment_path = g_strdup_printf ("%s.tmp", filename);
    gboolean stale = g_file_test (fragment_path, G_FILE_TEST_EXISTS);

    if (stale)
    {
        g_unlink (fragment_path);
    }

    /* A length of -1 makes GLib treat "" as a nul-terminated string */
    g_file_set_contents (fragment_path, "", -1, NULL);
    g_free (fragment_path);
}
/* Announces the content type of a response via the message's
   "content-sniffed" signal.
   The type is sniffed from @buffer; if sniffing yields nothing, the
   "Content-Type" response header of @msg is used instead. If neither
   is available no signal is emitted.
   Compiled to a no-op unless WEBKIT_CHECK_VERSION (1, 1, 15) holds. */
static void
web_cache_set_content_type (SoupMessage* msg,
                            SoupBuffer*  buffer)
{
#if WEBKIT_CHECK_VERSION (1, 1, 15)
    SoupContentSniffer* sniffer = soup_content_sniffer_new ();
    /* The sniffed type is a newly allocated string owned by us */
    gchar* sniffed = soup_content_sniffer_sniff (sniffer, msg, buffer, NULL);
    /* Borrowed pointer: either 'sniffed' or a header value owned by msg */
    const gchar* ct = sniffed;

    if (!ct)
        ct = soup_message_headers_get_one (msg->response_headers, "Content-Type");
    if (ct)
        g_signal_emit_by_name (msg, "content-sniffed", ct, NULL);

    /* Release what we created; previously both leaked on every call */
    g_free (sniffed);
    g_object_unref (sniffer);
#endif
}
/* "finished" handler for a message whose response is being cached.
   The body and headers were written to "<filename>.tmp" and
   "<filename>.dsc.tmp"; here they are moved to their final names
   "<filename>" and "<filename>.dsc".
   If the message ended with a transport-level error status (such as a
   cancelled or broken transfer) the fragments may be incomplete, so
   they are deleted instead of being committed to the cache.
   @filename is not freed here. */
static void
web_cache_message_finished_cb (SoupMessage* msg,
                               gchar*       filename)
{
    gchar* headers = g_strdup_printf ("%s.dsc", filename);
    gchar* tmp_headers = g_strdup_printf ("%s.dsc.tmp", filename);
    gchar* tmp_data = g_strdup_printf ("%s.tmp", filename);

    if (SOUP_STATUS_IS_TRANSPORT_ERROR (msg->status_code))
    {
        /* Never promote a partial download to a cache entry */
        g_unlink (tmp_data);
        g_unlink (tmp_headers);
    }
    else
    {
        g_rename (tmp_data, filename);
        g_rename (tmp_headers, headers);
    }

    g_free (headers);
    g_free (tmp_headers);
    g_free (tmp_data);
}
static void static void
web_cache_message_got_chunk_cb (SoupMessage* msg, web_cache_message_got_chunk_cb (SoupMessage* msg,
SoupBuffer* chunk, SoupBuffer* chunk,
@ -188,20 +238,20 @@ web_cache_message_got_chunk_cb (SoupMessage* msg,
{ {
GFile *file; GFile *file;
GOutputStream *stream; GOutputStream *stream;
gchar *tmp_filename;
if (!chunk->data || !chunk->length) if (!chunk->data || !chunk->length)
return; return;
if (!(g_file_test (filename, G_FILE_TEST_EXISTS))) tmp_filename = g_strdup_printf ("%s.tmp", filename);
g_file_set_contents (filename, "", -1, NULL); file = g_file_new_for_path (tmp_filename);
file = g_file_new_for_path (filename);
if ((stream = (GOutputStream*)g_file_append_to (file, 0, NULL, NULL))) if ((stream = (GOutputStream*)g_file_append_to (file, 0, NULL, NULL)))
{ {
g_output_stream_write (stream, chunk->data, chunk->length, NULL, NULL); g_output_stream_write (stream, chunk->data, chunk->length, NULL, NULL);
g_object_unref (stream); g_object_unref (stream);
} }
g_object_unref (file); g_object_unref (file);
g_free (tmp_filename);
} }
static void static void
@ -215,7 +265,6 @@ web_cache_message_rewrite (SoupMessage* msg,
char *data; char *data;
gsize length; gsize length;
/* FIXME: Seems to open image in a new tab we need to set content-type separately */
soup_message_set_status (msg, SOUP_STATUS_OK); soup_message_set_status (msg, SOUP_STATUS_OK);
g_hash_table_iter_init (&iter, cache_headers); g_hash_table_iter_init (&iter, cache_headers);
while (g_hash_table_iter_next (&iter, &key, &value)) while (g_hash_table_iter_next (&iter, &key, &value))
@ -227,6 +276,7 @@ web_cache_message_rewrite (SoupMessage* msg,
if (data && length) if (data && length)
{ {
buffer = soup_buffer_new (SOUP_MEMORY_TEMPORARY, data, length); buffer = soup_buffer_new (SOUP_MEMORY_TEMPORARY, data, length);
web_cache_set_content_type (msg, buffer);
soup_message_body_append_buffer (msg->response_body, buffer); soup_message_body_append_buffer (msg->response_body, buffer);
g_signal_emit_by_name (msg, "got-chunk", buffer, NULL); g_signal_emit_by_name (msg, "got-chunk", buffer, NULL);
soup_buffer_free (buffer); soup_buffer_free (buffer);
@ -251,35 +301,31 @@ web_cache_mesage_got_headers_cb (SoupMessage* msg,
MidoriExtension* extension) MidoriExtension* extension)
{ {
SoupURI* soup_uri = soup_message_get_uri (msg); SoupURI* soup_uri = soup_message_get_uri (msg);
gchar* uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup (""); gchar* uri;
gchar* filename = web_cache_get_cached_path (extension, uri); gchar* filename;
const gchar* nocache; const gchar* nocache;
SoupMessageHeaders *hdrs = msg->response_headers; SoupMessageHeaders *hdrs = msg->response_headers;
gint length;
/* Skip files downloaded by the user */ /* Skip files downloaded by the user */
if (g_object_get_data (G_OBJECT (msg), "midori-web-cache-download")) if (g_object_get_data (G_OBJECT (msg), "midori-web-cache-download"))
return; return;
/* Skip big files */ /* Skip big files */
length = GPOINTER_TO_INT (soup_message_headers_get_one (hdrs, "Content-Length")); const char* cl = soup_message_headers_get_one (hdrs, "Content-Length");
if (length > MAXLENGTH) if (cl && atoi (cl) > MAXLENGTH)
return; return;
nocache = soup_message_headers_get_one (hdrs, "Pragma"); nocache = soup_message_headers_get_one (hdrs, "Pragma");
if (nocache == NULL) if (!nocache)
nocache = soup_message_headers_get_one (hdrs, "Cache-Control"); nocache = soup_message_headers_get_one (hdrs, "Cache-Control");
if (nocache) if (nocache && g_regex_match_simple ("no-cache|no-store", nocache,
G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY))
{ {
if (g_regex_match_simple ("no-cache|no-store", nocache, return;
G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY))
{
g_free (uri);
g_free (filename);
return;
}
} }
uri = soup_uri ? soup_uri_to_string (soup_uri, FALSE) : g_strdup ("");
filename = web_cache_get_cached_path (extension, uri);
if (msg->status_code == SOUP_STATUS_NOT_MODIFIED) if (msg->status_code == SOUP_STATUS_NOT_MODIFIED)
{ {
/* g_debug ("loading from cache: %s -> %s", uri, filename); */ /* g_debug ("loading from cache: %s -> %s", uri, filename); */
@ -291,11 +337,13 @@ web_cache_mesage_got_headers_cb (SoupMessage* msg,
else if (msg->status_code == SOUP_STATUS_OK) else if (msg->status_code == SOUP_STATUS_OK)
{ {
/* g_debug ("updating cache: %s -> %s", uri, filename); */ /* g_debug ("updating cache: %s -> %s", uri, filename); */
web_cache_tmp_prepare (filename);
web_cache_save_headers (msg, filename); web_cache_save_headers (msg, filename);
/* FIXME: Do we need to disconnect signal after we are in unqueue? */
g_signal_connect_data (msg, "got-chunk", g_signal_connect_data (msg, "got-chunk",
G_CALLBACK (web_cache_message_got_chunk_cb), G_CALLBACK (web_cache_message_got_chunk_cb),
filename, (GClosureNotify)g_free, 0); filename, (GClosureNotify)g_free, 0);
g_signal_connect (msg, "finished",
G_CALLBACK (web_cache_message_finished_cb), filename);
} }
g_free (uri); g_free (uri);
} }
@ -373,8 +421,8 @@ web_cache_session_request_queued_cb (SoupSession* session,
soup_message_headers_append (msg->request_headers, soup_message_headers_append (msg->request_headers,
"If-None-Match", etag); "If-None-Match", etag);
if (last_modified) if (last_modified)
soup_message_headers_append (msg->request_headers, soup_message_headers_replace (msg->request_headers,
"If-Modified-Since", last_modified); "If-Modified-Since", last_modified);
/* FIXME: Do we need to disconnect signal after we are in unqueue? */ /* FIXME: Do we need to disconnect signal after we are in unqueue? */
g_signal_connect (msg, "got-headers", g_signal_connect (msg, "got-headers",
G_CALLBACK (web_cache_mesage_got_headers_cb), extension); G_CALLBACK (web_cache_mesage_got_headers_cb), extension);