midori/extensions/feed-panel/feed-parse.c
Dale Whittaker ec44108653 Improve title and URI display in the feed panel
We strip HTML from "title" elements when required now. And we
always try to show at least the URI for feeds in the WebKit view.
And we fall back to using the URI in the treeview if the title/
description is empty.
2009-05-09 14:08:06 +02:00

254 lines
5.8 KiB
C

/*
Copyright (C) 2009 Dale Whittaker <dayul@users.sf.net>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
See the file COPYING for the full license text.
*/
#include "feed-parse.h"
#include <time.h>
/*
 * feed_get_element_string:
 * @fparser: parser whose current node (fparser->node) is read
 *
 * Fetches the text held by the first child of the parser's current node.
 * If that child is missing, is a blank node, or is not a text node, a
 * one-space placeholder string is returned instead.
 *
 * Returns a newly allocated string: either the placeholder or whatever
 * xmlNodeListGetString() produced for the child list.
 */
gchar*
feed_get_element_string (FeedParser* fparser)
{
    xmlNodePtr first;

    first = fparser->node->children;
    if (first && !xmlIsBlankNode (first) && first->type == XML_TEXT_NODE)
        return (gchar*)xmlNodeListGetString (fparser->doc, first, 1);

    /* Some servers add required elements with no content,
     * hand back a dummy string so callers always get something.
     */
    return g_strdup (" ");
}
void
handle_markup_chars (void* user_data,
const xmlChar* ch,
int len)
{
gchar** markup;
gchar* temp;
markup = (gchar**)user_data;
temp = g_strndup ((gchar*)ch, len);
*markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp);
g_free (temp);
}
/*
 * feed_get_element_markup:
 * @fparser: parser whose current node is read
 *
 * Reads the current element's text and, when it contains a '<' that is
 * followed somewhere by a '>', runs it through the HTML SAX parser with
 * only a "characters" handler installed, so that just the character data
 * is collected.
 *
 * Returns the original string when no such tag-like sequence is present,
 * otherwise the collected character data. The latter stays NULL if the
 * characters callback is never invoked.
 */
gchar*
feed_get_element_markup (FeedParser* fparser)
{
    gchar* raw;
    gchar* stripped;
    const xmlChar* open_tag;
    htmlSAXHandlerPtr handler;

    raw = feed_get_element_string (fparser);

    /* Nothing that looks like a tag: return the text untouched. */
    open_tag = xmlStrchr (BAD_CAST raw, '<');
    if (!open_tag || !xmlStrchr (open_tag, '>'))
        return raw;

    stripped = NULL;
    handler = g_new0 (htmlSAXHandler, 1);
    handler->characters = handle_markup_chars;
    htmlSAXParseDoc (BAD_CAST raw, NULL, handler, &stripped);
    g_free (handler);
    g_free (raw);

    return stripped;
}
/*
 * feed_get_element_date:
 * @fparser: parser whose current node holds a date string
 *
 * Converts the text of the current element into a Unix timestamp.
 *
 * Returns the parsed time as a gint64, or 0 when the element has no
 * content or the content cannot be parsed as a date.
 */
gint64
feed_get_element_date (FeedParser* fparser)
{
    time_t date;
    gchar* content;
    SoupDate* sdate;

    date = 0;
    content = feed_get_element_string (fparser);
    if (content)
    {
        /* soup_date_new_from_string() yields NULL for unparsable input
         * (e.g. the " " placeholder used for empty elements), so only
         * convert and free when a date was actually produced.
         */
        sdate = soup_date_new_from_string (content);
        if (sdate)
        {
            date = soup_date_to_time_t (sdate);
            soup_date_free (sdate);
        }
        g_free (content);
    }
    return ((gint64)date);
}
/*
 * feed_item_exists:
 * @array: collection to search
 * @item: item to look up
 *
 * Looks @item up in @array by its token. When @item has no token yet,
 * one is generated from the string hash of its name, URI and text
 * (concatenated without a separator), stored on @item, and then used
 * for the lookup.
 *
 * NOTE: g_strjoin() stops at the first NULL argument, so a NULL name or
 * URI truncates the data that goes into the hash.
 *
 * Returns whatever katze_array_find_token() reports for the token.
 */
KatzeItem*
feed_item_exists (KatzeArray* array,
                  KatzeItem*  item)
{
    const gchar* token;

    token = katze_item_get_token (item);
    if (!token)
    {
        gchar* joined;
        gchar* generated;

        joined = g_strjoin (NULL,
                            katze_item_get_name (item),
                            katze_item_get_uri (item),
                            katze_item_get_text (item),
                            NULL);
        generated = g_strdup_printf ("%u", g_str_hash (joined));
        g_free (joined);

        katze_item_set_token (item, generated);
        g_free (generated);

        /* Re-read the token so we use the item's own copy. */
        token = katze_item_get_token (item);
    }

    return katze_array_find_token (array, token);
}
/*
 * feed_parse_node:
 * @fparser: parser state; fparser->node is the node whose children are walked
 *
 * Does nothing if an error is already recorded in *fparser->error.
 * Otherwise runs the optional preparse callback, then invokes the parse
 * callback once for every element child of the current node, walking
 * from the last child to the first and stopping early when a callback
 * records an error. fparser->node is pointed at each child during its
 * callback and restored afterwards. The optional postparse callback is
 * run last, including after an early stop.
 */
void
feed_parse_node (FeedParser* fparser)
{
    xmlNodePtr parent;
    xmlNodePtr cur;

    if (*fparser->error)
        return;

    if (fparser->preparse)
        (*fparser->preparse) (fparser);

    if (fparser->parse)
    {
        parent = fparser->node;
        for (cur = parent->last; cur; cur = cur->prev)
        {
            if (cur->type != XML_ELEMENT_NODE)
                continue;

            fparser->node = cur;
            (*fparser->parse) (fparser);
            if (*fparser->error)
                break;
        }
        /* Put the parser back on the node we started from. */
        fparser->node = parent;
    }

    if (fparser->postparse)
        (*fparser->postparse) (fparser);
}
/*
 * feed_parse_doc:
 * @doc: parsed XML document
 * @parsers: list of FeedParser* candidates, tried in order
 * @array: array that receives the parsed feed (used as the root item)
 * @error: location for an error; must not be NULL and is dereferenced
 *
 * Offers the document's root element to each parser in turn. The first
 * parser whose isvalid callback accepts the document is asked to update
 * and, if it agrees, to parse the tree; no further parsers are tried
 * after that. Processing also stops as soon as an isvalid callback
 * records an error.
 *
 * *error is set when the document has no root element or when no parser
 * accepts the document. The doc/node/error fields of every parser that
 * was touched are cleared again before returning, so no parser keeps a
 * pointer to @doc.
 */
static void
feed_parse_doc (xmlDocPtr   doc,
                GSList*     parsers,
                KatzeArray* array,
                GError**    error)
{
    FeedParser* fparser;
    xmlNodePtr root;
    gboolean done;

    root = xmlDocGetRootElement (doc);
    if (!root)
    {
        *error = g_error_new (FEED_PARSE_ERROR,
                              FEED_PARSE_ERROR_MISSING_ELEMENT,
                              _("Failed to find root element in feed XML data."));
        return;
    }

    for (; parsers; parsers = g_slist_next (parsers))
    {
        fparser = (FeedParser*)parsers->data;
        /* Check for a NULL entry before touching any field. */
        if (!fparser)
            continue;

        fparser->error = error;
        fparser->doc = doc;
        fparser->node = root;

        /* Explicitly reset per parser so a parser without an isvalid
         * callback can never be mistaken for a match.
         */
        done = FALSE;
        if (fparser->isvalid)
        {
            gboolean isvalid;

            isvalid = (*fparser->isvalid) (fparser);
            if (*error)
                done = TRUE;
            else if (isvalid)
            {
                fparser->item = KATZE_ITEM (array);
                if (fparser->update &&
                    (*fparser->update) (fparser))
                    feed_parse_node (fparser);
                done = TRUE;
            }
        }

        /* Always detach the parser from the document, also on error. */
        fparser->error = NULL;
        fparser->doc = NULL;
        fparser->node = NULL;

        if (done)
            return;
    }

    *error = g_error_new (FEED_PARSE_ERROR,
                          FEED_PARSE_ERROR_INVALID_FORMAT,
                          _("Unsupported feed format."));
}
/*
 * parse_feed:
 * @data: buffer holding the feed XML
 * @length: size of @data in bytes
 * @parsers: list of FeedParser* candidates
 * @array: array that receives the parsed feed
 * @error: location for an error; must not be NULL and is dereferenced
 *
 * Parses @data as XML and hands the resulting document to the feed
 * parsers. libxml2 warnings and errors are not printed; a parse failure
 * is reported through @error instead.
 *
 * Returns TRUE when *error is unset on exit, FALSE otherwise.
 */
gboolean
parse_feed (gchar*      data,
            gint64      length,
            GSList*     parsers,
            KatzeArray* array,
            GError**    error)
{
    xmlDocPtr doc;
    xmlErrorPtr xerror;

    LIBXML_TEST_VERSION

    doc = xmlReadMemory (
                data, length, "feedfile.xml", NULL,
                XML_PARSE_NOWARNING | XML_PARSE_NOERROR /*| XML_PARSE_RECOVER*/
                );

    if (doc)
    {
        feed_parse_doc (doc, parsers, array, error);
        xmlFreeDoc (doc);
    }
    else
    {
        /* xmlGetLastError() may return NULL when no error was recorded,
         * so never dereference it blindly.
         */
        xerror = xmlGetLastError ();
        *error = g_error_new (FEED_PARSE_ERROR,
                              FEED_PARSE_ERROR_PARSE,
                              _("Failed to parse XML feed: %s"),
                              (xerror && xerror->message)
                              ? xerror->message : "unknown error");
        xmlResetLastError ();
    }

    /* xmlCleanupParser() is deliberately not called here: it releases
     * library-global libxml2 state and is only meant to be used once the
     * whole process has finished with libxml2.
     */
    xmlMemoryDump ();

    return *error ? FALSE : TRUE;
}