ec44108653
We strip HTML from "title" elements when required now. And we always try to show at least the URI for feeds in the WebKit view. And we fall back to using the URI in the treeview if the title/description is empty.
254 lines
5.8 KiB
C
254 lines
5.8 KiB
C
/*
|
|
Copyright (C) 2009 Dale Whittaker <dayul@users.sf.net>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
See the file COPYING for the full license text.
|
|
*/
|
|
|
|
#include "feed-parse.h"
|
|
#include <time.h>
|
|
|
|
gchar*
|
|
feed_get_element_string (FeedParser* fparser)
|
|
{
|
|
xmlNodePtr node;
|
|
|
|
node = fparser->node;
|
|
|
|
if (!node->children ||
|
|
xmlIsBlankNode (node->children) ||
|
|
node->children->type != XML_TEXT_NODE
|
|
)
|
|
{
|
|
/* Some servers add required elements with no content,
|
|
* create a dummy string to handle it.
|
|
*/
|
|
return g_strdup (" ");
|
|
}
|
|
|
|
return (gchar*)xmlNodeListGetString (fparser->doc, node->children, 1);
|
|
}
|
|
|
|
void
|
|
handle_markup_chars (void* user_data,
|
|
const xmlChar* ch,
|
|
int len)
|
|
{
|
|
gchar** markup;
|
|
gchar* temp;
|
|
|
|
markup = (gchar**)user_data;
|
|
temp = g_strndup ((gchar*)ch, len);
|
|
*markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp);
|
|
g_free (temp);
|
|
}
|
|
|
|
gchar*
|
|
feed_get_element_markup (FeedParser* fparser)
|
|
{
|
|
gchar* markup;
|
|
const xmlChar* stag;
|
|
|
|
markup = feed_get_element_string (fparser);
|
|
if ((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>'))
|
|
{
|
|
gchar* text = NULL;
|
|
htmlSAXHandlerPtr psax;
|
|
|
|
psax = g_new0 (htmlSAXHandler, 1);
|
|
psax->characters = handle_markup_chars;
|
|
htmlSAXParseDoc (BAD_CAST markup, NULL, psax, &text);
|
|
g_free (psax);
|
|
g_free (markup);
|
|
return text;
|
|
}
|
|
return markup;
|
|
}
|
|
|
|
gint64
|
|
feed_get_element_date (FeedParser* fparser)
|
|
{
|
|
time_t date;
|
|
gchar* content;
|
|
|
|
date = 0;
|
|
content = feed_get_element_string (fparser);
|
|
|
|
if (content)
|
|
{
|
|
SoupDate* sdate;
|
|
|
|
sdate = soup_date_new_from_string (content);
|
|
date = soup_date_to_time_t (sdate);
|
|
soup_date_free (sdate);
|
|
g_free (content);
|
|
}
|
|
return ((gint64)date);
|
|
}
|
|
|
|
KatzeItem*
|
|
feed_item_exists (KatzeArray* array,
|
|
KatzeItem* item)
|
|
{
|
|
const gchar* guid;
|
|
gchar* hstr;
|
|
guint hash;
|
|
|
|
guid = katze_item_get_token (item);
|
|
if (!guid)
|
|
{
|
|
hstr = g_strjoin (NULL,
|
|
katze_item_get_name (item),
|
|
katze_item_get_uri (item),
|
|
katze_item_get_text (item),
|
|
NULL);
|
|
hash = g_str_hash (hstr);
|
|
g_free (hstr);
|
|
|
|
hstr = g_strdup_printf ("%u", hash);
|
|
katze_item_set_token (item, hstr);
|
|
g_free (hstr);
|
|
|
|
guid = katze_item_get_token (item);
|
|
}
|
|
|
|
return (katze_array_find_token (array, guid));
|
|
}
|
|
|
|
void
|
|
feed_parse_node (FeedParser* fparser)
|
|
{
|
|
xmlNodePtr node;
|
|
xmlNodePtr child;
|
|
|
|
if (!*fparser->error)
|
|
{
|
|
if (fparser->preparse)
|
|
(*fparser->preparse) (fparser);
|
|
|
|
if (fparser->parse)
|
|
{
|
|
node = fparser->node;
|
|
child = node->last;
|
|
|
|
while (child)
|
|
{
|
|
if (child->type == XML_ELEMENT_NODE)
|
|
{
|
|
fparser->node = child;
|
|
|
|
(*fparser->parse) (fparser);
|
|
|
|
if (*fparser->error)
|
|
break;
|
|
}
|
|
child = child->prev;
|
|
}
|
|
fparser->node = node;
|
|
}
|
|
|
|
if (fparser->postparse)
|
|
(*fparser->postparse) (fparser);
|
|
}
|
|
}
|
|
|
|
static void
|
|
feed_parse_doc (xmlDocPtr doc,
|
|
GSList* parsers,
|
|
KatzeArray* array,
|
|
GError** error)
|
|
{
|
|
FeedParser* fparser;
|
|
xmlNodePtr root;
|
|
gboolean isvalid;
|
|
|
|
root = xmlDocGetRootElement (doc);
|
|
|
|
if (!root)
|
|
{
|
|
*error = g_error_new (FEED_PARSE_ERROR,
|
|
FEED_PARSE_ERROR_MISSING_ELEMENT,
|
|
_("Failed to find root element in feed XML data."));
|
|
return;
|
|
}
|
|
|
|
while (parsers)
|
|
{
|
|
fparser = (FeedParser*)parsers->data;
|
|
fparser->error = error;
|
|
fparser->doc = doc;
|
|
fparser->node = root;
|
|
|
|
if (fparser && fparser->isvalid)
|
|
{
|
|
isvalid = (*fparser->isvalid) (fparser);
|
|
|
|
if (*fparser->error)
|
|
return;
|
|
|
|
if (isvalid)
|
|
{
|
|
fparser->item = KATZE_ITEM (array);
|
|
|
|
if (fparser->update &&
|
|
(*fparser->update) (fparser))
|
|
feed_parse_node (fparser);
|
|
}
|
|
}
|
|
|
|
fparser->error = NULL;
|
|
fparser->doc = NULL;
|
|
fparser->node = NULL;
|
|
|
|
if (isvalid)
|
|
return;
|
|
|
|
parsers = g_slist_next (parsers);
|
|
}
|
|
|
|
*error = g_error_new (FEED_PARSE_ERROR,
|
|
FEED_PARSE_ERROR_INVALID_FORMAT,
|
|
_("Unsupported feed format."));
|
|
}
|
|
|
|
gboolean
|
|
parse_feed (gchar* data,
|
|
gint64 length,
|
|
GSList* parsers,
|
|
KatzeArray* array,
|
|
GError** error)
|
|
{
|
|
xmlDocPtr doc;
|
|
xmlErrorPtr xerror;
|
|
|
|
LIBXML_TEST_VERSION
|
|
|
|
doc = xmlReadMemory (
|
|
data, length, "feedfile.xml", NULL,
|
|
XML_PARSE_NOWARNING | XML_PARSE_NOERROR /*| XML_PARSE_RECOVER*/
|
|
);
|
|
|
|
if (doc)
|
|
{
|
|
feed_parse_doc (doc, parsers, array, error);
|
|
xmlFreeDoc (doc);
|
|
}
|
|
else
|
|
{
|
|
xerror = xmlGetLastError ();
|
|
*error = g_error_new (FEED_PARSE_ERROR,
|
|
FEED_PARSE_ERROR_PARSE,
|
|
_("Failed to parse XML feed: %s"),
|
|
xerror->message);
|
|
xmlResetLastError ();
|
|
}
|
|
xmlCleanupParser ();
|
|
xmlMemoryDump ();
|
|
|
|
return *error ? FALSE : TRUE;
|
|
}
|
|
|