Improve handling of feeds with missing data

Previously, the description was used in the treeview if the title
was not present; however, this is not possible in some cases, as
it may contain markup.
The RSS code is changed so that now, if there is no title, the
description is used as the title, but with the HTML stripped from
it. We also have to consider that some description elements may
simply contain an HTML tag (and therefore no text). In that
case the URI is used as the title.
This commit is contained in:
Dale Whittaker 2009-05-12 01:52:30 +02:00 committed by Christian Dywan
parent ff2abf1fd4
commit 20d03e43aa
3 changed files with 49 additions and 17 deletions

View file

@ -33,35 +33,36 @@ feed_get_element_string (FeedParser* fparser)
return (gchar*)xmlNodeListGetString (fparser->doc, node->children, 1);
}
void
static void
handle_markup_chars (void* user_data,
const xmlChar* ch,
int len)
{
gchar** markup;
gchar* temp;
if (len > 0)
{
gchar** markup;
gchar* temp;
markup = (gchar**)user_data;
temp = g_strndup ((gchar*)ch, len);
*markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp);
g_free (temp);
markup = (gchar**)user_data;
temp = g_strndup ((gchar*)ch, len);
*markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp);
g_free (temp);
}
}
gchar*
feed_get_element_markup (FeedParser* fparser)
feed_remove_markup (gchar* markup)
{
gchar* markup;
const xmlChar* stag;
markup = feed_get_element_string (fparser);
if ((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>'))
if (((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>')) ||
xmlStrchr (BAD_CAST markup, '&'))
{
gchar* text = NULL;
htmlSAXHandlerPtr psax;
psax = g_new0 (htmlSAXHandler, 1);
psax->characters = handle_markup_chars;
htmlSAXParseDoc (BAD_CAST markup, NULL, psax, &text);
htmlSAXParseDoc (BAD_CAST markup, "UTF-8", psax, &text);
g_free (psax);
g_free (markup);
return text;
@ -69,6 +70,15 @@ feed_get_element_markup (FeedParser* fparser)
return markup;
}
/*
 * Read the element string for fparser via feed_get_element_string ()
 * and return the result of running it through feed_remove_markup ().
 */
gchar*
feed_get_element_markup (FeedParser* fparser)
{
return feed_remove_markup (feed_get_element_string (fparser));
}
gint64
feed_get_element_date (FeedParser* fparser)
{

View file

@ -57,6 +57,9 @@ typedef struct _FeedParser
gchar*
feed_get_element_string (FeedParser* fparser);
gchar*
feed_remove_markup (gchar* markup);
gchar*
feed_get_element_markup (FeedParser* fparser);

View file

@ -137,11 +137,30 @@ rss_postparse_item (FeedParser* fparser)
* Verify that the required RSS elements are added
* (as per the spec)
*/
if (!katze_item_get_name (fparser->item) &&
!katze_item_get_text (fparser->item))
if (!katze_item_get_name (fparser->item))
{
feed_parser_set_error (fparser, FEED_PARSE_ERROR_MISSING_ELEMENT,
_("Failed to find required RSS \"item\" elements in XML data."));
gchar* desc;
desc = (gchar*)katze_item_get_text (fparser->item);
if (!desc)
{
feed_parser_set_error (fparser, FEED_PARSE_ERROR_MISSING_ELEMENT,
_("Failed to find required RSS \"item\" elements in XML data."));
}
else
{
desc = feed_remove_markup (g_strdup (desc));
if (desc)
{
katze_item_set_name (fparser->item, desc);
g_free (desc);
}
else
{
if ((desc = (gchar*)katze_item_get_uri (fparser->item)))
katze_item_set_name (fparser->item, desc);
}
}
}
}