From 20d03e43aa67b933abf69da40d095b7ceb245bc3 Mon Sep 17 00:00:00 2001 From: Dale Whittaker Date: Tue, 12 May 2009 01:52:30 +0200 Subject: [PATCH] Improve handling of feeds with missing data Previously the description was used in the treeview if the title was not present, however this is not possible in some cases, as it may contain markup. The RSS code is changed so now if there is no title, the description is used as the title, but with the html stripped from it. We also have to consider that some description elements may simply contain an html tag (and therefore no text). In that case the URI is used as the title. --- extensions/feed-panel/feed-parse.c | 36 +++++++++++++++++++----------- extensions/feed-panel/feed-parse.h | 3 +++ extensions/feed-panel/feed-rss.c | 27 ++++++++++++++++++---- 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/extensions/feed-panel/feed-parse.c b/extensions/feed-panel/feed-parse.c index 4c09dc64..1c2cd3f3 100644 --- a/extensions/feed-panel/feed-parse.c +++ b/extensions/feed-panel/feed-parse.c @@ -33,35 +33,36 @@ feed_get_element_string (FeedParser* fparser) return (gchar*)xmlNodeListGetString (fparser->doc, node->children, 1); } -void +static void handle_markup_chars (void* user_data, const xmlChar* ch, int len) { - gchar** markup; - gchar* temp; + if (len > 0) + { + gchar** markup; + gchar* temp; - markup = (gchar**)user_data; - temp = g_strndup ((gchar*)ch, len); - *markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp); - g_free (temp); + markup = (gchar**)user_data; + temp = g_strndup ((gchar*)ch, len); + *markup = (*markup) ? 
g_strconcat (*markup, temp, NULL) : g_strdup (temp); + g_free (temp); + } } gchar* -feed_get_element_markup (FeedParser* fparser) +feed_remove_markup (gchar* markup) { - gchar* markup; const xmlChar* stag; - - markup = feed_get_element_string (fparser); - if ((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>')) + if (((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>')) || + xmlStrchr (BAD_CAST markup, '&')) { gchar* text = NULL; htmlSAXHandlerPtr psax; psax = g_new0 (htmlSAXHandler, 1); psax->characters = handle_markup_chars; - htmlSAXParseDoc (BAD_CAST markup, NULL, psax, &text); + htmlSAXParseDoc (BAD_CAST markup, "UTF-8", psax, &text); g_free (psax); g_free (markup); return text; @@ -69,6 +70,15 @@ feed_get_element_markup (FeedParser* fparser) return markup; } +gchar* +feed_get_element_markup (FeedParser* fparser) +{ + gchar* markup; + + markup = feed_get_element_string (fparser); + return feed_remove_markup (markup); +} + gint64 feed_get_element_date (FeedParser* fparser) { diff --git a/extensions/feed-panel/feed-parse.h b/extensions/feed-panel/feed-parse.h index 84ae9f19..31685feb 100644 --- a/extensions/feed-panel/feed-parse.h +++ b/extensions/feed-panel/feed-parse.h @@ -57,6 +57,9 @@ typedef struct _FeedParser gchar* feed_get_element_string (FeedParser* fparser); +gchar* +feed_remove_markup (gchar* markup); + gchar* feed_get_element_markup (FeedParser* fparser); diff --git a/extensions/feed-panel/feed-rss.c b/extensions/feed-panel/feed-rss.c index 03843df6..e8022634 100644 --- a/extensions/feed-panel/feed-rss.c +++ b/extensions/feed-panel/feed-rss.c @@ -137,11 +137,30 @@ rss_postparse_item (FeedParser* fparser) * Verify that the required RSS elements are added * (as per the spec) */ - if (!katze_item_get_name (fparser->item) && - !katze_item_get_text (fparser->item)) + if (!katze_item_get_name (fparser->item)) { - feed_parser_set_error (fparser, FEED_PARSE_ERROR_MISSING_ELEMENT, - _("Failed to find required RSS \"item\" 
elements in XML data.")); + gchar* desc; + + desc = (gchar*)katze_item_get_text (fparser->item); + if (!desc) + { + feed_parser_set_error (fparser, FEED_PARSE_ERROR_MISSING_ELEMENT, + _("Failed to find required RSS \"item\" elements in XML data.")); + } + else + { + desc = feed_remove_markup (g_strdup (desc)); + if (desc) + { + katze_item_set_name (fparser->item, desc); + g_free (desc); + } + else + { + if ((desc = (gchar*)katze_item_get_uri (fparser->item))) + katze_item_set_name (fparser->item, desc); + } + } } }