Improve handling of feeds with missing data
Previously the description was used in the treeview if the title was not present; however, this is not possible in some cases, as it may contain markup. The RSS code is changed so that if there is no title, the description is used as the title, but with the HTML stripped from it. We also have to consider that some description elements may simply contain an HTML tag (and therefore no text). In that case the URI is used as the title.
This commit is contained in:
parent
ff2abf1fd4
commit
20d03e43aa
3 changed files with 49 additions and 17 deletions
|
@ -33,11 +33,13 @@ feed_get_element_string (FeedParser* fparser)
|
|||
return (gchar*)xmlNodeListGetString (fparser->doc, node->children, 1);
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
handle_markup_chars (void* user_data,
|
||||
const xmlChar* ch,
|
||||
int len)
|
||||
{
|
||||
if (len > 0)
|
||||
{
|
||||
gchar** markup;
|
||||
gchar* temp;
|
||||
|
||||
|
@ -45,23 +47,22 @@ handle_markup_chars (void* user_data,
|
|||
temp = g_strndup ((gchar*)ch, len);
|
||||
*markup = (*markup) ? g_strconcat (*markup, temp, NULL) : g_strdup (temp);
|
||||
g_free (temp);
|
||||
}
|
||||
}
|
||||
|
||||
gchar*
|
||||
feed_get_element_markup (FeedParser* fparser)
|
||||
feed_remove_markup (gchar* markup)
|
||||
{
|
||||
gchar* markup;
|
||||
const xmlChar* stag;
|
||||
|
||||
markup = feed_get_element_string (fparser);
|
||||
if ((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>'))
|
||||
if (((stag = xmlStrchr (BAD_CAST markup, '<')) && xmlStrchr (stag, '>')) ||
|
||||
xmlStrchr (BAD_CAST markup, '&'))
|
||||
{
|
||||
gchar* text = NULL;
|
||||
htmlSAXHandlerPtr psax;
|
||||
|
||||
psax = g_new0 (htmlSAXHandler, 1);
|
||||
psax->characters = handle_markup_chars;
|
||||
htmlSAXParseDoc (BAD_CAST markup, NULL, psax, &text);
|
||||
htmlSAXParseDoc (BAD_CAST markup, "UTF-8", psax, &text);
|
||||
g_free (psax);
|
||||
g_free (markup);
|
||||
return text;
|
||||
|
@ -69,6 +70,15 @@ feed_get_element_markup (FeedParser* fparser)
|
|||
return markup;
|
||||
}
|
||||
|
||||
gchar*
|
||||
feed_get_element_markup (FeedParser* fparser)
|
||||
{
|
||||
gchar* markup;
|
||||
|
||||
markup = feed_get_element_string (fparser);
|
||||
return feed_remove_markup (markup);
|
||||
}
|
||||
|
||||
gint64
|
||||
feed_get_element_date (FeedParser* fparser)
|
||||
{
|
||||
|
|
|
@ -57,6 +57,9 @@ typedef struct _FeedParser
|
|||
gchar*
|
||||
feed_get_element_string (FeedParser* fparser);
|
||||
|
||||
gchar*
|
||||
feed_remove_markup (gchar* markup);
|
||||
|
||||
gchar*
|
||||
feed_get_element_markup (FeedParser* fparser);
|
||||
|
||||
|
|
|
@ -137,12 +137,31 @@ rss_postparse_item (FeedParser* fparser)
|
|||
* Verify that the required RSS elements are added
|
||||
* (as per the spec)
|
||||
*/
|
||||
if (!katze_item_get_name (fparser->item) &&
|
||||
!katze_item_get_text (fparser->item))
|
||||
if (!katze_item_get_name (fparser->item))
|
||||
{
|
||||
gchar* desc;
|
||||
|
||||
desc = (gchar*)katze_item_get_text (fparser->item);
|
||||
if (!desc)
|
||||
{
|
||||
feed_parser_set_error (fparser, FEED_PARSE_ERROR_MISSING_ELEMENT,
|
||||
_("Failed to find required RSS \"item\" elements in XML data."));
|
||||
}
|
||||
else
|
||||
{
|
||||
desc = feed_remove_markup (g_strdup (desc));
|
||||
if (desc)
|
||||
{
|
||||
katze_item_set_name (fparser->item, desc);
|
||||
g_free (desc);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((desc = (gchar*)katze_item_get_uri (fparser->item)))
|
||||
katze_item_set_name (fparser->item, desc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*fparser->error && KATZE_IS_ITEM (fparser->item))
|
||||
|
|
Loading…
Reference in a new issue