Refactor IDN handling and related unit tests

Most code is now shared despite the different available libraries
and can be tested more efficiently.
This commit is contained in:
Christian Dywan 2009-10-10 11:09:10 +02:00
parent 94db485293
commit b9d4266d27
4 changed files with 92 additions and 79 deletions

View file

@ -1776,22 +1776,8 @@ main (int argc,
i = 0; i = 0;
while (uris[i] != NULL) while (uris[i] != NULL)
{ {
#ifdef HAVE_LIBSOUP_2_27_90 gchar* new_uri = sokoke_uri_to_ascii (uris[i]);
gchar* path; katze_assign (uris[i], new_uri);
gchar* hostname = sokoke_hostname_from_uri (uris[i], &path);
gchar* encoded = g_hostname_to_ascii (hostname);
if (encoded)
{
gchar* res = g_strconcat ("http://", encoded, path, NULL);
g_free (uris[i]);
g_free (encoded);
uris[i] = res;
}
g_free (hostname);
#else
uris[i] = sokoke_idn_to_punycode (uris[i]);
#endif
i++; i++;
} }
result = midori_app_instance_send_uris (app, uris); result = midori_app_instance_send_uris (app, uris);

View file

@ -185,7 +185,6 @@ sokoke_spawn_program (const gchar* command,
return TRUE; return TRUE;
} }
#if defined (HAVE_LIBSOUP_2_27_90) || HAVE_LIBIDN
/** /**
* sokoke_hostname_from_uri: * sokoke_hostname_from_uri:
* @uri: an URI string * @uri: an URI string
@ -222,79 +221,88 @@ sokoke_hostname_from_uri (const gchar* uri,
hostname = g_strdup (uri); hostname = g_strdup (uri);
return hostname; return hostname;
} }
#endif
/** /**
* sokoke_idn_to_punycode: * sokoke_hostname_to_ascii:
* @uri: an URI string
*
* The specified hostname is encoded if it is not ASCII.
*
* If no IDN support is available at compile time,
* the hostname will be returned unaltered.
*
* Return value: a newly allocated hostname
**/
static gchar*
sokoke_hostname_to_ascii (const gchar* hostname)
{
#ifdef HAVE_LIBSOUP_2_27_90
return g_hostname_to_ascii (hostname);
#elif HAVE_LIBIDN
uint32_t* q;
char* encoded;
int rc;
if ((q = stringprep_utf8_to_ucs4 (hostname, -1, NULL)))
{
rc = idna_to_ascii_4z (q, &encoded, IDNA_ALLOW_UNASSIGNED);
free (q);
if (rc == IDNA_SUCCESS)
return encoded;
}
#endif
return g_strdup (hostname);
}
/**
* sokoke_uri_to_ascii:
* @uri: an URI string * @uri: an URI string
* *
* The specified URI is parsed and the hostname * The specified URI is parsed and the hostname
* part of it is encoded if it is not ASCII. * part of it is encoded if it is not ASCII.
* *
* If libIDN is not available at compile time, * If no IDN support is available at compile time,
* this code will pass the string unaltered. * the URI will be returned unaltered.
* *
* The called function owns the passed string. * Return value: a newly allocated URI
*
* Return value: a newly allocated ASCII URI
**/ **/
gchar* gchar*
sokoke_idn_to_punycode (gchar* uri) sokoke_uri_to_ascii (const gchar* uri)
{ {
#if HAVE_LIBIDN
gchar* proto; gchar* proto;
gchar* hostname;
gchar* path;
char *s;
uint32_t *q;
int rc;
gchar *result;
if ((proto = g_utf8_strchr (uri, -1, ':'))) if ((proto = g_utf8_strchr (uri, -1, ':')))
{ {
gulong offset; gulong offset;
gchar* buffer; gchar* buffer;
/* 'file' URIs don't have a hostname */
if (!strcmp (proto, "file"))
return uri;
offset = g_utf8_pointer_to_offset (uri, proto); offset = g_utf8_pointer_to_offset (uri, proto);
buffer = g_malloc0 (offset + 1); buffer = g_malloc0 (offset + 1);
g_utf8_strncpy (buffer, uri, offset); g_utf8_strncpy (buffer, uri, offset);
proto = buffer; proto = buffer;
} }
hostname = sokoke_hostname_from_uri (uri, &path); gchar* path;
gchar* hostname = sokoke_hostname_from_uri (uri, &path);
gchar* encoded = sokoke_hostname_to_ascii (hostname);
if (!(q = stringprep_utf8_to_ucs4 (hostname, -1, NULL))) if (encoded)
{ {
g_free (proto); gchar* res = g_strconcat (proto ? proto : "", proto ? "://" : "",
encoded, path, NULL);
g_free (encoded);
return res;
}
g_free (hostname); g_free (hostname);
return uri; return g_strdup (uri);
} }
rc = idna_to_ascii_4z (q, &s, IDNA_ALLOW_UNASSIGNED); static gchar*
free (q); sokoke_idn_to_punycode (gchar* uri)
if (rc != IDNA_SUCCESS)
{ {
g_free (proto); #if HAVE_LIBIDN
g_free (hostname); gchar* result = sokoke_uri_to_ascii (uri);
return uri;
}
if (proto)
{
result = g_strconcat (proto, "://", s, path ? path : "", NULL);
g_free (proto);
if (path)
g_free (hostname);
}
else
result = g_strdup (s);
g_free (uri); g_free (uri);
free (s);
return result; return result;
#else #else
return uri; return uri;

View file

@ -46,7 +46,7 @@ sokoke_hostname_from_uri (const gchar* uri,
gchar** path); gchar** path);
gchar* gchar*
sokoke_idn_to_punycode (gchar* uri); sokoke_uri_to_ascii (const gchar* uri);
gchar* gchar*
sokoke_magic_uri (const gchar* uri, sokoke_magic_uri (const gchar* uri,

View file

@ -86,27 +86,46 @@ magic_uri_uri (void)
static void static void
magic_uri_idn (void) magic_uri_idn (void)
{ {
#if HAVE_LIBIDN typedef struct
test_input ("http://www.münchhausen.at", "http://www.xn--mnchhausen-9db.at"); {
test_input ("http://www.خداوند.com/", "http://www.xn--mgbndb8il.com/"); const gchar* before;
test_input ("айкидо.com", "http://xn--80aildf0a.com"); const gchar* after;
test_input ("http://東京理科大学.jp", "http://xn--1lq68wkwbj6ugkpigi.jp"); } URIItem;
test_input ("https://青のネコ", "https://xn--u9jthzcs263c");
static const URIItem items[] = {
#if HAVE_LIBIDN || defined (HAVE_LIBSOUP_2_27_90)
{ "http://www.münchhausen.at", "http://www.xn--mnchhausen-9db.at" },
{ "http://www.خداوند.com/", "http://www.xn--mgbndb8il.com/" },
{ "айкидо.com", "xn--80aildf0a.com" },
{ "http://東京理科大学.jp", "http://xn--1lq68wkwbj6ugkpigi.jp" },
{ "https://青のネコ", "https://xn--u9jthzcs263c" },
#else #else
test_input ("http://www.münchhausen.at", "http://www.münchhausen.at"); { "http://www.münchhausen.at", NULL },
test_input ("http://www.خداوند.com/", "http://www.خداوند.com/"); { "http://www.خداوند.com/", NULL },
test_input ("айкидо.com", "http://айкидо.com"); { "айкидо.com", NULL },
test_input ("http://東京理科大学.jp", "http://東京理科大学.jp"); { "http://東京理科大学.jp", NULL },
test_input ("https://青のネコ.co.jp", "https://青のネコ.co.jp"); { "https://青のネコ.co.jp", NULL },
#endif
{ "http://en.wikipedia.org/wiki/Kölsch_language", NULL },
{ "file:///home/mark/frühstück", NULL },
};
guint i;
for (i = 0; i < G_N_ELEMENTS (items); i++)
{
gchar* result = sokoke_uri_to_ascii (items[i].before);
const gchar* after = items[i].after ? items[i].after : items[i].before;
sokoke_assert_str_equal (items[i].before, result, after);
g_free (result);
}
#if HAVE_LIBIDN
test_input ("айкидо.com", "http://xn--80aildf0a.com");
#else
test_input ("айкидо.com", "http://айкидо.com");
#endif #endif
test_input ("http://en.wikipedia.org/wiki/Kölsch_language",
"http://en.wikipedia.org/wiki/Kölsch_language");
test_input ("en.wikipedia.org/wiki/Kölsch_language",
"http://en.wikipedia.org/wiki/Kölsch_language");
test_input ("sm Küchenzubehör", SM "Küchenzubehör"); test_input ("sm Küchenzubehör", SM "Küchenzubehör");
test_input ("sm 東京理科大学", SM "東京理科大学"); test_input ("sm 東京理科大学", SM "東京理科大学");
test_input ("file:///home/mark/frühstück",
"file:///home/mark/frühstück");
} }
static void static void