url_check_line () now recognizes all URLs
Previously it only recognized URLs with "://" after the scheme. URLs without a scheme are purposely not recognized by this function. Fixes #701. Closes #815.
This commit is contained in:
parent
a25363f5c3
commit
6d3c176ff8
|
@ -35,6 +35,7 @@ GTree *url_btree = NULL;
|
||||||
static gboolean regex_match (const GRegex *re, const char *word,
|
static gboolean regex_match (const GRegex *re, const char *word,
|
||||||
int *start, int *end);
|
int *start, int *end);
|
||||||
static const GRegex *re_url (void);
|
static const GRegex *re_url (void);
|
||||||
|
static const GRegex *re_url_no_scheme (void);
|
||||||
static const GRegex *re_host (void);
|
static const GRegex *re_host (void);
|
||||||
static const GRegex *re_host6 (void);
|
static const GRegex *re_host6 (void);
|
||||||
static const GRegex *re_email (void);
|
static const GRegex *re_email (void);
|
||||||
|
@ -294,7 +295,10 @@ match_email (const char *word, int *start, int *end)
|
||||||
static gboolean
|
static gboolean
|
||||||
match_url (const char *word, int *start, int *end)
|
match_url (const char *word, int *start, int *end)
|
||||||
{
|
{
|
||||||
return regex_match (re_url (), word, start, end);
|
if (regex_match (re_url (), word, start, end))
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
return regex_match (re_url_no_scheme (), word, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
|
@ -372,7 +376,6 @@ url_check_line (char *buf, int len)
|
||||||
g_match_info_fetch_pos(gmi, 0, &start, &end);
|
g_match_info_fetch_pos(gmi, 0, &start, &end);
|
||||||
while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n'))
|
while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n'))
|
||||||
end--;
|
end--;
|
||||||
if (g_strstr_len (po + start, end - start, "://"))
|
|
||||||
url_add(po + start, end - start);
|
url_add(po + start, end - start);
|
||||||
g_match_info_next(gmi, NULL);
|
g_match_info_next(gmi, NULL);
|
||||||
}
|
}
|
||||||
|
@ -539,6 +542,18 @@ struct
|
||||||
{ NULL, "", 0}
|
{ NULL, "", 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const GRegex *
|
||||||
|
re_url_no_scheme (void)
|
||||||
|
{
|
||||||
|
static GRegex *url_ret = NULL;
|
||||||
|
|
||||||
|
if (url_ret) return url_ret;
|
||||||
|
|
||||||
|
url_ret = make_re ("(" HOST_URL OPT_PORT "/" "(" PATH ")?" ")");
|
||||||
|
|
||||||
|
return url_ret;
|
||||||
|
}
|
||||||
|
|
||||||
static const GRegex *
|
static const GRegex *
|
||||||
re_url (void)
|
re_url (void)
|
||||||
{
|
{
|
||||||
|
@ -551,12 +566,12 @@ re_url (void)
|
||||||
|
|
||||||
grist_gstr = g_string_new (NULL);
|
grist_gstr = g_string_new (NULL);
|
||||||
|
|
||||||
/* Add regex "host/path", representing a "schemeless" url */
|
|
||||||
g_string_append (grist_gstr, "(" HOST_URL OPT_PORT "/" "(" PATH ")?" ")");
|
|
||||||
|
|
||||||
for (i = 0; uri[i].scheme; i++)
|
for (i = 0; uri[i].scheme; i++)
|
||||||
{
|
{
|
||||||
g_string_append (grist_gstr, "|(");
|
if (i)
|
||||||
|
g_string_append (grist_gstr, "|");
|
||||||
|
|
||||||
|
g_string_append (grist_gstr, "(");
|
||||||
g_string_append_printf (grist_gstr, "%s:", uri[i].scheme);
|
g_string_append_printf (grist_gstr, "%s:", uri[i].scheme);
|
||||||
|
|
||||||
if (uri[i].flags & URI_AUTHORITY)
|
if (uri[i].flags & URI_AUTHORITY)
|
||||||
|
|
Loading…
Reference in New Issue