Nickname and channel matching is now server-aware: we only match words that
have the prefixes the server uses.

The URL regex matching was redesigned to be more versatile. This was needed to
disambiguate matches between nicks and channels, since they may now have common
prefixes (&+!). In case of ambiguity, when the server supports prefixes that
are common to both channels and nicknames, we choose the nickname (the nickname
only matches if there is a user with that nick in the current session).

Fixes #655.
This commit is contained in:
Diogo Sousa 2013-08-01 16:20:04 +01:00
parent 78a72a6c1e
commit 0e76283c9a
1 changed files with 142 additions and 95 deletions

View File

@ -32,15 +32,22 @@
void *url_tree = NULL; void *url_tree = NULL;
GTree *url_btree = NULL; GTree *url_btree = NULL;
static int do_an_re (const char *word, int *start, int *end, int *type); static gboolean regex_match (const GRegex *re, const char *word,
static GRegex *re_url (void); int *start, int *end);
static GRegex *re_host (void); static const GRegex *re_url (void);
static GRegex *re_host6 (void); static const GRegex *re_host (void);
static GRegex *re_email (void); static const GRegex *re_host6 (void);
static GRegex *re_nick (void); static const GRegex *re_email (void);
static GRegex *re_channel (void); static const GRegex *re_nick (void);
static GRegex *re_path (void); static const GRegex *re_channel (void);
static const GRegex *re_path (void);
static gboolean match_nick (const char *word, int *start, int *end);
static gboolean match_channel (const char *word, int *start, int *end);
static gboolean match_email (const char *word, int *start, int *end);
static gboolean match_url (const char *word, int *start, int *end);
static gboolean match_host (const char *word, int *start, int *end);
static gboolean match_host6 (const char *word, int *start, int *end);
static gboolean match_path (const char *word, int *start, int *end);
static int static int
url_free (char *url, void *data) url_free (char *url, void *data)
@ -189,50 +196,108 @@ static int laststart = 0;
static int lastend = 0; static int lastend = 0;
static int lasttype = 0; static int lasttype = 0;
static int #define NICKPRE "~+!@%&"
strchrs (char c, char *s) #define CHANPRE "#&!+"
{
while (*s)
if (c == *s++)
return TRUE;
return FALSE;
}
#define NICKPRE "~+!@%%&"
int int
url_check_word (const char *word) url_check_word (const char *word)
{ {
struct {
gboolean (*match) (const char *word, int *start, int *end);
int type;
} m[] = {
{ match_url, WORD_URL },
{ match_email, WORD_EMAIL },
{ match_nick, WORD_NICK },
{ match_channel, WORD_CHANNEL },
{ match_host6, WORD_HOST6 },
{ match_host, WORD_HOST },
{ match_path, WORD_PATH },
{ NULL, 0}
};
int i;
laststart = lastend = lasttype = 0; laststart = lastend = lasttype = 0;
if (do_an_re (word, &laststart, &lastend, &lasttype))
{ for (i = 0; m[i].match; i++)
switch (lasttype) if (m[i].match (word, &laststart, &lastend))
{ {
lasttype = m[i].type;
return lasttype;
}
return 0;
}
static gboolean
match_nick (const char *word, int *start, int *end)
{
const server *serv = current_sess->server;
const char *nick_prefixes = serv ? serv->nick_prefixes : NICKPRE;
char *str; char *str;
case WORD_NICK: if (!regex_match (re_nick (), word, start, end))
if (strchrs (word[laststart], NICKPRE)) return FALSE;
laststart++;
str = g_strndup (&word[laststart], lastend - laststart); /* ignore matches with prefixes that the server doesn't use */
if (strchr (NICKPRE, word[*start])
&& !strchr (nick_prefixes, word[*start]))
return FALSE;
/* nick prefix is not part of the matched word */
if (strchr (nick_prefixes, word[*start]))
(*start)++;
str = g_strndup (&word[*start], *end - *start);
if (!userlist_find (current_sess, str)) if (!userlist_find (current_sess, str))
lasttype = 0; return FALSE;
g_free (str); g_free (str);
return lasttype;
case WORD_EMAIL: return TRUE;
if (!isalnum (word[laststart])) }
laststart++;
/* Fall through */ static gboolean
case WORD_URL: match_channel (const char *word, int *start, int *end)
case WORD_HOST: {
case WORD_HOST6: const server *serv = current_sess->server;
case WORD_CHANNEL: const char *chan_prefixes = serv ? serv->chantypes : CHANPRE;
case WORD_PATH:
return lasttype; if (!regex_match (re_channel (), word, start, end))
default: return FALSE;
return 0; /* Should not occur */
} return strchr (chan_prefixes, word[*start]) != NULL;
} }
else
return 0; static gboolean
match_email (const char *word, int *start, int *end)
{
return regex_match (re_email (), word, start, end);
}
/*
 * Try to match `word` as a URL.
 *
 * Delegates to regex_match() using the compiled pattern returned by
 * re_url(). `start` and `end` are forwarded unchanged so regex_match()
 * can store the match offsets in them.
 *
 * Returns the result of regex_match() (TRUE when the pattern matched).
 */
static gboolean
match_url (const char *word, int *start, int *end)
{
	gboolean matched = regex_match (re_url (), word, start, end);

	return matched;
}
/*
 * Try to match `word` as a host name.
 *
 * Delegates to regex_match() using the compiled pattern returned by
 * re_host(). `start` and `end` are forwarded unchanged so regex_match()
 * can store the match offsets in them.
 *
 * Returns the result of regex_match() (TRUE when the pattern matched).
 */
static gboolean
match_host (const char *word, int *start, int *end)
{
	gboolean matched = regex_match (re_host (), word, start, end);

	return matched;
}
/*
 * Try to match `word` against the host6 pattern.
 *
 * Delegates to regex_match() using the compiled pattern returned by
 * re_host6(). `start` and `end` are forwarded unchanged so regex_match()
 * can store the match offsets in them.
 *
 * Returns the result of regex_match() (TRUE when the pattern matched).
 */
static gboolean
match_host6 (const char *word, int *start, int *end)
{
	gboolean matched = regex_match (re_host6 (), word, start, end);

	return matched;
}
static gboolean
match_path (const char *word, int *start, int *end)
{
return regex_match (re_path (), word, start, end);
} }
/* List of IRC commands for which contents (and thus possible URLs) /* List of IRC commands for which contents (and thus possible URLs)
@ -307,46 +372,28 @@ url_last (int *lstart, int *lend)
return lasttype; return lasttype;
} }
static int static gboolean
do_an_re(const char *word, int *start, int *end, int *type) regex_match (const GRegex *re, const char *word, int *start, int *end)
{ {
typedef struct func_s {
GRegex *(*fn)(void);
int type;
} func_t;
func_t funcs[] =
{
{ re_url, WORD_URL },
{ re_email, WORD_EMAIL },
{ re_channel, WORD_CHANNEL },
{ re_host6, WORD_HOST6 },
{ re_host, WORD_HOST },
{ re_path, WORD_PATH },
{ re_nick, WORD_NICK }
};
GMatchInfo *gmi; GMatchInfo *gmi;
int k;
for (k = 0; k < sizeof funcs / sizeof (func_t); k++) g_regex_match (re, word, 0, &gmi);
{
g_regex_match (funcs[k].fn(), word, 0, &gmi);
if (!g_match_info_matches (gmi)) if (!g_match_info_matches (gmi))
{ {
g_match_info_free (gmi); g_match_info_free (gmi);
continue; return FALSE;
} }
while (g_match_info_matches (gmi)) while (g_match_info_matches (gmi))
{ {
g_match_info_fetch_pos (gmi, 0, start, end); g_match_info_fetch_pos (gmi, 0, start, end);
g_match_info_next (gmi, NULL); g_match_info_next (gmi, NULL);
} }
g_match_info_free (gmi);
*type = funcs[k].type;
return TRUE;
}
return FALSE; g_match_info_free (gmi);
return TRUE;
} }
/* Miscellaneous description --- */ /* Miscellaneous description --- */
@ -376,7 +423,7 @@ make_re (char *grist)
/* HOST description --- */ /* HOST description --- */
/* (see miscellaneous above) */ /* (see miscellaneous above) */
static GRegex * static const GRegex *
re_host (void) re_host (void)
{ {
static GRegex *host_ret; static GRegex *host_ret;
@ -384,7 +431,7 @@ re_host (void)
if (host_ret) return host_ret; if (host_ret) return host_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
"(" HOST_URL PORT ")|(" HOST ")" "(" HOST_URL PORT ")|(" HOST ")"
")" ")"
@ -393,7 +440,7 @@ re_host (void)
return host_ret; return host_ret;
} }
static GRegex * static const GRegex *
re_host6 (void) re_host6 (void)
{ {
static GRegex *host6_ret; static GRegex *host6_ret;
@ -401,7 +448,7 @@ re_host6 (void)
if (host6_ret) return host6_ret; if (host6_ret) return host6_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
"(" IPV6ADDR ")|(" "\\[" IPV6ADDR "\\]" PORT ")" "(" IPV6ADDR ")|(" "\\[" IPV6ADDR "\\]" PORT ")"
")" ")"
@ -489,7 +536,7 @@ struct
{ NULL, "", 0} { NULL, "", 0}
}; };
static GRegex * static const GRegex *
re_url (void) re_url (void)
{ {
static GRegex *url_ret = NULL; static GRegex *url_ret = NULL;
@ -546,7 +593,7 @@ re_url (void)
/* EMAIL description --- */ /* EMAIL description --- */
#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST_URL ")" #define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST_URL ")"
static GRegex * static const GRegex *
re_email (void) re_email (void)
{ {
static GRegex *email_ret; static GRegex *email_ret;
@ -554,7 +601,7 @@ re_email (void)
if (email_ret) return email_ret; if (email_ret) return email_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
EMAIL EMAIL
")" ")"
@ -582,7 +629,7 @@ re_email (void)
#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*" #define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*"
#define NICK NICK0 NICK1 #define NICK NICK0 NICK1
static GRegex * static const GRegex *
re_nick (void) re_nick (void)
{ {
static GRegex *nick_ret; static GRegex *nick_ret;
@ -590,7 +637,7 @@ re_nick (void)
if (nick_ret) return nick_ret; if (nick_ret) return nick_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
NICK NICK
")" ")"
@ -600,9 +647,9 @@ re_nick (void)
} }
/* CHANNEL description --- */ /* CHANNEL description --- */
#define CHANNEL "#[^ \t\a,:]+" #define CHANNEL "[" CHANPRE "][^ \t\a,:]+"
static GRegex * static const GRegex *
re_channel (void) re_channel (void)
{ {
static GRegex *channel_ret; static GRegex *channel_ret;
@ -610,7 +657,7 @@ re_channel (void)
if (channel_ret) return channel_ret; if (channel_ret) return channel_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
CHANNEL CHANNEL
")" ")"
@ -628,7 +675,7 @@ re_channel (void)
#define FS_PATH "^(/|\\./|\\.\\./).*" #define FS_PATH "^(/|\\./|\\.\\./).*"
#endif #endif
static GRegex * static const GRegex *
re_path (void) re_path (void)
{ {
static GRegex *path_ret; static GRegex *path_ret;
@ -636,7 +683,7 @@ re_path (void)
if (path_ret) return path_ret; if (path_ret) return path_ret;
grist = g_strdup_printf ( grist = g_strdup (
"(" "("
FS_PATH FS_PATH
")" ")"