From be2d100d2d06c3ea31934fea30289bc1c88343ea Mon Sep 17 00:00:00 2001 From: RichardHitt Date: Mon, 14 Jan 2013 12:33:08 -0800 Subject: [PATCH 1/4] Parse url according to rfc 1738: add separate define for TLD. --- src/common/url.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/url.c b/src/common/url.c index 0ee09988..1f3277cd 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -333,9 +333,10 @@ do_an_re(const char *word,int *start, int *end, int *type) } /* Miscellaneous description --- */ -#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\.[a-z]+" +#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\." +#define TLD "[a-z][-a-z0-9]*[a-z]" #define IPADDR "[0-9]+(\\.[0-9]+){3}" -#define HOST "(" DOMAIN "|" IPADDR ")" +#define HOST "(" DOMAIN TLD "|" IPADDR ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" GRegex * From d5fbea3411d069cf8c0ae636518a7dc86dd692f6 Mon Sep 17 00:00:00 2001 From: RichardHitt Date: Tue, 15 Jan 2013 01:23:37 -0800 Subject: [PATCH 2/4] Fix DOMAIN and TLD per rfc 1738, thanks Arnavion --- src/common/url.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/url.c b/src/common/url.c index 1f3277cd..f3460aca 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -333,10 +333,10 @@ do_an_re(const char *word,int *start, int *end, int *type) } /* Miscellaneous description --- */ -#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\." -#define TLD "[a-z][-a-z0-9]*[a-z]" +#define DOMAIN "([a-z0-9]|([a-z0-9][-a-z0-9]*[a-z0-9]))\\." +#define TLD "([a-z][-a-z0-9]*[a-z0-9])" #define IPADDR "[0-9]+(\\.[0-9]+){3}" -#define HOST "(" DOMAIN TLD "|" IPADDR ")" +#define HOST "(" "(" "(" DOMAIN ")*" TLD ")" "|" IPADDR ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" GRegex * From bb617ccacd7fc46382d14034fdf0a8ccc34f8be3 Mon Sep 17 00:00:00 2001 From: RichardHitt Date: Tue, 15 Jan 2013 13:19:29 -0800 Subject: [PATCH 3/4] Prev called anything a URL! Reverted but slight DOMAIN change. --- src/common/url.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/url.c b/src/common/url.c index f3460aca..1f3277cd 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -333,10 +333,10 @@ do_an_re(const char *word,int *start, int *end, int *type) } /* Miscellaneous description --- */ -#define DOMAIN "([a-z0-9]|([a-z0-9][-a-z0-9]*[a-z0-9]))\\." -#define TLD "([a-z][-a-z0-9]*[a-z0-9])" +#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\." +#define TLD "[a-z][-a-z0-9]*[a-z]" #define IPADDR "[0-9]+(\\.[0-9]+){3}" -#define HOST "(" "(" "(" DOMAIN ")*" TLD ")" "|" IPADDR ")" +#define HOST "(" DOMAIN TLD "|" IPADDR ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" GRegex * From 7bc0e017f6de8a8f05e9d6202fe8737e2eb75c60 Mon Sep 17 00:00:00 2001 From: RichardHitt Date: Tue, 15 Jan 2013 13:26:11 -0800 Subject: [PATCH 4/4] Oops. didn't get the DOMAIN slight change last time. Here it is now. --- src/common/url.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/url.c b/src/common/url.c index 1f3277cd..89047511 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -333,7 +333,7 @@ do_an_re(const char *word,int *start, int *end, int *type) } /* Miscellaneous description --- */ -#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\." +#define DOMAIN "[a-z0-9][-a-z0-9]*(\\.[-a-z0-9]+)*\\." #define TLD "[a-z][-a-z0-9]*[a-z]" #define IPADDR "[0-9]+(\\.[0-9]+){3}" #define HOST "(" DOMAIN TLD "|" IPADDR ")"