# Reopens Twitter::Regex and (re)assigns the URL-related entries of REGEXEN.
#
# REGEXEN itself, and the keys :valid_url_preceding_chars, :valid_domain,
# :valid_port_number, :valid_url_query_chars and :valid_url_query_ending_chars,
# are not defined in this file; they must already exist when it is loaded
# (presumably provided by the twitter-text gem -- TODO confirm).
#
# The assignments run top to bottom and each interpolation captures the value
# stored under the referenced key at that moment, so statement order matters.
module Twitter
  class Regex
    # One character allowed inside a URL path: anything that is not
    # whitespace, an angle bracket, a parenthesis or a question mark.
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou

    # What a URL path may end with: one character outside the excluded
    # whitespace/punctuation set, or a balanced-parentheses group.
    #
    # NOTE(review): :valid_url_balanced_parens is interpolated here *before*
    # it is reassigned below, so this pattern embeds whatever value that key
    # held before this file ran, not the definition that follows. Confirm this
    # is intended before reordering; moving it changes which URLs match.
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

    # A parenthesised run of path characters, optionally containing a single
    # nested parenthesised run.
    REGEXEN[:valid_url_balanced_parens] = /
      \(
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}+
          |
          # allow one nested level of balanced parentheses
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}*
            \(
              #{REGEXEN[:valid_general_url_path_chars]}+
            \)
            #{REGEXEN[:valid_general_url_path_chars]}*
          )
        )
      \)
    /iox

    # One path piece. Either:
    #   * optional path characters, any number of balanced-parentheses groups
    #     (each optionally followed by more path characters), then a valid
    #     ending character; or
    #   * one or more path characters followed by a slash.
    REGEXEN[:valid_url_path] = /(?:
      (?:
        #{REGEXEN[:valid_general_url_path_chars]}*
        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
        #{REGEXEN[:valid_url_path_ending_chars]}
      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
    )/iox

    # Full URL matcher. The capture group numbers are listed in the inline
    # comments of the pattern itself.
    REGEXEN[:valid_url] = %r{
      ( # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
        ( # $3 URL
          ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
          (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
          (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
          (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
        )
      )
    }iox
  end
end