fix: tighten URL host/IP matching; reject more bogus hosts, keep IPv6 brackets right

This commit is contained in:
2026-03-16 10:31:49 -06:00
parent b316d4a281
commit 0bcd369426

View File

@@ -292,7 +292,13 @@ match_host (const char *word, int *start, int *end)
static gboolean static gboolean
match_host6 (const char *word, int *start, int *end) match_host6 (const char *word, int *start, int *end)
{ {
return regex_match (re_host6 (), word, start, end); if (!regex_match (re_host6 (), word, start, end))
return FALSE;
if (word[*start] != '[')
return FALSE;
return TRUE;
} }
static gboolean static gboolean
@@ -373,40 +379,64 @@ url_last (int *lstart, int *lend)
} }
static gboolean static gboolean
regex_match (const GRegex *re, const char *word, int *start, int *end) match_has_valid_context (const char *word, int start, int end)
{ {
GMatchInfo *gmi; if (start > 0)
{
char prev = word[start - 1];
if (g_ascii_isalnum ((guchar)prev) || prev == '_' || prev == '-')
return FALSE;
}
g_regex_match (re, word, 0, &gmi); if (word[end] != '\0')
if (!g_match_info_matches (gmi))
{ {
g_match_info_free (gmi); char next = word[end];
return FALSE; if (g_ascii_isalnum ((guchar)next) || next == '_')
return FALSE;
} }
while (g_match_info_matches (gmi))
{
g_match_info_fetch_pos (gmi, 0, start, end);
g_match_info_next (gmi, NULL);
}
g_match_info_free (gmi);
return TRUE; return TRUE;
} }
static gboolean
regex_match (const GRegex *re, const char *word, int *start, int *end)
{
GMatchInfo *gmi;
gboolean found = FALSE;
int mstart;
int mend;
g_regex_match (re, word, 0, &gmi);
while (g_match_info_matches (gmi))
{
g_match_info_fetch_pos (gmi, 0, &mstart, &mend);
if (match_has_valid_context (word, mstart, mend))
{
*start = mstart;
*end = mend;
found = TRUE;
}
g_match_info_next (gmi, NULL);
}
g_match_info_free (gmi);
return found;
}
/* Miscellaneous description --- */ /* Miscellaneous description --- */
#define DOMAIN "[_\\pL\\pN\\pS][-_\\pL\\pN\\pS]*(\\.[-_\\pL\\pN\\pS]+)*" #define DOMAIN_LABEL "[\\pL\\pN](?:[-\\pL\\pN]{0,61}[\\pL\\pN])?"
#define TLD "\\.[\\pL][-\\pL\\pN]*[\\pL]" #define DOMAIN DOMAIN_LABEL "(\\." DOMAIN_LABEL ")*"
#define IPADDR "[0-9]{1,3}(\\.[0-9]{1,3}){3}" #define TLD "\\.[\\pL](?:[-\\pL\\pN]*[\\pL\\pN])?"
#define IPADDR "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}"
#define IPV6GROUP "([0-9a-f]{0,4})" #define IPV6GROUP "([0-9a-f]{0,4})"
#define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \ #define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \
"|(" IPV6GROUP "(:" IPV6GROUP ")*:(:" IPV6GROUP ")+))" /* with :: compression */ "|(" IPV6GROUP "(:" IPV6GROUP ")*:(:" IPV6GROUP ")+))" /* with :: compression */
#define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")" #define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")"
/* In urls the IPv6 must be enclosed in square brackets */ /* In urls the IPv6 must be enclosed in square brackets */
#define HOST_URL "(" DOMAIN TLD "|" IPADDR "|" "\\[" IPV6ADDR "\\]" ")" #define HOST_URL "(" DOMAIN TLD "|" IPADDR "|" "\\[" IPV6ADDR "\\]" ")"
#define HOST_URL_OPT_TLD "(" DOMAIN "|" HOST_URL ")" #define HOST_URL_OPT_TLD HOST_URL
#define PORT "(:[1-9][0-9]{0,4})" #define PORT "(:[1-9][0-9]{0,4})"
#define OPT_PORT "(" PORT ")?" #define OPT_PORT "(" PORT ")?"