mirror of
https://github.com/ZoiteChat/zoitechat.git
synced 2026-03-10 16:00:18 +00:00
Fixed highlight word parsing so that Unicode symbols (including emoji like 🐜) are treated as part of words when scanning incoming text for alert matches. This allows /Extra words to highlight/ entries that contain emoji to match correctly.
Improved token scanning to use UTF-8-aware character classification (gunichar, g_unichar_isdigit, g_unichar_isalpha) instead of byte-only checks, so that multibyte characters are no longer split or misclassified.
This commit is contained in:
@@ -258,6 +258,7 @@ alert_match_text (char *text, char *masks)
|
|||||||
{
|
{
|
||||||
unsigned char *p = text;
|
unsigned char *p = text;
|
||||||
unsigned char endchar;
|
unsigned char endchar;
|
||||||
|
gunichar ch;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
if (masks[0] == 0)
|
if (masks[0] == 0)
|
||||||
@@ -265,26 +266,36 @@ alert_match_text (char *text, char *masks)
|
|||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
if (*p >= '0' && *p <= '9')
|
ch = g_utf8_get_char (p);
|
||||||
|
|
||||||
|
if (g_unichar_isdigit (ch) || g_unichar_isalpha (ch))
|
||||||
{
|
{
|
||||||
p++;
|
p += g_utf8_skip [p[0]];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if it's RFC1459 <special>, it can be inside a word */
|
/* if it's RFC1459 <special>, it can be inside a word */
|
||||||
switch (*p)
|
switch (ch)
|
||||||
{
|
{
|
||||||
case '-': case '[': case ']': case '\\':
|
case '-': case '[': case ']': case '\\':
|
||||||
case '`': case '^': case '{': case '}':
|
case '`': case '^': case '{': case '}':
|
||||||
case '_': case '|':
|
case '_': case '|':
|
||||||
p++;
|
p += g_utf8_skip [p[0]];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Symbols (including emoji) can be part of highlighted words. */
|
||||||
|
if (!g_unichar_isspace (ch) && !g_unichar_ispunct (ch) &&
|
||||||
|
!g_unichar_iscntrl (ch))
|
||||||
|
{
|
||||||
|
p += g_utf8_skip [p[0]];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if it's a 0, space or comma, the word has ended. */
|
/* if it's a 0, space or comma, the word has ended. */
|
||||||
if (*p == 0 || *p == ' ' || *p == ',' ||
|
if (*p == 0 || *p == ' ' || *p == ',' ||
|
||||||
/* if it's anything BUT a letter, the word has ended. */
|
/* if it's anything BUT a letter, the word has ended. */
|
||||||
(!g_unichar_isalpha (g_utf8_get_char (p))))
|
(!g_unichar_isalpha (ch)))
|
||||||
{
|
{
|
||||||
endchar = *p;
|
endchar = *p;
|
||||||
*p = 0;
|
*p = 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user