mirror of
https://github.com/ZoiteChat/zoitechat.git
synced 2026-04-14 15:40:19 +00:00
Compare commits
2 Commits
master
...
domain-url
| Author | SHA1 | Date | |
|---|---|---|---|
| 9b89c7782c | |||
| 8f7c40caf1 |
@@ -29,6 +29,7 @@
|
|||||||
<ClInclude Include="plugin-timer.h" />
|
<ClInclude Include="plugin-timer.h" />
|
||||||
<ClInclude Include="plugin.h" />
|
<ClInclude Include="plugin.h" />
|
||||||
<ClInclude Include="proto-irc.h" />
|
<ClInclude Include="proto-irc.h" />
|
||||||
|
<ClInclude Include="public_suffix_data.h" />
|
||||||
<ClInclude Include="server.h" />
|
<ClInclude Include="server.h" />
|
||||||
<ClInclude Include="servlist.h" />
|
<ClInclude Include="servlist.h" />
|
||||||
<ClInclude Include="ssl.h" />
|
<ClInclude Include="ssl.h" />
|
||||||
@@ -109,6 +110,7 @@
|
|||||||
<Command><![CDATA[
|
<Command><![CDATA[
|
||||||
SET SOLUTIONDIR=$(SolutionDir)..\
|
SET SOLUTIONDIR=$(SolutionDir)..\
|
||||||
"$(Python3Path)\python.exe" $(ProjectDir)make-te.py "$(ProjectDir)textevents.in" "$(ZoiteChatLib)textevents.h" "$(ZoiteChatLib)textenums.h"
|
"$(Python3Path)\python.exe" $(ProjectDir)make-te.py "$(ProjectDir)textevents.in" "$(ZoiteChatLib)textevents.h" "$(ZoiteChatLib)textenums.h"
|
||||||
|
"$(Python3Path)\python.exe" $(ProjectDir)gen-public-suffix.py "$(ZoiteChatLib)public_suffix_data.h"
|
||||||
powershell -File "$(SolutionDir)..\win32\version-template.ps1" "$(SolutionDir)..\win32\config.h.tt" "$(ZoiteChatLib)config.h"
|
powershell -File "$(SolutionDir)..\win32\version-template.ps1" "$(SolutionDir)..\win32\config.h.tt" "$(ZoiteChatLib)config.h"
|
||||||
$(GlibGenMarshal) --prefix=_zoitechat_marshal --header "$(ProjectDir)marshalers.list" --output "$(ZoiteChatLib)marshal.h"
|
$(GlibGenMarshal) --prefix=_zoitechat_marshal --header "$(ProjectDir)marshalers.list" --output "$(ZoiteChatLib)marshal.h"
|
||||||
$(GlibGenMarshal) --prefix=_zoitechat_marshal --body "$(ProjectDir)marshalers.list" --output "$(ZoiteChatLib)marshal.c"
|
$(GlibGenMarshal) --prefix=_zoitechat_marshal --body "$(ProjectDir)marshalers.list" --output "$(ZoiteChatLib)marshal.c"
|
||||||
|
|||||||
@@ -65,6 +65,9 @@
|
|||||||
<ClInclude Include="proto-irc.h">
|
<ClInclude Include="proto-irc.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="public_suffix_data.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
<ClInclude Include="server.h">
|
<ClInclude Include="server.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
|||||||
71
src/common/gen-public-suffix.py
Normal file
71
src/common/gen-public-suffix.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Download locations for the Public Suffix List; main() tries them in order.
URLS = (
    "https://raw.githubusercontent.com/publicsuffix/list/main/public_suffix_list.dat",
    "https://publicsuffix.org/list/public_suffix_list.dat",
)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_rules(text: str):
    """Extract the rule entries from Public Suffix List text.

    Blank lines and lines starting with ``//`` are skipped.  When a line
    contains a space or tab, only the text before the first whitespace is
    kept.  Rules are lower-cased, de-duplicated and returned as a sorted list.
    """
    unique = set()
    for entry in map(str.strip, text.splitlines()):
        if entry == "" or entry.startswith("//"):
            continue
        if " " in entry or "\t" in entry:
            # Anything after the first whitespace is not part of the rule.
            entry = entry.split()[0]
        unique.add(entry.lower())
    return sorted(unique)
|
||||||
|
|
||||||
|
|
||||||
|
def emit_header(path: str, rules):
    """Write a C header that declares the rules as a string array.

    The header defines ``public_suffix_rules`` (one string literal per rule,
    with backslashes and double quotes escaped) and
    ``public_suffix_rules_len`` (the element count).  The file is written as
    UTF-8 with ``\\n`` line endings.
    """
    # Single-pass equivalent of escaping backslashes first, then quotes.
    c_escapes = str.maketrans({"\\": "\\\\", '"': '\\"'})

    chunks = [
        "#pragma once\n",
        "static const char * const public_suffix_rules[] = {\n",
    ]
    chunks.extend('\t"' + rule.translate(c_escapes) + '",\n' for rule in rules)
    chunks.append("};\n")
    chunks.append(
        "static const unsigned int public_suffix_rules_len = sizeof(public_suffix_rules) / sizeof(public_suffix_rules[0]);\n"
    )

    with open(path, "w", encoding="utf-8", newline="\n") as out:
        out.writelines(chunks)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_local(candidates):
    """Return the text of the first readable file in ``candidates``, or None."""
    for candidate in candidates:
        if not candidate.exists():
            continue
        try:
            return candidate.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            # An unreadable candidate should not abort the build while other
            # sources remain to be tried.
            print(f"gen-public-suffix: cannot read {candidate}: {exc}", file=sys.stderr)
    return None


def _download():
    """Return the list text from the first entry of URLS that answers, or None."""
    for url in URLS:
        try:
            with urllib.request.urlopen(url, timeout=30) as resp:
                return resp.read().decode("utf-8")
        except Exception as exc:
            # Best effort: any failure moves on to the next source, but the
            # reason is reported instead of being swallowed.
            print(f"gen-public-suffix: download from {url} failed: {exc}", file=sys.stderr)
    return None


def main():
    """Generate the public-suffix C header.

    Usage: ``gen-public-suffix.py <output> [source]``

    Sources are tried in this order:
      1. the explicit ``source`` argument, when given and readable, so the
         output follows the input the build system declared;
      2. the download locations in ``URLS``;
      3. a ``public_suffix_list.dat`` next to this script, then the copies
         under ``/usr/share`` and ``/app/share``.

    Exits with an error message when no source can be loaded or when the
    loaded text yields no rules (an empty array initializer is not valid C
    before C23).
    """
    if len(sys.argv) not in (2, 3):
        raise SystemExit("usage: gen-public-suffix.py <output> [source]")

    output = Path(sys.argv[1])
    explicit = [Path(sys.argv[2])] if len(sys.argv) == 3 else []
    fallbacks = [
        Path(__file__).with_name("public_suffix_list.dat"),
        Path("/usr/share/publicsuffix/public_suffix_list.dat"),
        Path("/app/share/publicsuffix/public_suffix_list.dat"),
    ]

    data = _read_local(explicit)
    if data is None:
        data = _download()
    if data is None:
        data = _read_local(fallbacks)
    if data is None:
        raise SystemExit("unable to load public suffix list")

    rules = parse_rules(data)
    if not rules:
        raise SystemExit("public suffix list contains no rules")
    emit_header(str(output), rules)


if __name__ == "__main__":
    main()
|
||||||
@@ -96,6 +96,15 @@ marshal = [
|
|||||||
|
|
||||||
make_te = find_program('make-te.py')
|
make_te = find_program('make-te.py')
|
||||||
|
|
||||||
|
|
||||||
|
python3 = find_program('python3', required: true)
|
||||||
|
|
||||||
|
public_suffix_data = custom_target('public_suffix_data_h',
|
||||||
|
input: 'public_suffix_list.dat',
|
||||||
|
output: 'public_suffix_data.h',
|
||||||
|
command: [python3, files('gen-public-suffix.py'), '@OUTPUT@', '@INPUT@']
|
||||||
|
)
|
||||||
|
|
||||||
textevents = custom_target('textevents',
|
textevents = custom_target('textevents',
|
||||||
input: 'textevents.in',
|
input: 'textevents.in',
|
||||||
output: ['textevents.h', 'textenums.h'],
|
output: ['textevents.h', 'textenums.h'],
|
||||||
@@ -119,7 +128,7 @@ if get_option('plugin')
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
zoitechat_common = static_library('zoitechatcommon',
|
zoitechat_common = static_library('zoitechatcommon',
|
||||||
sources: [textevents] + marshal + common_sources,
|
sources: [textevents, public_suffix_data] + marshal + common_sources,
|
||||||
include_directories: config_h_include,
|
include_directories: config_h_include,
|
||||||
dependencies: common_deps + common_sysinfo_deps,
|
dependencies: common_deps + common_sysinfo_deps,
|
||||||
c_args: common_cflags,
|
c_args: common_cflags,
|
||||||
@@ -127,7 +136,7 @@ zoitechat_common = static_library('zoitechatcommon',
|
|||||||
)
|
)
|
||||||
|
|
||||||
zoitechat_common_dep = declare_dependency(
|
zoitechat_common_dep = declare_dependency(
|
||||||
sources: [textevents] + marshal,
|
sources: [textevents, public_suffix_data] + marshal,
|
||||||
link_with: zoitechat_common,
|
link_with: zoitechat_common,
|
||||||
include_directories: common_includes,
|
include_directories: common_includes,
|
||||||
compile_args: common_cflags,
|
compile_args: common_cflags,
|
||||||
|
|||||||
146
src/common/url.c
146
src/common/url.c
@@ -20,12 +20,14 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <glib.h>
|
||||||
#include "zoitechat.h"
|
#include "zoitechat.h"
|
||||||
#include "zoitechatc.h"
|
#include "zoitechatc.h"
|
||||||
#include "cfgfiles.h"
|
#include "cfgfiles.h"
|
||||||
#include "fe.h"
|
#include "fe.h"
|
||||||
#include "tree.h"
|
#include "tree.h"
|
||||||
#include "url.h"
|
#include "url.h"
|
||||||
|
#include "public_suffix_data.h"
|
||||||
#ifdef HAVE_STRINGS_H
|
#ifdef HAVE_STRINGS_H
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
#endif
|
#endif
|
||||||
@@ -35,6 +37,7 @@ GTree *url_btree = NULL;
|
|||||||
static gboolean regex_match (const GRegex *re, const char *word,
|
static gboolean regex_match (const GRegex *re, const char *word,
|
||||||
int *start, int *end);
|
int *start, int *end);
|
||||||
static const GRegex *re_url (void);
|
static const GRegex *re_url (void);
|
||||||
|
static const GRegex *re_url_no_scheme (void);
|
||||||
static const GRegex *re_email (void);
|
static const GRegex *re_email (void);
|
||||||
static const GRegex *re_nick (void);
|
static const GRegex *re_nick (void);
|
||||||
static const GRegex *re_channel (void);
|
static const GRegex *re_channel (void);
|
||||||
@@ -42,6 +45,8 @@ static gboolean match_nick (const char *word, int *start, int *end);
|
|||||||
static gboolean match_channel (const char *word, int *start, int *end);
|
static gboolean match_channel (const char *word, int *start, int *end);
|
||||||
static gboolean match_url (const char *word, int *start, int *end);
|
static gboolean match_url (const char *word, int *start, int *end);
|
||||||
static gboolean match_email (const char *word, int *start, int *end);
|
static gboolean match_email (const char *word, int *start, int *end);
|
||||||
|
static gboolean host_has_public_suffix (const char *host);
|
||||||
|
static gboolean host_has_public_suffix_range (const char *word, int start, int end);
|
||||||
|
|
||||||
static int
|
static int
|
||||||
url_free (char *url, void *data)
|
url_free (char *url, void *data)
|
||||||
@@ -266,7 +271,16 @@ match_channel (const char *word, int *start, int *end)
|
|||||||
static gboolean
|
static gboolean
|
||||||
match_url (const char *word, int *start, int *end)
|
match_url (const char *word, int *start, int *end)
|
||||||
{
|
{
|
||||||
return regex_match (re_url (), word, start, end);
|
if (regex_match (re_url (), word, start, end))
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
if (!regex_match (re_url_no_scheme (), word, start, end))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
if (*start > 0 && word[*start - 1] == '@')
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
return host_has_public_suffix_range (word, *start, *end);
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
@@ -393,6 +407,114 @@ regex_match (const GRegex *re, const char *word, int *start, int *end)
|
|||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
host_has_public_suffix_range (const char *word, int start, int end)
|
||||||
|
{
|
||||||
|
char *candidate;
|
||||||
|
const char *host_start;
|
||||||
|
const char *host_end;
|
||||||
|
const char *host_colon;
|
||||||
|
gboolean ok;
|
||||||
|
int host_len;
|
||||||
|
char *host;
|
||||||
|
|
||||||
|
candidate = g_strndup (word + start, end - start);
|
||||||
|
host_start = candidate;
|
||||||
|
host_end = candidate + strlen (candidate);
|
||||||
|
if (*host_start == '[')
|
||||||
|
{
|
||||||
|
g_free (candidate);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
host_colon = strchr (host_start, ':');
|
||||||
|
if (host_colon)
|
||||||
|
host_end = host_colon;
|
||||||
|
host_colon = strchr (host_start, '/');
|
||||||
|
if (host_colon && host_colon < host_end)
|
||||||
|
host_end = host_colon;
|
||||||
|
host_len = (int)(host_end - host_start);
|
||||||
|
if (host_len <= 0)
|
||||||
|
{
|
||||||
|
g_free (candidate);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
host = g_strndup (host_start, host_len);
|
||||||
|
ok = host_has_public_suffix (host);
|
||||||
|
g_free (host);
|
||||||
|
g_free (candidate);
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
static GHashTable *
|
||||||
|
public_suffix_table (void)
|
||||||
|
{
|
||||||
|
static GHashTable *table = NULL;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
if (table)
|
||||||
|
return table;
|
||||||
|
|
||||||
|
table = g_hash_table_new (g_str_hash, g_str_equal);
|
||||||
|
for (i = 0; i < public_suffix_rules_len; i++)
|
||||||
|
{
|
||||||
|
g_hash_table_add (table, (gpointer)public_suffix_rules[i]);
|
||||||
|
}
|
||||||
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
host_has_public_suffix (const char *host)
|
||||||
|
{
|
||||||
|
GHashTable *table;
|
||||||
|
gchar **labels;
|
||||||
|
int i;
|
||||||
|
int n;
|
||||||
|
gboolean matched = FALSE;
|
||||||
|
|
||||||
|
if (!strchr (host, '.'))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
labels = g_strsplit (host, ".", -1);
|
||||||
|
for (n = 0; labels[n]; n++)
|
||||||
|
{
|
||||||
|
if (labels[n][0] == '\0')
|
||||||
|
{
|
||||||
|
g_strfreev (labels);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
table = public_suffix_table ();
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
char *tail = g_strjoinv (".", &labels[i]);
|
||||||
|
if (g_hash_table_contains (table, tail))
|
||||||
|
matched = TRUE;
|
||||||
|
if (i + 1 < n)
|
||||||
|
{
|
||||||
|
char *tail_wild = g_strjoinv (".", &labels[i + 1]);
|
||||||
|
char *wild = g_strconcat ("*.", tail_wild, NULL);
|
||||||
|
if (g_hash_table_contains (table, wild))
|
||||||
|
matched = TRUE;
|
||||||
|
g_free (tail_wild);
|
||||||
|
g_free (wild);
|
||||||
|
}
|
||||||
|
if (i > 0)
|
||||||
|
{
|
||||||
|
char *exc = g_strconcat ("!", tail, NULL);
|
||||||
|
if (g_hash_table_contains (table, exc))
|
||||||
|
matched = TRUE;
|
||||||
|
g_free (exc);
|
||||||
|
}
|
||||||
|
g_free (tail);
|
||||||
|
if (matched)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_strfreev (labels);
|
||||||
|
return matched;
|
||||||
|
}
|
||||||
|
|
||||||
/* Miscellaneous description --- */
|
/* Miscellaneous description --- */
|
||||||
#define DOMAIN_LABEL "[\\pL\\pN](?:[-\\pL\\pN]{0,61}[\\pL\\pN])?"
|
#define DOMAIN_LABEL "[\\pL\\pN](?:[-\\pL\\pN]{0,61}[\\pL\\pN])?"
|
||||||
#define DOMAIN DOMAIN_LABEL "(\\." DOMAIN_LABEL ")*"
|
#define DOMAIN DOMAIN_LABEL "(\\." DOMAIN_LABEL ")*"
|
||||||
@@ -477,6 +599,28 @@ re_url (void)
|
|||||||
return url_ret;
|
return url_ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const GRegex *
|
||||||
|
re_url_no_scheme (void)
|
||||||
|
{
|
||||||
|
static GRegex *url_ret = NULL;
|
||||||
|
GString *grist_gstr;
|
||||||
|
char *grist;
|
||||||
|
|
||||||
|
if (url_ret) return url_ret;
|
||||||
|
|
||||||
|
grist_gstr = g_string_new (NULL);
|
||||||
|
g_string_append (grist_gstr, "(");
|
||||||
|
g_string_append (grist_gstr, HOST_URL_OPT_TLD OPT_PORT);
|
||||||
|
g_string_append_printf (grist_gstr, "(/" PATH ")?");
|
||||||
|
g_string_append (grist_gstr, ")");
|
||||||
|
|
||||||
|
grist = g_string_free (grist_gstr, FALSE);
|
||||||
|
url_ret = make_re (grist);
|
||||||
|
g_free (grist);
|
||||||
|
|
||||||
|
return url_ret;
|
||||||
|
}
|
||||||
|
|
||||||
#define EMAIL_LOCAL_ATOM "[\\pL\\pN!#$%&'*+/=?^_`{|}~-]+"
|
#define EMAIL_LOCAL_ATOM "[\\pL\\pN!#$%&'*+/=?^_`{|}~-]+"
|
||||||
#define EMAIL_LOCAL EMAIL_LOCAL_ATOM "(\\." EMAIL_LOCAL_ATOM ")*"
|
#define EMAIL_LOCAL EMAIL_LOCAL_ATOM "(\\." EMAIL_LOCAL_ATOM ")*"
|
||||||
#define EMAIL EMAIL_LOCAL "@" DOMAIN TLD
|
#define EMAIL EMAIL_LOCAL "@" DOMAIN TLD
|
||||||
|
|||||||
Reference in New Issue
Block a user