2011-02-24 04:14:30 +01:00
|
|
|
/* X-Chat
|
|
|
|
|
* Copyright (C) 1998 Peter Zelezny.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program; if not, write to the Free Software
|
2013-01-02 14:58:26 -08:00
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
2011-02-24 04:14:30 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <ctype.h>
|
2026-01-05 23:12:38 -07:00
|
|
|
#include "zoitechat.h"
|
|
|
|
|
#include "zoitechatc.h"
|
2011-02-24 04:14:30 +01:00
|
|
|
#include "cfgfiles.h"
|
|
|
|
|
#include "fe.h"
|
|
|
|
|
#include "tree.h"
|
|
|
|
|
#include "url.h"
|
|
|
|
|
#ifdef HAVE_STRINGS_H
|
|
|
|
|
#include <strings.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
void *url_tree = NULL;
|
2012-10-01 12:53:25 -07:00
|
|
|
GTree *url_btree = NULL;
|
2013-08-01 16:20:04 +01:00
|
|
|
static gboolean regex_match (const GRegex *re, const char *word,
|
|
|
|
|
int *start, int *end);
|
|
|
|
|
static const GRegex *re_url (void);
|
|
|
|
|
static const GRegex *re_nick (void);
|
|
|
|
|
static const GRegex *re_channel (void);
|
|
|
|
|
static gboolean match_nick (const char *word, int *start, int *end);
|
|
|
|
|
static gboolean match_channel (const char *word, int *start, int *end);
|
|
|
|
|
static gboolean match_url (const char *word, int *start, int *end);
|
2011-02-24 04:14:30 +01:00
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
url_free (char *url, void *data)
|
|
|
|
|
{
|
2014-12-28 06:37:25 -05:00
|
|
|
g_free (url);
|
2011-02-24 04:14:30 +01:00
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
url_clear (void)
|
|
|
|
|
{
|
|
|
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);
|
|
|
|
|
tree_destroy (url_tree);
|
|
|
|
|
url_tree = NULL;
|
2012-10-01 12:53:25 -07:00
|
|
|
g_tree_destroy (url_btree);
|
|
|
|
|
url_btree = NULL;
|
2011-02-24 04:14:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
url_save_cb (char *url, FILE *fd)
|
|
|
|
|
{
|
|
|
|
|
fprintf (fd, "%s\n", url);
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
2012-10-13 10:03:39 +02:00
|
|
|
url_save_tree (const char *fname, const char *mode, gboolean fullpath)
|
2011-02-24 04:14:30 +01:00
|
|
|
{
|
|
|
|
|
FILE *fd;
|
|
|
|
|
|
|
|
|
|
if (fullpath)
|
2026-01-05 23:12:38 -07:00
|
|
|
fd = zoitechat_fopen_file (fname, mode, XOF_FULLPATH);
|
2011-02-24 04:14:30 +01:00
|
|
|
else
|
2026-01-05 23:12:38 -07:00
|
|
|
fd = zoitechat_fopen_file (fname, mode, 0);
|
2011-02-24 04:14:30 +01:00
|
|
|
if (fd == NULL)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, fd);
|
|
|
|
|
fclose (fd);
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-13 10:03:39 +02:00
|
|
|
/* Append a single URL, followed by a newline, to "url.log".
 * Silently does nothing if the log cannot be opened. */
static void
url_save_node (char* url)
{
	FILE *logfile = zoitechat_fopen_file ("url.log", "a", 0);

	if (logfile != NULL)
	{
		fprintf (logfile, "%s\n", url);
		fclose (logfile);
	}
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
url_find (char *urltext)
|
|
|
|
|
{
|
2012-10-01 12:53:25 -07:00
|
|
|
return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));
|
2011-02-24 04:14:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
url_add (char *urltext, int len)
|
|
|
|
|
{
|
2012-03-15 23:58:52 +01:00
|
|
|
char *data;
|
|
|
|
|
int size;
|
|
|
|
|
|
2012-10-22 15:55:43 +02:00
|
|
|
if (!prefs.hex_url_grabber && !prefs.hex_url_logging)
|
2012-10-13 10:03:39 +02:00
|
|
|
{
|
2012-03-15 23:58:52 +01:00
|
|
|
return;
|
2012-10-13 10:03:39 +02:00
|
|
|
}
|
2012-03-15 23:58:52 +01:00
|
|
|
|
2014-12-28 06:37:25 -05:00
|
|
|
data = g_strndup (urltext, len);
|
2011-02-24 04:14:30 +01:00
|
|
|
|
2026-02-25 00:23:42 -07:00
|
|
|
if (data[len - 1] == '.')
|
2011-02-24 04:14:30 +01:00
|
|
|
{
|
|
|
|
|
len--;
|
|
|
|
|
data[len] = 0;
|
|
|
|
|
}
|
2012-10-07 13:27:44 +02:00
|
|
|
if (data[len - 1] == ')' && strchr (data, '(') == NULL)
|
2012-10-13 10:03:39 +02:00
|
|
|
{
|
2011-02-24 04:14:30 +01:00
|
|
|
data[len - 1] = 0;
|
2012-10-13 10:03:39 +02:00
|
|
|
}
|
|
|
|
|
|
2012-10-22 15:55:43 +02:00
|
|
|
if (prefs.hex_url_logging)
|
2012-10-13 10:03:39 +02:00
|
|
|
{
|
|
|
|
|
url_save_node (data);
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-22 15:55:43 +02:00
|
|
|
if (!prefs.hex_url_grabber)
|
2012-10-13 10:03:39 +02:00
|
|
|
{
|
2014-12-28 06:37:25 -05:00
|
|
|
g_free (data);
|
2012-10-13 10:03:39 +02:00
|
|
|
return;
|
|
|
|
|
}
|
2011-02-24 04:14:30 +01:00
|
|
|
|
2012-10-01 12:53:25 -07:00
|
|
|
if (!url_tree)
|
|
|
|
|
{
|
|
|
|
|
url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);
|
|
|
|
|
url_btree = g_tree_new ((GCompareFunc)strcasecmp);
|
|
|
|
|
}
|
|
|
|
|
|
2011-02-24 04:14:30 +01:00
|
|
|
if (url_find (data))
|
|
|
|
|
{
|
2014-12-28 06:37:25 -05:00
|
|
|
g_free (data);
|
2011-02-24 04:14:30 +01:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2012-03-15 23:58:52 +01:00
|
|
|
size = tree_size (url_tree);
|
2012-10-22 15:55:43 +02:00
|
|
|
if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)
|
2012-03-15 23:58:52 +01:00
|
|
|
{
|
2012-10-22 15:55:43 +02:00
|
|
|
size -= prefs.hex_url_grabber_limit;
|
2012-03-15 23:58:52 +01:00
|
|
|
for(; size > 0; size--)
|
2012-10-01 12:53:25 -07:00
|
|
|
{
|
|
|
|
|
char *pos;
|
|
|
|
|
|
|
|
|
|
pos = tree_remove_at_pos (url_tree, 0);
|
|
|
|
|
g_tree_remove (url_btree, pos);
|
2014-12-28 06:37:25 -05:00
|
|
|
g_free (pos);
|
2012-10-01 12:53:25 -07:00
|
|
|
}
|
2012-03-15 23:58:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tree_append (url_tree, data);
|
2012-10-01 12:53:25 -07:00
|
|
|
g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));
|
2011-02-24 04:14:30 +01:00
|
|
|
fe_url_add (data);
|
|
|
|
|
}
|
|
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
static int laststart = 0;
|
|
|
|
|
static int lastend = 0;
|
|
|
|
|
static int lasttype = 0;
|
|
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
#define NICKPRE "~+!@%&"
|
|
|
|
|
#define CHANPRE "#&!+"
|
2013-01-11 01:39:21 -08:00
|
|
|
|
2011-02-24 04:14:30 +01:00
|
|
|
int
|
2013-01-02 14:50:26 -08:00
|
|
|
url_check_word (const char *word)
|
2011-02-24 04:14:30 +01:00
|
|
|
{
|
2013-08-01 16:20:04 +01:00
|
|
|
struct {
|
|
|
|
|
gboolean (*match) (const char *word, int *start, int *end);
|
|
|
|
|
int type;
|
|
|
|
|
} m[] = {
|
2013-08-01 16:39:49 +01:00
|
|
|
{ match_url, WORD_URL },
|
|
|
|
|
{ match_channel, WORD_CHANNEL },
|
2013-10-09 10:09:51 -04:00
|
|
|
{ match_nick, WORD_NICK },
|
2013-08-01 16:20:04 +01:00
|
|
|
{ NULL, 0}
|
|
|
|
|
};
|
|
|
|
|
int i;
|
|
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
laststart = lastend = lasttype = 0;
|
2013-08-01 16:20:04 +01:00
|
|
|
|
|
|
|
|
for (i = 0; m[i].match; i++)
|
|
|
|
|
if (m[i].match (word, &laststart, &lastend))
|
2011-02-24 04:14:30 +01:00
|
|
|
{
|
2013-08-01 16:20:04 +01:00
|
|
|
lasttype = m[i].type;
|
|
|
|
|
return lasttype;
|
2011-02-24 04:14:30 +01:00
|
|
|
}
|
2013-08-01 16:20:04 +01:00
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
match_nick (const char *word, int *start, int *end)
|
|
|
|
|
{
|
|
|
|
|
const server *serv = current_sess->server;
|
|
|
|
|
const char *nick_prefixes = serv ? serv->nick_prefixes : NICKPRE;
|
|
|
|
|
char *str;
|
|
|
|
|
|
|
|
|
|
if (!regex_match (re_nick (), word, start, end))
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
if (strchr (NICKPRE, word[*start])
|
|
|
|
|
&& !strchr (nick_prefixes, word[*start]))
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
/* nick prefix is not part of the matched word */
|
|
|
|
|
if (strchr (nick_prefixes, word[*start]))
|
|
|
|
|
(*start)++;
|
|
|
|
|
|
|
|
|
|
str = g_strndup (&word[*start], *end - *start);
|
|
|
|
|
|
|
|
|
|
if (!userlist_find (current_sess, str))
|
2013-08-02 02:45:54 +01:00
|
|
|
{
|
|
|
|
|
g_free (str);
|
2013-08-01 16:20:04 +01:00
|
|
|
return FALSE;
|
2013-08-02 02:45:54 +01:00
|
|
|
}
|
2013-08-01 16:20:04 +01:00
|
|
|
|
|
|
|
|
g_free (str);
|
|
|
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
match_channel (const char *word, int *start, int *end)
|
|
|
|
|
{
|
|
|
|
|
const server *serv = current_sess->server;
|
|
|
|
|
const char *chan_prefixes = serv ? serv->chantypes : CHANPRE;
|
2013-09-23 14:58:11 -04:00
|
|
|
const char *nick_prefixes = serv ? serv->nick_prefixes : NICKPRE;
|
2013-08-01 16:20:04 +01:00
|
|
|
|
|
|
|
|
if (!regex_match (re_channel (), word, start, end))
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
2013-09-23 14:58:11 -04:00
|
|
|
/* Check for +#channel (for example whois output) */
|
|
|
|
|
if (strchr (nick_prefixes, word[*start]) != NULL
|
|
|
|
|
&& strchr (chan_prefixes, word[*start + 1]) != NULL)
|
2013-09-23 15:10:00 -04:00
|
|
|
{
|
|
|
|
|
(*start)++;
|
2013-09-23 14:58:11 -04:00
|
|
|
return TRUE;
|
2013-09-23 15:10:00 -04:00
|
|
|
}
|
2013-09-23 14:58:11 -04:00
|
|
|
/* Or just #channel */
|
|
|
|
|
else if (strchr (chan_prefixes, word[*start]) != NULL)
|
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
|
|
return FALSE;
|
2013-08-01 16:20:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
|
|
|
match_url (const char *word, int *start, int *end)
|
|
|
|
|
{
|
2026-03-23 13:50:02 -06:00
|
|
|
return regex_match (re_url (), word, start, end);
|
2011-02-24 04:14:30 +01:00
|
|
|
}
|
|
|
|
|
|
2012-11-13 12:06:35 -08:00
|
|
|
/* List of IRC commands for which contents (and thus possible URLs)
|
|
|
|
|
* are visible to the user. NOTE: Trailing blank required in each. */
|
|
|
|
|
static char *commands[] = {
|
|
|
|
|
"NOTICE ",
|
|
|
|
|
"PRIVMSG ",
|
|
|
|
|
"TOPIC ",
|
|
|
|
|
"332 ", /* RPL_TOPIC */
|
|
|
|
|
"372 " /* RPL_MOTD */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
|
|
|
|
|
|
2011-02-24 04:14:30 +01:00
|
|
|
void
|
2014-12-15 10:25:28 -08:00
|
|
|
url_check_line (char *buf)
|
2011-02-24 04:14:30 +01:00
|
|
|
{
|
2013-01-02 14:50:26 -08:00
|
|
|
GRegex *re(void);
|
|
|
|
|
GMatchInfo *gmi;
|
2011-02-24 04:14:30 +01:00
|
|
|
char *po = buf;
|
2024-01-28 19:33:45 -08:00
|
|
|
size_t i;
|
2012-11-13 12:06:35 -08:00
|
|
|
|
|
|
|
|
/* Skip over message prefix */
|
|
|
|
|
if (*po == ':')
|
|
|
|
|
{
|
|
|
|
|
po = strchr (po, ' ');
|
|
|
|
|
if (!po)
|
|
|
|
|
return;
|
|
|
|
|
po++;
|
|
|
|
|
}
|
|
|
|
|
/* Allow only commands from the above list */
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE (commands); i++)
|
|
|
|
|
{
|
|
|
|
|
char *cmd = commands[i];
|
|
|
|
|
int len = strlen (cmd);
|
|
|
|
|
|
|
|
|
|
if (strncmp (cmd, po, len) == 0)
|
|
|
|
|
{
|
|
|
|
|
po += len;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (i == ARRAY_SIZE (commands))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Skip past the channel name or user nick */
|
|
|
|
|
po = strchr (po, ' ');
|
|
|
|
|
if (!po)
|
|
|
|
|
return;
|
|
|
|
|
po++;
|
2011-02-24 04:14:30 +01:00
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
g_regex_match(re_url(), po, 0, &gmi);
|
|
|
|
|
while (g_match_info_matches(gmi))
|
|
|
|
|
{
|
|
|
|
|
int start, end;
|
|
|
|
|
|
|
|
|
|
g_match_info_fetch_pos(gmi, 0, &start, &end);
|
2013-03-29 13:11:17 -07:00
|
|
|
while (end > start && (po[end - 1] == '\r' || po[end - 1] == '\n'))
|
2013-03-31 12:14:52 -07:00
|
|
|
end--;
|
2013-10-21 04:25:36 +01:00
|
|
|
url_add(po + start, end - start);
|
2013-01-02 14:50:26 -08:00
|
|
|
g_match_info_next(gmi, NULL);
|
|
|
|
|
}
|
|
|
|
|
g_match_info_free(gmi);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
url_last (int *lstart, int *lend)
|
|
|
|
|
{
|
|
|
|
|
*lstart = laststart;
|
|
|
|
|
*lend = lastend;
|
|
|
|
|
return lasttype;
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-16 10:31:49 -06:00
|
|
|
static gboolean
|
|
|
|
|
match_has_valid_context (const char *word, int start, int end)
|
|
|
|
|
{
|
|
|
|
|
if (start > 0)
|
|
|
|
|
{
|
|
|
|
|
char prev = word[start - 1];
|
|
|
|
|
if (g_ascii_isalnum ((guchar)prev) || prev == '_' || prev == '-')
|
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (word[end] != '\0')
|
|
|
|
|
{
|
|
|
|
|
char next = word[end];
|
|
|
|
|
if (g_ascii_isalnum ((guchar)next) || next == '_')
|
|
|
|
|
return FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
static gboolean
|
|
|
|
|
regex_match (const GRegex *re, const char *word, int *start, int *end)
|
2013-01-02 14:50:26 -08:00
|
|
|
{
|
|
|
|
|
GMatchInfo *gmi;
|
2026-03-16 10:31:49 -06:00
|
|
|
gboolean found = FALSE;
|
|
|
|
|
int mstart;
|
|
|
|
|
int mend;
|
2011-02-24 04:14:30 +01:00
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
g_regex_match (re, word, 0, &gmi);
|
2026-03-16 10:31:49 -06:00
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
while (g_match_info_matches (gmi))
|
|
|
|
|
{
|
2026-03-16 10:31:49 -06:00
|
|
|
g_match_info_fetch_pos (gmi, 0, &mstart, &mend);
|
|
|
|
|
if (match_has_valid_context (word, mstart, mend))
|
|
|
|
|
{
|
|
|
|
|
*start = mstart;
|
|
|
|
|
*end = mend;
|
|
|
|
|
found = TRUE;
|
|
|
|
|
}
|
2013-08-01 16:20:04 +01:00
|
|
|
g_match_info_next (gmi, NULL);
|
|
|
|
|
}
|
2026-03-16 10:31:49 -06:00
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
g_match_info_free (gmi);
|
2026-03-16 10:31:49 -06:00
|
|
|
|
|
|
|
|
return found;
|
2013-01-02 14:50:26 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Miscellaneous description --- */
|
2026-03-16 10:31:49 -06:00
|
|
|
#define DOMAIN_LABEL "[\\pL\\pN](?:[-\\pL\\pN]{0,61}[\\pL\\pN])?"
|
|
|
|
|
#define DOMAIN DOMAIN_LABEL "(\\." DOMAIN_LABEL ")*"
|
|
|
|
|
#define TLD "\\.[\\pL](?:[-\\pL\\pN]*[\\pL\\pN])?"
|
|
|
|
|
#define IPADDR "(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}"
|
2013-06-16 20:16:58 +01:00
|
|
|
#define IPV6GROUP "([0-9a-f]{0,4})"
|
|
|
|
|
#define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \
|
|
|
|
|
"|(" IPV6GROUP "(:" IPV6GROUP ")*:(:" IPV6GROUP ")+))" /* with :: compression */
|
2013-06-16 02:57:38 +01:00
|
|
|
#define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")"
|
2013-06-17 22:43:18 +01:00
|
|
|
/* In urls the IPv6 must be enclosed in square brackets */
|
|
|
|
|
#define HOST_URL "(" DOMAIN TLD "|" IPADDR "|" "\\[" IPV6ADDR "\\]" ")"
|
2026-03-16 10:31:49 -06:00
|
|
|
#define HOST_URL_OPT_TLD HOST_URL
|
2013-06-18 12:01:39 +01:00
|
|
|
#define PORT "(:[1-9][0-9]{0,4})"
|
|
|
|
|
#define OPT_PORT "(" PORT ")?"
|
2013-01-02 14:50:26 -08:00
|
|
|
|
2013-10-21 04:22:25 +01:00
|
|
|
static GRegex *
|
2014-07-19 13:36:13 +10:00
|
|
|
make_re (const char *grist)
|
2013-01-02 14:50:26 -08:00
|
|
|
{
|
|
|
|
|
GRegex *ret;
|
|
|
|
|
GError *err = NULL;
|
|
|
|
|
|
2013-06-16 15:36:52 +01:00
|
|
|
ret = g_regex_new (grist, G_REGEX_CASELESS | G_REGEX_OPTIMIZE, 0, &err);
|
2013-10-21 04:22:25 +01:00
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* URL description --- */
|
|
|
|
|
#define LPAR "\\("
|
|
|
|
|
#define RPAR "\\)"
|
|
|
|
|
#define NOPARENS "[^() \t]*"
|
2013-06-16 15:36:52 +01:00
|
|
|
#define PATH \
|
|
|
|
|
"(" \
|
|
|
|
|
"(" LPAR NOPARENS RPAR ")" \
|
|
|
|
|
"|" \
|
|
|
|
|
"(" NOPARENS ")" \
|
|
|
|
|
")*" /* Zero or more occurrences of either of these */ \
|
|
|
|
|
"(?<![.,?!\\]])" /* Not allowed to end with these */
|
|
|
|
|
/* URL schemes recognised by the URL pattern.  Each entry is emitted as
 * "<scheme>://" in front of the host part.  The list is terminated by an
 * entry whose scheme is NULL.  Read-only and private to this file. */
static const struct
{
	const char *scheme;
} uri[] = {
	{ "http" },
	{ "https" },
	{ "ftp" },
	{ "gopher" },
	{ "gemini" },
	{ NULL }
};
|
|
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
static const GRegex *
|
2013-01-02 14:50:26 -08:00
|
|
|
re_url (void)
|
|
|
|
|
{
|
2013-06-16 15:36:52 +01:00
|
|
|
static GRegex *url_ret = NULL;
|
|
|
|
|
GString *grist_gstr;
|
2013-01-02 14:50:26 -08:00
|
|
|
char *grist;
|
2013-06-16 15:36:52 +01:00
|
|
|
int i;
|
2013-01-02 14:50:26 -08:00
|
|
|
|
|
|
|
|
if (url_ret) return url_ret;
|
|
|
|
|
|
2013-06-16 15:36:52 +01:00
|
|
|
grist_gstr = g_string_new (NULL);
|
|
|
|
|
|
|
|
|
|
for (i = 0; uri[i].scheme; i++)
|
|
|
|
|
{
|
2013-10-21 04:25:36 +01:00
|
|
|
if (i)
|
|
|
|
|
g_string_append (grist_gstr, "|");
|
|
|
|
|
|
|
|
|
|
g_string_append (grist_gstr, "(");
|
2026-03-23 13:50:02 -06:00
|
|
|
g_string_append_printf (grist_gstr, "%s://", uri[i].scheme);
|
|
|
|
|
g_string_append (grist_gstr, HOST_URL_OPT_TLD OPT_PORT);
|
|
|
|
|
g_string_append_printf (grist_gstr, "(/" PATH ")?");
|
2013-06-16 15:36:52 +01:00
|
|
|
|
2014-12-08 21:24:59 -08:00
|
|
|
g_string_append (grist_gstr, ")");
|
2013-06-16 15:36:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
grist = g_string_free (grist_gstr, FALSE);
|
|
|
|
|
|
2013-06-18 12:07:46 +01:00
|
|
|
url_ret = make_re (grist);
|
2013-10-21 04:22:25 +01:00
|
|
|
g_free (grist);
|
2013-06-16 15:36:52 +01:00
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
return url_ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NICK description --- */
|
2013-01-11 01:39:21 -08:00
|
|
|
/* For NICKPRE see before url_check_word() */
|
2013-01-02 14:50:26 -08:00
|
|
|
#define NICKHYP "-"
|
|
|
|
|
#define NICKLET "a-z"
|
|
|
|
|
#define NICKDIG "0-9"
|
|
|
|
|
/* Note for NICKSPE: \\\\ boils down to a single \ */
|
|
|
|
|
#define NICKSPE "\\[\\]\\\\`_^{|}"
|
2013-04-06 11:51:22 -07:00
|
|
|
#if 0
|
2013-01-03 14:13:20 -08:00
|
|
|
#define NICK0 "[" NICKPRE "]?[" NICKLET NICKSPE "]"
|
2013-04-06 11:51:22 -07:00
|
|
|
#else
|
|
|
|
|
/* Allow violation of rfc 2812 by allowing digit as first char */
/* Rationale is that match_nick() will anyway look up what */
/* we find in the user list, and that WORD_NICK is the last item */
/* in the array that url_check_word() runs through. */
|
2013-08-13 07:39:34 +03:00
|
|
|
#define NICK0 "^[" NICKPRE "]?[" NICKLET NICKDIG NICKSPE "]"
|
2013-04-06 11:51:22 -07:00
|
|
|
#endif
|
2013-01-03 14:24:04 -08:00
|
|
|
#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*"
|
2013-01-02 14:50:26 -08:00
|
|
|
#define NICK NICK0 NICK1
|
|
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
static const GRegex *
|
2013-01-02 14:50:26 -08:00
|
|
|
re_nick (void)
|
|
|
|
|
{
|
|
|
|
|
static GRegex *nick_ret;
|
|
|
|
|
|
|
|
|
|
if (nick_ret) return nick_ret;
|
|
|
|
|
|
2013-10-21 04:22:25 +01:00
|
|
|
nick_ret = make_re ("(" NICK ")");
|
|
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
return nick_ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* CHANNEL description --- */
|
2013-08-13 07:39:34 +03:00
|
|
|
#define CHANNEL "[" CHANPRE "][^ \t\a,]+(?:,[" CHANPRE "][^ \t\a,]+)*"
|
2013-01-02 14:50:26 -08:00
|
|
|
|
2013-08-01 16:20:04 +01:00
|
|
|
static const GRegex *
|
2013-01-02 14:50:26 -08:00
|
|
|
re_channel (void)
|
|
|
|
|
{
|
|
|
|
|
static GRegex *channel_ret;
|
|
|
|
|
|
|
|
|
|
if (channel_ret) return channel_ret;
|
|
|
|
|
|
2013-10-21 04:22:25 +01:00
|
|
|
channel_ret = make_re ("(" CHANNEL ")");
|
|
|
|
|
|
2013-01-02 14:50:26 -08:00
|
|
|
return channel_ret;
|
|
|
|
|
}
|