Server line text-encoding-related fixes.
- Handle server lines that contain sequences which are invalid in the server encoding. Previously, these would cause the whole line to be interpreted as ISO-8859-1; now each invalid sequence is simply replaced with an appropriate replacement character.
- Remove prefs.utf8_locale.
- Change the default server encoding from the system locale to UTF-8.
- Always populate server->encoding with a non-null value (defaulting to UTF-8).

Fixes #1198
This commit is contained in:
		
							parent
							
								
									5569205d15
								
							
						
					
					
						commit
						5749c53484
					
				
					 7 changed files with 129 additions and 326 deletions
				
			
		| 
						 | 
					@ -505,29 +505,11 @@ dcc_chat_line (struct DCC *dcc, char *line)
 | 
				
			||||||
	session *sess;
 | 
						session *sess;
 | 
				
			||||||
	char *word[PDIWORDS];
 | 
						char *word[PDIWORDS];
 | 
				
			||||||
	char *po;
 | 
						char *po;
 | 
				
			||||||
	char *utf;
 | 
					 | 
				
			||||||
	char *conv;
 | 
					 | 
				
			||||||
	int ret, i;
 | 
						int ret, i;
 | 
				
			||||||
	gssize len;
 | 
					 | 
				
			||||||
	gsize utf_len;
 | 
					 | 
				
			||||||
	char portbuf[32];
 | 
						char portbuf[32];
 | 
				
			||||||
	message_tags_data no_tags = MESSAGE_TAGS_DATA_INIT;
 | 
						message_tags_data no_tags = MESSAGE_TAGS_DATA_INIT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	len = strlen (line);
 | 
						line = text_invalid_encoding_to_utf8 (line, -1, dcc->serv->encoding, NULL);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (dcc->serv->encoding == NULL)     /* system */
 | 
					 | 
				
			||||||
		utf = g_locale_to_utf8 (line, len, NULL, &utf_len, NULL);
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
		utf = g_convert (line, len, "UTF-8", dcc->serv->encoding, 0, &utf_len, 0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (utf)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		line = utf;
 | 
					 | 
				
			||||||
		len = utf_len;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* we really need valid UTF-8 now */
 | 
					 | 
				
			||||||
	conv = text_validate (&line, &len);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sess = find_dialog (dcc->serv, dcc->nick);
 | 
						sess = find_dialog (dcc->serv, dcc->nick);
 | 
				
			||||||
	if (!sess)
 | 
						if (!sess)
 | 
				
			||||||
| 
						 | 
					@ -548,16 +530,14 @@ dcc_chat_line (struct DCC *dcc, char *line)
 | 
				
			||||||
	/* did the plugin close it? */
 | 
						/* did the plugin close it? */
 | 
				
			||||||
	if (!g_slist_find (dcc_list, dcc))
 | 
						if (!g_slist_find (dcc_list, dcc))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		g_free (utf);
 | 
							g_free (line);
 | 
				
			||||||
		g_free (conv);
 | 
					 | 
				
			||||||
		return 1;
 | 
							return 1;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* did the plugin eat the event? */
 | 
						/* did the plugin eat the event? */
 | 
				
			||||||
	if (ret)
 | 
						if (ret)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		g_free (utf);
 | 
							g_free (line);
 | 
				
			||||||
		g_free (conv);
 | 
					 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -574,8 +554,7 @@ dcc_chat_line (struct DCC *dcc, char *line)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		inbound_privmsg (dcc->serv, dcc->nick, "", line, FALSE, &no_tags);
 | 
							inbound_privmsg (dcc->serv, dcc->nick, "", line, FALSE, &no_tags);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	g_free (utf);
 | 
						g_free (line);
 | 
				
			||||||
	g_free (conv);
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -757,7 +757,6 @@ static void
 | 
				
			||||||
xchat_init (void)
 | 
					xchat_init (void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char buf[3068];
 | 
						char buf[3068];
 | 
				
			||||||
	const char *cs = NULL;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef WIN32
 | 
					#ifdef WIN32
 | 
				
			||||||
	WSADATA wsadata;
 | 
						WSADATA wsadata;
 | 
				
			||||||
| 
						 | 
					@ -795,9 +794,6 @@ xchat_init (void)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (g_get_charset (&cs))
 | 
					 | 
				
			||||||
		prefs.utf8_locale = TRUE;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	load_text_events ();
 | 
						load_text_events ();
 | 
				
			||||||
	sound_load ();
 | 
						sound_load ();
 | 
				
			||||||
	notify_load ();
 | 
						notify_load ();
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -317,7 +317,6 @@ struct hexchatprefs
 | 
				
			||||||
	guint32 dcc_ip;
 | 
						guint32 dcc_ip;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	unsigned int wait_on_exit;	/* wait for logs to be flushed to disk IF we're connected */
 | 
						unsigned int wait_on_exit;	/* wait for logs to be flushed to disk IF we're connected */
 | 
				
			||||||
	unsigned int utf8_locale;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Tells us if we need to save, only when they've been edited.
 | 
						/* Tells us if we need to save, only when they've been edited.
 | 
				
			||||||
		This is so that we continue using internal defaults (which can
 | 
							This is so that we continue using internal defaults (which can
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -970,9 +970,7 @@ hexchat_printf (hexchat_plugin *ph, const char *format, ...)
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
hexchat_command (hexchat_plugin *ph, const char *command)
 | 
					hexchat_command (hexchat_plugin *ph, const char *command)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char *command_nonconst;
 | 
						char *command_utf8;
 | 
				
			||||||
	char *conv;
 | 
					 | 
				
			||||||
	gssize len = -1;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!is_session (ph->context))
 | 
						if (!is_session (ph->context))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
| 
						 | 
					@ -981,11 +979,9 @@ hexchat_command (hexchat_plugin *ph, const char *command)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* scripts/plugins continue to send non-UTF8... *sigh* */
 | 
						/* scripts/plugins continue to send non-UTF8... *sigh* */
 | 
				
			||||||
	command_nonconst = g_strdup (command);
 | 
						command_utf8 = text_invalid_encoding_to_utf8 (command, -1, "UTF-8", NULL);
 | 
				
			||||||
	conv = text_validate (&command_nonconst, &len);
 | 
						handle_command (ph->context, command_utf8, FALSE);
 | 
				
			||||||
	handle_command (ph->context, command_nonconst, FALSE);
 | 
						g_free (command_utf8);
 | 
				
			||||||
	g_free (conv);
 | 
					 | 
				
			||||||
	g_free (command_nonconst);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -89,48 +89,18 @@ int
 | 
				
			||||||
tcp_send_real (void *ssl, int sok, char *encoding, char *buf, int len)
 | 
					tcp_send_real (void *ssl, int sok, char *encoding, char *buf, int len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
	char *locale;
 | 
					 | 
				
			||||||
	gsize loc_len;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (encoding == NULL)	/* system */
 | 
						gsize buf_encoded_len;
 | 
				
			||||||
	{
 | 
						gchar *buf_encoded = text_invalid_utf8_to_encoding (buf, len, encoding, &buf_encoded_len);
 | 
				
			||||||
		locale = NULL;
 | 
					 | 
				
			||||||
		if (!prefs.utf8_locale)
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			const gchar *charset;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			g_get_charset (&charset);
 | 
					 | 
				
			||||||
			locale = g_convert_with_fallback (buf, len, charset, "UTF-8", "?", 0, &loc_len, 0);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		locale = g_convert_with_fallback (buf, len, encoding, "UTF-8", "?", 0, &loc_len, 0);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (locale)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		len = loc_len;
 | 
					 | 
				
			||||||
#ifdef USE_OPENSSL
 | 
					#ifdef USE_OPENSSL
 | 
				
			||||||
	if (!ssl)
 | 
						if (!ssl)
 | 
				
			||||||
			ret = send (sok, locale, len, 0);
 | 
							ret = send (sok, buf_encoded, buf_encoded_len, 0);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
			ret = _SSL_send (ssl, locale, len);
 | 
							ret = _SSL_send (ssl, buf_encoded, buf_encoded_len);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
		ret = send (sok, locale, len, 0);
 | 
						ret = send (sok, buf_encoded, buf_encoded_len, 0);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
		g_free (locale);
 | 
						g_free (buf_encoded);
 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
#ifdef USE_OPENSSL
 | 
					 | 
				
			||||||
		if (!ssl)
 | 
					 | 
				
			||||||
			ret = send (sok, buf, len, 0);
 | 
					 | 
				
			||||||
		else
 | 
					 | 
				
			||||||
			ret = _SSL_send (ssl, buf, len);
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
		ret = send (sok, buf, len, 0);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -287,94 +257,15 @@ close_socket (int sok)
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
server_inline (server *serv, char *line, gssize len)
 | 
					server_inline (server *serv, char *line, gssize len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char *utf_line_allocated = NULL;
 | 
						gsize len_utf8;
 | 
				
			||||||
 | 
						line = text_invalid_encoding_to_utf8 (line, len, serv->encoding, &len_utf8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Checks whether we're set to use UTF-8 charset */
 | 
						fe_add_rawlog (serv, line, len_utf8, FALSE);
 | 
				
			||||||
	if ((serv->encoding == NULL && prefs.utf8_locale) /* Using system default - UTF-8 */ ||
 | 
					 | 
				
			||||||
		g_ascii_strcasecmp (serv->encoding, "UTF8") == 0 ||
 | 
					 | 
				
			||||||
		g_ascii_strcasecmp (serv->encoding, "UTF-8") == 0
 | 
					 | 
				
			||||||
	)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		utf_line_allocated = text_validate (&line, &len);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		/* Since the user has an explicit charset set, either
 | 
					 | 
				
			||||||
		via /charset command or from his non-UTF8 locale,
 | 
					 | 
				
			||||||
		we don't fallback to ISO-8859-1 and instead try to remove
 | 
					 | 
				
			||||||
		errnoeous octets till the string is convertable in the
 | 
					 | 
				
			||||||
		said charset. */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		const char *encoding = NULL;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (serv->encoding != NULL)
 | 
					 | 
				
			||||||
			encoding = serv->encoding;
 | 
					 | 
				
			||||||
		else
 | 
					 | 
				
			||||||
			g_get_charset (&encoding);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (encoding != NULL)
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			char *conv_line; /* holds a copy of the original string */
 | 
					 | 
				
			||||||
			gsize conv_len; /* tells g_convert how much of line to convert */
 | 
					 | 
				
			||||||
			gsize utf_len;
 | 
					 | 
				
			||||||
			gsize read_len;
 | 
					 | 
				
			||||||
			GError *err;
 | 
					 | 
				
			||||||
			gboolean retry;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			conv_line = g_malloc (len + 1);
 | 
					 | 
				
			||||||
			memcpy (conv_line, line, len);
 | 
					 | 
				
			||||||
			conv_line[len] = 0;
 | 
					 | 
				
			||||||
			conv_len = len;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			/* if CP1255, convert it with the NUL terminator.
 | 
					 | 
				
			||||||
				Works around SF bug #1122089 */
 | 
					 | 
				
			||||||
			if (serv->using_cp1255)
 | 
					 | 
				
			||||||
				conv_len++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			do
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				err = NULL;
 | 
					 | 
				
			||||||
				retry = FALSE;
 | 
					 | 
				
			||||||
				utf_line_allocated = g_convert_with_fallback (conv_line, conv_len, "UTF-8", encoding, "?", &read_len, &utf_len, &err);
 | 
					 | 
				
			||||||
				if (err != NULL)
 | 
					 | 
				
			||||||
				{
 | 
					 | 
				
			||||||
					if (err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE && conv_len > (read_len + 1))
 | 
					 | 
				
			||||||
					{
 | 
					 | 
				
			||||||
						/* Make our best bet by removing the erroneous char.
 | 
					 | 
				
			||||||
						   This will work for casual 8-bit strings with non-standard chars. */
 | 
					 | 
				
			||||||
						memmove (conv_line + read_len, conv_line + read_len + 1, conv_len - read_len -1);
 | 
					 | 
				
			||||||
						conv_len--;
 | 
					 | 
				
			||||||
						retry = TRUE;
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
					g_error_free (err);
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
			} while (retry);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			g_free (conv_line);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			/* If any conversion has occured at all. Conversion might fail
 | 
					 | 
				
			||||||
			due to errors other than invalid sequences, e.g. unknown charset. */
 | 
					 | 
				
			||||||
			if (utf_line_allocated != NULL)
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				line = utf_line_allocated;
 | 
					 | 
				
			||||||
				len = utf_len;
 | 
					 | 
				
			||||||
				if (serv->using_cp1255 && len > 0)
 | 
					 | 
				
			||||||
					len--;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			else
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				/* If all fails, treat as UTF-8 with fallback to ISO-8859-1. */
 | 
					 | 
				
			||||||
				utf_line_allocated = text_validate (&line, &len);
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fe_add_rawlog (serv, line, len, FALSE);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* let proto-irc.c handle it */
 | 
						/* let proto-irc.c handle it */
 | 
				
			||||||
	serv->p_inline (serv, line, len);
 | 
						serv->p_inline (serv, line, len_utf8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	g_free (utf_line_allocated);
 | 
						g_free (line);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* read data from socket */
 | 
					/* read data from socket */
 | 
				
			||||||
| 
						 | 
					@ -1749,12 +1640,7 @@ server_set_encoding (server *serv, char *new_encoding)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char *space;
 | 
						char *space;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (serv->encoding)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
	g_free (serv->encoding);
 | 
						g_free (serv->encoding);
 | 
				
			||||||
		/* can be left as NULL to indicate system encoding */
 | 
					 | 
				
			||||||
		serv->encoding = NULL;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (new_encoding)
 | 
						if (new_encoding)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
| 
						 | 
					@ -1772,6 +1658,10 @@ server_set_encoding (server *serv, char *new_encoding)
 | 
				
			||||||
			serv->encoding = g_strdup ("UTF-8");
 | 
								serv->encoding = g_strdup ("UTF-8");
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							serv->encoding = g_strdup ("UTF-8");
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
server *
 | 
					server *
 | 
				
			||||||
| 
						 | 
					@ -1816,6 +1706,8 @@ server_set_defaults (server *serv)
 | 
				
			||||||
	serv->nick_prefixes = g_strdup ("@%+");
 | 
						serv->nick_prefixes = g_strdup ("@%+");
 | 
				
			||||||
	serv->nick_modes = g_strdup ("ohv");
 | 
						serv->nick_modes = g_strdup ("ohv");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						server_set_encoding (serv, "UTF-8");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	serv->nickcount = 1;
 | 
						serv->nickcount = 1;
 | 
				
			||||||
	serv->end_of_motd = FALSE;
 | 
						serv->end_of_motd = FALSE;
 | 
				
			||||||
	serv->is_away = FALSE;
 | 
						serv->is_away = FALSE;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -658,33 +658,29 @@ log_open_or_close (session *sess)
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
get_stamp_str (char *fmt, time_t tim, char **ret)
 | 
					get_stamp_str (char *fmt, time_t tim, char **ret)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char *loc = NULL;
 | 
					 | 
				
			||||||
	char dest[128];
 | 
						char dest[128];
 | 
				
			||||||
	gsize len;
 | 
						gsize len_locale;
 | 
				
			||||||
 | 
						gsize len_utf8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* strftime wants the format string in LOCALE! */
 | 
						/* strftime requires the format string to be in locale encoding. */
 | 
				
			||||||
	if (!prefs.utf8_locale)
 | 
						fmt = g_locale_from_utf8 (fmt, -1, NULL, NULL, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						len_locale = strftime_validated (dest, sizeof (dest), fmt, localtime (&tim));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						g_free (fmt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (len_locale == 0)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		const gchar *charset;
 | 
							return 0;
 | 
				
			||||||
 | 
					 | 
				
			||||||
		g_get_charset (&charset);
 | 
					 | 
				
			||||||
		loc = g_convert_with_fallback (fmt, -1, charset, "UTF-8", "?", 0, 0, 0);
 | 
					 | 
				
			||||||
		if (loc)
 | 
					 | 
				
			||||||
			fmt = loc;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	len = strftime_validated (dest, sizeof (dest), fmt, localtime (&tim));
 | 
						*ret = g_locale_to_utf8 (dest, len_locale, NULL, &len_utf8, NULL);
 | 
				
			||||||
	if (len)
 | 
						if (*ret == NULL)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		if (prefs.utf8_locale)
 | 
							return 0;
 | 
				
			||||||
			*ret = g_strdup (dest);
 | 
					 | 
				
			||||||
		else
 | 
					 | 
				
			||||||
			*ret = g_locale_to_utf8 (dest, len, 0, &len, 0);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	g_free (loc);
 | 
						return len_utf8;
 | 
				
			||||||
 | 
					 | 
				
			||||||
	return len;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
| 
						 | 
					@ -753,154 +749,101 @@ log_write (session *sess, char *text, time_t ts)
 | 
				
			||||||
	g_free (temp);
 | 
						g_free (temp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* converts a CP1252/ISO-8859-1(5) hybrid to UTF-8                           */
 | 
					/**
 | 
				
			||||||
/* Features: 1. It never fails, all 00-FF chars are converted to valid UTF-8 */
 | 
					 * Converts a given string in from_encoding to to_encoding. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in
 | 
				
			||||||
/*           2. Uses CP1252 in the range 80-9f because ISO doesn't have any- */
 | 
					 * the original input that are invalid even in from_encoding. g_convert_with_fallback fails for such text, whereas this function replaces such a
 | 
				
			||||||
/*              thing useful in this range and it helps us receive from mIRC */
 | 
					 * sequence with the fallback string.
 | 
				
			||||||
/*           3. The five undefined chars in CP1252 80-9f are replaced with   */
 | 
					 *
 | 
				
			||||||
/*              ISO-8859-15 control codes.                                   */
 | 
					 * If len is -1, strlen(text) is used to calculate the length. Do not pass -1 if text is supposed to contain \0 bytes, such as if from_encoding is a
 | 
				
			||||||
/*           4. Handles 0xa4 as a Euro symbol ala ISO-8859-15.               */
 | 
					 * multi-byte encoding like UTF-16.
 | 
				
			||||||
/*           5. Uses ISO-8859-1 (which matches CP1252) for everything else.  */
 | 
					 */
 | 
				
			||||||
/*           6. This routine measured 3x faster than g_convert :)            */
 | 
					static gchar *
 | 
				
			||||||
 | 
					text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, const gchar *from_encoding, const gchar *fallback, gsize *len_out)
 | 
				
			||||||
static unsigned char *
 | 
					 | 
				
			||||||
iso_8859_1_to_utf8 (unsigned char *text, int len, gsize *bytes_written)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned int idx;
 | 
						gchar *result_part;
 | 
				
			||||||
	unsigned char *res, *output;
 | 
						gsize result_part_len;
 | 
				
			||||||
	static const unsigned short lowtable[] = /* 74 byte table for 80-a4 */
 | 
						const gchar *end;
 | 
				
			||||||
	{
 | 
						gsize invalid_start_pos;
 | 
				
			||||||
	/* compressed utf-8 table: if the first byte's 0x20 bit is set, it
 | 
						GString *result;
 | 
				
			||||||
	   indicates a 2-byte utf-8 sequence, otherwise prepend a 0xe2. */
 | 
						const gchar *current_start;
 | 
				
			||||||
		0x82ac, /* 80 Euro. CP1252 from here on... */
 | 
					 | 
				
			||||||
		0xe281, /* 81 NA */
 | 
					 | 
				
			||||||
		0x809a, /* 82 */
 | 
					 | 
				
			||||||
		0xe692, /* 83 */
 | 
					 | 
				
			||||||
		0x809e, /* 84 */
 | 
					 | 
				
			||||||
		0x80a6, /* 85 */
 | 
					 | 
				
			||||||
		0x80a0, /* 86 */
 | 
					 | 
				
			||||||
		0x80a1, /* 87 */
 | 
					 | 
				
			||||||
		0xeb86, /* 88 */
 | 
					 | 
				
			||||||
		0x80b0, /* 89 */
 | 
					 | 
				
			||||||
		0xe5a0, /* 8a */
 | 
					 | 
				
			||||||
		0x80b9, /* 8b */
 | 
					 | 
				
			||||||
		0xe592, /* 8c */
 | 
					 | 
				
			||||||
		0xe28d, /* 8d NA */
 | 
					 | 
				
			||||||
		0xe5bd, /* 8e */
 | 
					 | 
				
			||||||
		0xe28f, /* 8f NA */
 | 
					 | 
				
			||||||
		0xe290, /* 90 NA */
 | 
					 | 
				
			||||||
		0x8098, /* 91 */
 | 
					 | 
				
			||||||
		0x8099, /* 92 */
 | 
					 | 
				
			||||||
		0x809c, /* 93 */
 | 
					 | 
				
			||||||
		0x809d, /* 94 */
 | 
					 | 
				
			||||||
		0x80a2, /* 95 */
 | 
					 | 
				
			||||||
		0x8093, /* 96 */
 | 
					 | 
				
			||||||
		0x8094, /* 97 */
 | 
					 | 
				
			||||||
		0xeb9c, /* 98 */
 | 
					 | 
				
			||||||
		0x84a2, /* 99 */
 | 
					 | 
				
			||||||
		0xe5a1, /* 9a */
 | 
					 | 
				
			||||||
		0x80ba, /* 9b */
 | 
					 | 
				
			||||||
		0xe593, /* 9c */
 | 
					 | 
				
			||||||
		0xe29d, /* 9d NA */
 | 
					 | 
				
			||||||
		0xe5be, /* 9e */
 | 
					 | 
				
			||||||
		0xe5b8, /* 9f */
 | 
					 | 
				
			||||||
		0xe2a0, /* a0 */
 | 
					 | 
				
			||||||
		0xe2a1, /* a1 */
 | 
					 | 
				
			||||||
		0xe2a2, /* a2 */
 | 
					 | 
				
			||||||
		0xe2a3, /* a3 */
 | 
					 | 
				
			||||||
		0x82ac  /* a4 ISO-8859-15 Euro. */
 | 
					 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (len == -1)
 | 
						if (len == -1)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
		len = strlen (text);
 | 
							len = strlen (text);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* worst case scenario: every byte turns into 3 bytes */
 | 
					 | 
				
			||||||
	res = output = g_malloc ((len * 3) + 1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	while (len)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		if (G_LIKELY (*text < 0x80))
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			*output = *text;	/* ascii maps directly */
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		else if (*text <= 0xa4)	/* 80-a4 use a lookup table */
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			idx = *text - 0x80;
 | 
					 | 
				
			||||||
			if (lowtable[idx] & 0x2000)
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				*output++ = (lowtable[idx] >> 8) & 0xdf; /* 2 byte utf-8 */
 | 
					 | 
				
			||||||
				*output = lowtable[idx] & 0xff;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			else
 | 
					 | 
				
			||||||
			{
 | 
					 | 
				
			||||||
				*output++ = 0xe2;	/* 3 byte utf-8 */
 | 
					 | 
				
			||||||
				*output++ = (lowtable[idx] >> 8) & 0xff;
 | 
					 | 
				
			||||||
				*output = lowtable[idx] & 0xff;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		else if (*text < 0xc0)
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			*output++ = 0xc2;
 | 
					 | 
				
			||||||
			*output = *text;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		else
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			*output++ = 0xc3;
 | 
					 | 
				
			||||||
			*output = *text - 0x40;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		output++;
 | 
					 | 
				
			||||||
		text++;
 | 
					 | 
				
			||||||
		len--;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	*output = 0;	/* terminate */
 | 
					 | 
				
			||||||
	*bytes_written = output - res;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return res;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
char *
 | 
						end = text + len;
 | 
				
			||||||
text_validate (char **text, gssize *len)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	char *utf;
 | 
					 | 
				
			||||||
	gsize utf_len;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* valid utf8? */
 | 
						/* Find the first position of an invalid sequence. */
 | 
				
			||||||
	if (g_utf8_validate (*text, *len, 0))
 | 
						result_part = g_convert (text, len, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL);
 | 
				
			||||||
		return NULL;
 | 
						if (result_part != NULL)
 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef WIN32
 | 
					 | 
				
			||||||
	if (GetACP () == 1252) /* our routine is better than iconv's 1252 */
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
	if (prefs.utf8_locale)
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
		/* fallback to iso-8859-1 */
 | 
					 | 
				
			||||||
		utf = iso_8859_1_to_utf8 (*text, *len, &utf_len);
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		/* fallback to locale */
 | 
							/* All text converted successfully on the first try. Return it. */
 | 
				
			||||||
		utf = g_locale_to_utf8 (*text, *len, 0, &utf_len, NULL);
 | 
					
 | 
				
			||||||
		if (!utf)
 | 
							if (len_out != NULL)
 | 
				
			||||||
			utf = iso_8859_1_to_utf8 (*text, *len, &utf_len);
 | 
							{
 | 
				
			||||||
 | 
								*len_out = result_part_len;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!utf) 
 | 
							return result_part;
 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		*text = g_strdup ("%INVALID%");
 | 
					 | 
				
			||||||
		*len = 9;
 | 
					 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		*text = utf;
 | 
					 | 
				
			||||||
		*len = utf_len;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return utf;
 | 
						/* One or more invalid sequences exist that need to be replaced with the fallback. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						result = g_string_sized_new (len);
 | 
				
			||||||
 | 
						current_start = text;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (;;)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							g_assert (current_start + invalid_start_pos < end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Convert everything before the position of the invalid sequence. It should be successful. */
 | 
				
			||||||
 | 
							result_part = g_convert (current_start, invalid_start_pos, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL);
 | 
				
			||||||
 | 
							g_assert (result_part != NULL);
 | 
				
			||||||
 | 
							g_string_append_len (result, result_part, result_part_len);
 | 
				
			||||||
 | 
							g_free (result_part);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Append the fallback */
 | 
				
			||||||
 | 
							g_string_append (result, fallback);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Now try converting everything after the invalid sequence. */
 | 
				
			||||||
 | 
							current_start += invalid_start_pos + 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							result_part = g_convert (current_start, end - current_start, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL);
 | 
				
			||||||
 | 
							if (result_part != NULL)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/* The rest of the text converted successfully. Append it and return the whole converted text. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								g_string_append_len (result, result_part, result_part_len);
 | 
				
			||||||
 | 
								g_free (result_part);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (len_out != NULL)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									*len_out = result->len;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								return g_string_free (result, FALSE);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* The rest of the text didn't convert successfully. invalid_start_pos has the position of the next invalid sequence. */
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					gchar *
 | 
				
			||||||
 | 
					text_invalid_utf8_to_encoding (const gchar* text, gssize len, const gchar *to_encoding, gsize *len_out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return text_convert_invalid (text, len, to_encoding, "UTF-8", "?", len_out);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					gchar *
 | 
				
			||||||
 | 
					text_invalid_encoding_to_utf8 (const gchar* text, gssize len, const gchar *from_encoding, gsize *len_out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return text_convert_invalid (text, len, "UTF-8", from_encoding, "\357\277\275", len_out);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
PrintTextTimeStamp (session *sess, char *text, time_t timestamp)
 | 
					PrintTextTimeStamp (session *sess, char *text, time_t timestamp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	char *conv;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!sess)
 | 
						if (!sess)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		if (!sess_list)
 | 
							if (!sess_list)
 | 
				
			||||||
| 
						 | 
					@ -909,22 +852,19 @@ PrintTextTimeStamp (session *sess, char *text, time_t timestamp)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* make sure it's valid utf8 */
 | 
						/* make sure it's valid utf8 */
 | 
				
			||||||
	if (text[0] == 0)
 | 
						if (text[0] == '\0')
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		text = "\n";
 | 
							text = g_strdup ("\n");
 | 
				
			||||||
		conv = NULL;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		gssize len = -1;
 | 
							text = text_invalid_encoding_to_utf8 (text, -1, "UTF-8", NULL);
 | 
				
			||||||
		conv = text_validate ((char **)&text, &len);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	log_write (sess, text, timestamp);
 | 
						log_write (sess, text, timestamp);
 | 
				
			||||||
	scrollback_save (sess, text);
 | 
						scrollback_save (sess, text);
 | 
				
			||||||
	fe_print_text (sess, text, timestamp, FALSE);
 | 
						fe_print_text (sess, text, timestamp, FALSE);
 | 
				
			||||||
 | 
						g_free (text);
 | 
				
			||||||
	g_free (conv);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -57,7 +57,8 @@ void text_emit (int index, session *sess, char *a, char *b, char *c, char *d,
 | 
				
			||||||
		time_t timestamp);
 | 
							time_t timestamp);
 | 
				
			||||||
int text_emit_by_name (char *name, session *sess, time_t timestamp,
 | 
					int text_emit_by_name (char *name, session *sess, time_t timestamp,
 | 
				
			||||||
					   char *a, char *b, char *c, char *d);
 | 
										   char *a, char *b, char *c, char *d);
 | 
				
			||||||
char *text_validate (char **text, gssize *len);
 | 
					gchar *text_invalid_utf8_to_encoding (const gchar* text, gssize len, const gchar *to_encoding, gsize *len_out);
 | 
				
			||||||
 | 
					gchar *text_invalid_encoding_to_utf8 (const gchar* text, gssize len, const gchar *from_encoding, gsize *len_out);
 | 
				
			||||||
int get_stamp_str (char *fmt, time_t tim, char **ret);
 | 
					int get_stamp_str (char *fmt, time_t tim, char **ret);
 | 
				
			||||||
void format_event (session *sess, int index, char **args, char *o, gsize sizeofo, unsigned int stripcolor_args);
 | 
					void format_event (session *sess, int index, char **args, char *o, gsize sizeofo, unsigned int stripcolor_args);
 | 
				
			||||||
char *text_find_format_string (char *name);
 | 
					char *text_find_format_string (char *name);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue