[781] in BarnOwl Developers
[D-O-H] r786 - branches/barnowl_unicode/owl
daemon@ATHENA.MIT.EDU (asedeno@MIT.EDU)
Thu Oct 29 18:09:39 2009
Resent-From: nelhage@mit.edu
Resent-To: barnowl-dev-mtg@charon.mit.edu
To: dirty-owl-hackers@mit.edu
From: asedeno@MIT.EDU
Reply-to: dirty-owl-hackers@MIT.EDU
Date: Fri, 28 Dec 2007 17:04:34 -0500 (EST)
Author: asedeno
Date: 2007-12-28 17:04:34 -0500 (Fri, 28 Dec 2007)
New Revision: 786
Modified:
branches/barnowl_unicode/owl/fmtext.c
branches/barnowl_unicode/owl/functions.c
branches/barnowl_unicode/owl/message.c
branches/barnowl_unicode/owl/perlconfig.c
branches/barnowl_unicode/owl/util.c
branches/barnowl_unicode/owl/zephyr.c
Log:
unicode/glib branch.
First pass at incoming zephyr -> UTF-8 sanitizing.
This only operates on incoming data so far.
We still need to clean outgoing data -- the plan is to attempt conversion
to ISO-8859-1, and use that if it works.
Modified: branches/barnowl_unicode/owl/fmtext.c
===================================================================
--- branches/barnowl_unicode/owl/fmtext.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/fmtext.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -234,30 +234,7 @@
*/
char *owl_fmtext_print_plain(owl_fmtext *f)
{
- char *r, *s, *p;
- r = owl_malloc(f->bufflen);
- r[0] = '\0';
- s = f->textbuff;
- /* Find next possible format character. */
- p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
- while(p) {
- /* If it's a format character, copy up to it, and skip all
- immediately following format characters. */
- if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
- strncat(r, s, p-s);
- p = g_utf8_next_char(p);
- while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
- p = g_utf8_next_char(p);
- }
- s = p;
- p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
- }
- else {
- p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
- }
- }
- if (s) strcat(r,s);
- return(r);
+ return owl_strip_format_chars(f->textbuff);
}
void _owl_fmtext_wattrset(WINDOW *w, int attrs) /*noproto*/
Modified: branches/barnowl_unicode/owl/functions.c
===================================================================
--- branches/barnowl_unicode/owl/functions.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/functions.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -1604,7 +1604,7 @@
for (i=0; i<fields; i++) {
sprintf(buff, " Field %i : ", i+1);
- ptr=owl_zephyr_get_field(n, i+1);
+ ptr=owl_zephyr_get_field_as_utf8(n, i+1);
len=strlen(ptr);
if (len<30) {
strncpy(tmpbuff, ptr, len);
Modified: branches/barnowl_unicode/owl/message.c
===================================================================
--- branches/barnowl_unicode/owl/message.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/message.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -78,7 +78,7 @@
owl_pair_create(pair, owl_global_intern(&g, attrname), NULL);
owl_list_append_element(&(m->attributes), pair);
}
- owl_pair_set_value(pair, owl_strdup(attrvalue));
+ owl_pair_set_value(pair, owl_validate_or_convert(attrvalue, -1));
}
/* return the value associated with the named attribute, or NULL if
Modified: branches/barnowl_unicode/owl/perlconfig.c
===================================================================
--- branches/barnowl_unicode/owl/perlconfig.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/perlconfig.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -54,7 +54,7 @@
av_zfields = newAV();
j=owl_zephyr_get_num_fields(owl_message_get_notice(m));
for (i=0; i<j; i++) {
- ptr=owl_zephyr_get_field(owl_message_get_notice(m), i+1);
+ ptr=owl_zephyr_get_field_as_utf8(owl_message_get_notice(m), i+1);
av_push(av_zfields, newSVpvn(ptr, strlen(ptr)));
owl_free(ptr);
}
Modified: branches/barnowl_unicode/owl/util.c
===================================================================
--- branches/barnowl_unicode/owl/util.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/util.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -752,13 +752,66 @@
return start;
}
-char * owl_get_datadir() {
- char * datadir = getenv("BARNOWL_DATA_DIR");
- if(datadir != NULL)
- return strchr(datadir, '=') + 1;
- return DATADIR;
+char * owl_get_datadir()
+{
+ char * datadir = getenv("BARNOWL_DATA_DIR");
+ if(datadir != NULL)
+ return strchr(datadir, '=') + 1;
+ return DATADIR;
}
+/* Strips format characters from a valid utf-8 string. Returns the
+ empty string if 'in' does not validate. */
+char * owl_strip_format_chars(char *in)
+{
+ char *r;
+ if (g_utf8_validate(in, -1, NULL)) {
+ char *s, *p;
+ r = owl_malloc(strlen(in)+1);
+ r[0] = '\0';
+ s = in;
+ p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+ while(p) {
+ /* If it's a format character, copy up to it, and skip all
+ immediately following format characters. */
+ if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
+ strncat(r, s, p-s);
+ p = g_utf8_next_char(p);
+ while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
+ p = g_utf8_next_char(p);
+ }
+ s = p;
+ p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+ }
+ else {
+ p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+ }
+ }
+ if (s) strcat(r,s);
+ }
+ else {
+ r = owl_strdup("");
+ }
+ return r;
+}
+
+/* If in is not UTF-8, convert from ISO-8859-1. We may want to allow
+ * the caller to specify an alternative in the future. We also strip
+ * out characters in Unicode Plane 16, as we use that plane internally
+ * for formatting.
+ */
+char * owl_validate_or_convert(char *in, int len)
+{
+ if (g_utf8_validate(in, len , NULL)) {
+ return owl_strip_format_chars(in);
+ }
+ else {
+ return g_convert(in, len,
+ "UTF-8", "ISO-8859-1",
+ NULL, NULL, NULL);
+ }
+}
+
/**************************************************************************/
/************************* REGRESSION TESTS *******************************/
/**************************************************************************/
Modified: branches/barnowl_unicode/owl/zephyr.c
===================================================================
--- branches/barnowl_unicode/owl/zephyr.c 2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/zephyr.c 2007-12-28 22:04:34 UTC (rev 786)
@@ -353,11 +353,43 @@
return(owl_strdup(""));
}
+
+char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
+{
+ int i, count, save;
+
+ /* If there's no message here, just run along now */
+ if (n->z_message_len == 0)
+ return(owl_strdup(""));
+
+ count=save=0;
+ for (i = 0; i < n->z_message_len; i++) {
+ if (n->z_message[i]=='\0') {
+ count++;
+ if (count == j) {
+ /* just found the end of the field we're looking for */
+ return(owl_validate_or_convert(n->z_message + save, -1));
+ } else {
+ save = i + 1;
+ }
+ }
+ }
+ /* catch the last field, which might not be null terminated */
+ if (count == j - 1) {
+ return owl_validate_or_convert(n->z_message + save, n->z_message_len - save);
+ }
+
+ return(owl_strdup(""));
+}
#else
char *owl_zephyr_get_field(void *n, int j)
{
return(owl_strdup(""));
}
+char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
+{
+ return owl_zephyr_get_field(n, j);
+}
#endif