[781] in BarnOwl Developers

home help back first fref pref prev next nref lref last post

[D-O-H] r786 - branches/barnowl_unicode/owl

daemon@ATHENA.MIT.EDU (asedeno@MIT.EDU)
Thu Oct 29 18:09:39 2009

Resent-From: nelhage@mit.edu
Resent-To: barnowl-dev-mtg@charon.mit.edu
To: dirty-owl-hackers@mit.edu
From: asedeno@MIT.EDU
Reply-to: dirty-owl-hackers@MIT.EDU
Date: Fri, 28 Dec 2007 17:04:34 -0500 (EST)

Author: asedeno
Date: 2007-12-28 17:04:34 -0500 (Fri, 28 Dec 2007)
New Revision: 786

Modified:
   branches/barnowl_unicode/owl/fmtext.c
   branches/barnowl_unicode/owl/functions.c
   branches/barnowl_unicode/owl/message.c
   branches/barnowl_unicode/owl/perlconfig.c
   branches/barnowl_unicode/owl/util.c
   branches/barnowl_unicode/owl/zephyr.c
Log:
unicode/glib branch.
First pass at incoming zephyr -> UTF-8 sanitizing.
This only operates on incoming data so far.
We still need to clean outgoing data -- the plan is to attempt conversion
to ISO-8859-1, and use that if it works.


Modified: branches/barnowl_unicode/owl/fmtext.c
===================================================================
--- branches/barnowl_unicode/owl/fmtext.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/fmtext.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -234,30 +234,7 @@
  */
 char *owl_fmtext_print_plain(owl_fmtext *f)
 {
-  char *r, *s, *p;
-  r = owl_malloc(f->bufflen);
-  r[0] = '\0';
-  s = f->textbuff;
-  /* Find next possible format character. */
-  p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
-  while(p) {
-    /* If it's a format character, copy up to it, and skip all
-       immediately following format characters. */
-    if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
-      strncat(r, s, p-s);
-      p = g_utf8_next_char(p);
-      while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
-	p = g_utf8_next_char(p);
-      }
-      s = p;
-      p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
-    }
-    else {
-      p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
-    }
-  }
-  if (s) strcat(r,s);
-  return(r);
+  return owl_strip_format_chars(f->textbuff);
 }
 
 void _owl_fmtext_wattrset(WINDOW *w, int attrs) /*noproto*/

Modified: branches/barnowl_unicode/owl/functions.c
===================================================================
--- branches/barnowl_unicode/owl/functions.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/functions.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -1604,7 +1604,7 @@
 	for (i=0; i<fields; i++) {
 	  sprintf(buff, "  Field %i   : ", i+1);
 	  
-	  ptr=owl_zephyr_get_field(n, i+1);
+	  ptr=owl_zephyr_get_field_as_utf8(n, i+1);
 	  len=strlen(ptr);
 	  if (len<30) {
 	    strncpy(tmpbuff, ptr, len);

Modified: branches/barnowl_unicode/owl/message.c
===================================================================
--- branches/barnowl_unicode/owl/message.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/message.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -78,7 +78,7 @@
     owl_pair_create(pair, owl_global_intern(&g, attrname), NULL);
     owl_list_append_element(&(m->attributes), pair);
   }
-  owl_pair_set_value(pair, owl_strdup(attrvalue));
+  owl_pair_set_value(pair, owl_validate_or_convert(attrvalue, -1));
 }
 
 /* return the value associated with the named attribute, or NULL if

Modified: branches/barnowl_unicode/owl/perlconfig.c
===================================================================
--- branches/barnowl_unicode/owl/perlconfig.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/perlconfig.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -54,7 +54,7 @@
     av_zfields = newAV();
     j=owl_zephyr_get_num_fields(owl_message_get_notice(m));
     for (i=0; i<j; i++) {
-      ptr=owl_zephyr_get_field(owl_message_get_notice(m), i+1);
+      ptr=owl_zephyr_get_field_as_utf8(owl_message_get_notice(m), i+1);
       av_push(av_zfields, newSVpvn(ptr, strlen(ptr)));
       owl_free(ptr);
     }

Modified: branches/barnowl_unicode/owl/util.c
===================================================================
--- branches/barnowl_unicode/owl/util.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/util.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -752,13 +752,66 @@
   return start;
 }
 
-char * owl_get_datadir() {
-    char * datadir = getenv("BARNOWL_DATA_DIR");
-    if(datadir != NULL)
-        return strchr(datadir, '=') + 1;
-    return DATADIR;
+char * owl_get_datadir()
+{
+  char * datadir = getenv("BARNOWL_DATA_DIR");
+  if(datadir != NULL)
+    return strchr(datadir, '=') + 1;
+  return DATADIR;
 }
 
+/* Strips format characters from a valid utf-8 string. Returns the
+   empty string if 'in' does not validate. */
+char * owl_strip_format_chars(char *in)
+{
+  char *r;
+  if (g_utf8_validate(in, -1, NULL)) {
+    char *s, *p;
+    r = owl_malloc(strlen(in)+1);
+    r[0] = '\0';
+    s = in;
+    p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+    while(p) {
+      /* If it's a format character, copy up to it, and skip all
+	 immediately following format characters. */
+      if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
+	strncat(r, s, p-s);
+	p = g_utf8_next_char(p);
+	while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
+	  p = g_utf8_next_char(p);
+	}
+	s = p;
+	p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+      }
+      else {
+	p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
+      }
+    }
+    if (s) strcat(r,s);
+  }
+  else {
+    r = owl_strdup("");
+  }
+  return r;
+}
+
+/* If in is not UTF-8, convert from ISO-8859-1. We may want to allow
+ * the caller to specify an alternative in the future. We also strip
+ * out characters in Unicode Plane 16, as we use that plane internally
+ * for formatting.
+ */
+char * owl_validate_or_convert(char *in, int len)
+{
+  if (g_utf8_validate(in, len , NULL)) {
+    return owl_strip_format_chars(in);
+  }
+  else {
+    return g_convert(in, len,
+		     "UTF-8", "ISO-8859-1",
+		     NULL, NULL, NULL);
+  }
+}
+
 /**************************************************************************/
 /************************* REGRESSION TESTS *******************************/
 /**************************************************************************/

Modified: branches/barnowl_unicode/owl/zephyr.c
===================================================================
--- branches/barnowl_unicode/owl/zephyr.c	2007-12-27 16:01:22 UTC (rev 785)
+++ branches/barnowl_unicode/owl/zephyr.c	2007-12-28 22:04:34 UTC (rev 786)
@@ -353,11 +353,43 @@
 
   return(owl_strdup(""));
 }
+
+char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
+{
+  int i, count, save;
+
+  /* If there's no message here, just run along now */
+  if (n->z_message_len == 0)
+    return(owl_strdup(""));
+
+  count=save=0;
+  for (i = 0; i < n->z_message_len; i++) {
+    if (n->z_message[i]=='\0') {
+      count++;
+      if (count == j) {
+	/* just found the end of the field we're looking for */
+	return(owl_validate_or_convert(n->z_message + save, -1));
+      } else {
+	save = i + 1;
+      }
+    }
+  }
+  /* catch the last field, which might not be null terminated */
+  if (count == j - 1) {
+    return owl_validate_or_convert(n->z_message + save, n->z_message_len - save);
+  }
+
+  return(owl_strdup(""));
+}
 #else
 char *owl_zephyr_get_field(void *n, int j)
 {
   return(owl_strdup(""));
 }
+char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
+{
+  return owl_zephyr_get_field(n, j);
+}
 #endif
 
 


home help back first fref pref prev next nref lref last post