[774] in BarnOwl Developers

home help back first fref pref prev next nref lref last post

[D-O-H] r780 - branches/barnowl_unicode/owl

daemon@ATHENA.MIT.EDU (asedeno@MIT.EDU)
Thu Oct 29 18:09:34 2009

Resent-From: nelhage@mit.edu
Resent-To: barnowl-dev-mtg@charon.mit.edu
To: dirty-owl-hackers@mit.edu
From: asedeno@MIT.EDU
Reply-to: dirty-owl-hackers@MIT.EDU
Date: Mon, 24 Dec 2007 02:53:11 -0500 (EST)

Author: asedeno
Date: 2007-12-24 02:53:11 -0500 (Mon, 24 Dec 2007)
New Revision: 780

Modified:
   branches/barnowl_unicode/owl/fmtext.c
   branches/barnowl_unicode/owl/functions.c
   branches/barnowl_unicode/owl/logging.c
   branches/barnowl_unicode/owl/text.c
   branches/barnowl_unicode/owl/util.c
Log:
UTF-8 - first pass

unicode changes:
* remove downstr() from text.c, replace on site with calls to g_utf8_strdown.
  In-place downcasing is not a good idea, so the downstr() contract is unfulfillable.

* make owl_text_truncate_cols() and owl_fmtext_truncate_cols() understand character width.
  This may need more work. Some code duplication - see if we can refactor.

* stristr() rewritten to use g_utf8_casefold() instead of downstr(), and restructured to have a single return.

* only_whitespace() rewritten for unicode.

glib changes:
* rewrite owl_sprintf() in terms of g_strdup_vprintf()

WARNING: THIS IS NOT SAFE YET. Network data is not yet sanitized.
Non-UTF-8 inputs may do horrible things to you. This phase is just
working on rendering.



Modified: branches/barnowl_unicode/owl/fmtext.c
===================================================================
--- branches/barnowl_unicode/owl/fmtext.c	2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/fmtext.c	2007-12-24 07:53:11 UTC (rev 780)
@@ -314,45 +314,69 @@
  */
 void owl_fmtext_truncate_cols(owl_fmtext *in, int acol, int bcol, owl_fmtext *out)
 {
-  char *ptr1, *ptr2, *last;
-  int len, offset;
+  char *ptr_s, *ptr_e, *ptr_c, *last;
+  int col, cnt;
 
   last=in->textbuff+in->textlen-1;
-  ptr1=in->textbuff;
-  while (ptr1<=last) {
-    ptr2=strchr(ptr1, '\n');
-    if (!ptr2) {
+  ptr_s=in->textbuff;
+  while (ptr_s<=last) {
+    ptr_e=strchr(ptr_s, '\n');
+    if (!ptr_e) {
       /* but this shouldn't happen if we end in a \n */
       break;
     }
     
-    if (ptr2==ptr1) {
+    if (ptr_e==ptr_s) {
       owl_fmtext_append_normal(out, "\n");
-      ptr1++;
+      ptr_s++;
       continue;
     }
 
+    col = 0;
+    cnt = 0;
+    ptr_c = ptr_s;
+    while(col < bcol && ptr_c < ptr_e) {
+      gunichar c = g_utf8_get_char(ptr_c);
+      if (g_unichar_iswide(c)) {
+	if (col + 2 > bcol) break;
+	else col += 2;
+      }
+      else if (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK) ; /*do nothing*/
+      /* We may need more special cases here... unicode spacing is hard. */
+      else {
+	if (col + 1 > bcol) break;
+	else ++col;
+      }
+      ptr_c = g_utf8_next_char(ptr_c);
+      if (col >= acol) ++cnt;
+      if (col <= acol) ptr_s = ptr_c;
+    }
+    _owl_fmtext_append_fmtext(out, in, ptr_s - in->textbuff, ptr_c - in->textbuff);
+    ptr_s=ptr_e+1;
+    
+#if 0
     /* we need to check that we won't run over here */
     len=bcol-acol;
-    if (len > (ptr2-(ptr1+acol))) {
+    if (len > (ptr_e-(ptr_s+acol))) {
       /* the whole line fits with room to spare, don't take a full 'len' */
-      len=ptr2-(ptr1+acol);
+      len=ptr_e-(ptr_s+acol);
     }
-    if (len>last-ptr1) {
+    if (len>last-ptr_s) {
       /* the whole rest of the text fits with room to spare, adjust for it */
-      len-=(last-ptr1);
+      len-=(last-ptr_s);
     }
     if (len<=0) {
       /* saftey check */
       owl_fmtext_append_normal(out, "\n");
-      ptr1=ptr2+1;
+      ptr_s=ptr_e+1;
       continue;
     }
 
-    offset=ptr1-in->textbuff;
+    offset = ptr_s - in->textbuff;
     _owl_fmtext_append_fmtext(out, in, offset+acol, offset+acol+len);
 
-    ptr1=ptr2+1;
+    ptr_s=ptr_e+1;
+#endif
   }
 }
 

Modified: branches/barnowl_unicode/owl/functions.c
===================================================================
--- branches/barnowl_unicode/owl/functions.c	2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/functions.c	2007-12-24 07:53:11 UTC (rev 780)
@@ -2597,7 +2597,13 @@
     sprintf(filtname, "class-%s-instance-%s", class, instance);
   }
   /* downcase it */
-  downstr(filtname);
+  {
+    char *temp = g_utf8_strdown(filtname, -1);
+    if (temp) {
+      owl_free(filtname);
+      filtname = temp;
+    }
+  }
   /* turn spaces, single quotes, and double quotes into dots */
   owl_text_tr(filtname, ' ', '.');
   owl_text_tr(filtname, '\'', '.');

Modified: branches/barnowl_unicode/owl/logging.c
===================================================================
--- branches/barnowl_unicode/owl/logging.c	2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/logging.c	2007-12-24 07:53:11 UTC (rev 780)
@@ -154,9 +154,11 @@
   } else if (owl_message_is_type_jabber(m)) {
     to = owl_sprintf("jabber:%s", owl_message_get_recipient(m));
   } else if (owl_message_is_type_aim(m)) {
+    char *temp2;
     temp = owl_aim_normalize_screenname(owl_message_get_recipient(m));
-    downstr(temp);
-    to = owl_sprintf("aim:%s", temp);
+    temp2 = g_utf8_strdown(temp,-1);
+    to = owl_sprintf("aim:%s", temp2);
+    owl_free(temp2);
     owl_free(temp);
   } else {
     to = owl_sprintf("loopback");
@@ -266,11 +268,12 @@
     }
   } else if (owl_message_is_type_aim(m)) {
     /* we do not yet handle chat rooms */
-    char *normalto;
-    normalto=owl_aim_normalize_screenname(owl_message_get_sender(m));
-    downstr(normalto);
+    char *normalto, *temp;
+    temp = owl_aim_normalize_screenname(owl_message_get_sender(m));
+    normalto = g_utf8_strdown(temp, -1);
     from=frombuff=owl_sprintf("aim:%s", normalto);
     owl_free(normalto);
+    owl_free(temp);
   } else if (owl_message_is_type_loopback(m)) {
     from=frombuff=owl_strdup("loopback");
   } else if (owl_message_is_type_jabber(m)) {
@@ -289,7 +292,7 @@
   if (strchr(frombuff, '/')) from="weird";
 
   ch=frombuff[0];
-  if (!isalnum(ch)) from="weird";
+  if (!g_ascii_isalnum(ch)) from="weird";
 
   for (i=0; i<len; i++) {
     if (frombuff[i]<'!' || frombuff[i]>='~') from="weird";
@@ -298,7 +301,13 @@
   if (!strcmp(frombuff, ".") || !strcasecmp(frombuff, "..")) from="weird";
 
   if (!personal) {
-    if (strcmp(from, "weird")) downstr(from);
+    if (strcmp(from, "weird")) {
+      char* temp = g_utf8_strdown(frombuff, -1);
+      if (temp) {
+	owl_free(frombuff);
+	from = frombuff = temp;
+      }
+    }
   }
 
   /* create the filename (expanding ~ in path names) */

Modified: branches/barnowl_unicode/owl/text.c
===================================================================
--- branches/barnowl_unicode/owl/text.c	2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/text.c	2007-12-24 07:53:11 UTC (rev 780)
@@ -49,41 +49,65 @@
  * new line for now */
 void owl_text_truncate_cols(char *out, char *in, int acol, int bcol)
 {
-  char *ptr1, *ptr2, *tmpbuff, *last;
-  int len;
-
+  char *ptr_s, *ptr_e, *ptr_c, *tmpbuff, *last;
+  int col, cnt;
+  
   tmpbuff=owl_malloc(strlen(in)+20);
 
   strcpy(tmpbuff, "");
   last=in+strlen(in)-1;
-  ptr1=in;
-  while (ptr1<last) {
-    ptr2=strchr(ptr1, '\n');
-    if (!ptr2) {
+  ptr_s=in;
+  while (ptr_s<last) {
+    ptr_e=strchr(ptr_s, '\n');
+    if (!ptr_e) {
       /* but this shouldn't happen if we end in a \n */
       break;
     }
     
-    if (ptr2==ptr1) {
+    if (ptr_e==ptr_s) {
       strcat(tmpbuff, "\n");
-      ptr1++;
+      ptr_s++;
       continue;
     }
 
+    col = 0;
+    cnt = 0;
+    ptr_c = ptr_s;
+    while(col < bcol && ptr_c < ptr_e) {
+      gunichar c = g_utf8_get_char(ptr_c);
+      if (g_unichar_iswide(c)) {
+	if (col + 2 > bcol) break;
+	else col += 2;
+      }
+      else if (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK) ; /*do nothing*/
+      /* We may need more special cases here... unicode spacing is hard. */
+      else {
+	if (col + 1 > bcol) break;
+	else ++col;
+      }
+      ptr_c = g_utf8_next_char(ptr_c);
+      if (col >= acol) ++cnt;
+      if (col <= acol) ptr_s = ptr_c;
+    }
+    strncat(tmpbuff, ptr_s, ptr_c - ptr_s - 1);
+    strcat(tmpbuff, "\n");
+    ptr_s = ptr_e + 1;
+#if 0
     /* we need to check that we won't run over here */
-    if ( (ptr2-ptr1) < (bcol-acol) ) {
-      len=ptr2-(ptr1+acol);
+    if ( (ptr_e-ptr_s) < (bcol-acol) ) {
+      len=ptr_e-(ptr_s+acol);
     } else {
       len=bcol-acol;
     }
-    if ((ptr1+len)>=last) {
-      len-=last-(ptr1+len);
+    if ((ptr_s+len)>=last) {
+      len-=last-(ptr_s+len);
     }
 
-    strncat(tmpbuff, ptr1+acol, len);
+    strncat(tmpbuff, ptr_s+acol, len);
     strcat(tmpbuff, "\n");
 
-    ptr1=ptr2+1;
+    ptr_s=ptr_e+1;
+#endif
   }
   strcpy(out, tmpbuff);
   owl_free(tmpbuff);
@@ -274,31 +298,36 @@
 /* exactly like strstr but case insensitive */
 char *stristr(char *a, char *b)
 {
-  char *x, *y, *ret;
-
-  if ((x=owl_strdup(a))==NULL) return(NULL);
-  if ((y=owl_strdup(b))==NULL) return(NULL);
-  downstr(x);
-  downstr(y);
-  ret=strstr(x, y);
-  if (ret==NULL) {
-    owl_free(x);
-    owl_free(y);
-    return(NULL);
+  char *x, *y;
+  char *ret = NULL;
+  if ((x = g_utf8_casefold(a, -1)) != NULL) {
+    if ((y = g_utf8_casefold(b, -1)) != NULL) {
+      ret = strstr(x, y);
+      if (ret != NULL) {
+	ret = ret - x + a;
+      }
+      g_free(y);
+    }
+    g_free(x);
   }
-  ret=ret-x+a;
-  owl_free(x);
-  owl_free(y);
   return(ret);
 }
 
 /* return 1 if a string is only whitespace, otherwise 0 */
 int only_whitespace(char *s)
 {
-  int i;
-  for (i=0; s[i]; i++) {
-    if (!isspace((int) s[i])) return(0);
+  if (g_utf8_validate(s,-1,NULL)) {
+    char *p;
+    for(p = s; p[0]; p=g_utf8_next_char(p)) {
+      if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
+    }
   }
+  else {
+    int i;
+    for (i=0; s[i]; i++) {
+      if (!isspace((int) s[i])) return(0);
+    }
+  }
   return(1);
 }
 

Modified: branches/barnowl_unicode/owl/util.c
===================================================================
--- branches/barnowl_unicode/owl/util.c	2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/util.c	2007-12-24 07:53:11 UTC (rev 780)
@@ -398,15 +398,6 @@
   return(i);
 }
 
-/* downcase the string 'foo' */
-void downstr(char *foo)
-{
-  int i;
-  for (i=0; foo[i]!='\0'; i++) {
-    foo[i]=tolower(foo[i]);
-  }
-}
-
 /* Caller must free response. 
  * Takes in strings which are space-separated lists of tokens
  * and returns a single string containing no token more than once.
@@ -465,32 +456,18 @@
 
 /* allocates memory and returns the string or null.
  * caller must free the string. 
- * from Linux sprintf man page. 
  */
 char *owl_sprintf(const char *fmt, ...)
 {
-  int n, size = 100;
-  char *p;
   va_list ap;
-  if ((p = owl_malloc (size)) == NULL) return (NULL);
-  while (1) {
-    /* Try to print in the allocated space. */
-    va_start(ap, fmt);
-    n = vsnprintf (p, size, fmt, ap);
-    va_end(ap);
-    /* If that worked, return the string. */
-    if (n > -1 && n < size)
-      return p;
-    /* Else try again with more space. */
-    if (n > -1)    /* glibc 2.1 */
-      size = n+1; /* precisely what is needed */
-    else           /* glibc 2.0 */
-      size *= 2;  /* twice the old size */
-    if ((p = owl_realloc (p, size)) == NULL)
-      return NULL;
-  }
+  char *ret = NULL;
+  va_start(ap, fmt);
+  ret = g_strdup_vprintf(fmt, ap);
+  va_end(ap);
+  return ret;
 }
 
+
 /* Return the owl color associated with the named color.  Return -1
  * if the named color is not available
  */


home help back first fref pref prev next nref lref last post