[905] in BarnOwl Developers

home help back first fref pref prev next nref lref last post

[D-O-H] r892 - branches/barnowl_unicode/owl

daemon@ATHENA.MIT.EDU (asedeno@MIT.EDU)
Thu Oct 29 18:10:59 2009

Resent-From: nelhage@mit.edu
Resent-To: barnowl-dev-mtg@charon.mit.edu
To: dirty-owl-hackers@mit.edu
From: asedeno@MIT.EDU
Reply-to: dirty-owl-hackers@MIT.EDU
Date: Thu, 17 Jan 2008 01:23:53 -0500 (EST)

Author: asedeno
Date: 2008-01-17 01:23:53 -0500 (Thu, 17 Jan 2008)
New Revision: 892

Modified:
   branches/barnowl_unicode/owl/editwin.c
   branches/barnowl_unicode/owl/util.c
Log:
unicode/glib branch
editwin.c - lots of utf-8 cleanup that I had been putting off.
util.c - a "can we break here" function based on Perl's Text::WrapI18N


Modified: branches/barnowl_unicode/owl/editwin.c
===================================================================
--- branches/barnowl_unicode/owl/editwin.c	2008-01-17 03:24:41 UTC (rev 891)
+++ branches/barnowl_unicode/owl/editwin.c	2008-01-17 06:23:53 UTC (rev 892)
@@ -350,26 +350,36 @@
  */
 int _owl_editwin_linewrap_word(owl_editwin *e)
 {
-  int i, z;
+  int x, y;
+  int i;
+  char *ptr1, *start;
+  gunichar c;
 
-  z = _owl_editwin_get_index_from_xy(e);
-  /* move back and line wrap the previous word */
-  for (i = z - 1; ; i--) {
-    /* move back until you find a space or hit the beginning of the line */
-    if (e->buff[i] == ' ') {
-      /* replace the space with a newline */
-      e->buff[i] = '\n';
-      e->buffy++;
-      e->buffx = z - i - 1;
-      /* were we on the last line */
-      return(0);
-    } else if (e->buff[i] == '\n' || i <= e->lock) {
-      /* we hit the beginning of the line or the buffer, we cannot
-       * wrap.
-       */
-      return(-1);
+  /* saving values */
+  x = e->buffx;
+  y = e->buffy;
+  start = e->buff + e->lock;
+
+  ptr1 = e->buff + _owl_editwin_get_index_from_xy(e);
+  ptr1 = g_utf8_find_prev_char(start, ptr1);
+
+  while (ptr1) {
+    c = g_utf8_get_char(ptr1);
+    if (owl_util_can_break_after(c)) {
+      if (c != ' ') {
+        _owl_editwin_set_xy_by_index(e, ptr1 - e->buff);
+        owl_editwin_key_right(e);
+        /* _owl_editwin_insert_bytes may move e->buff. */
+        i = ptr1 - e->buff;
+        _owl_editwin_insert_bytes(e,1);
+        ptr1 = e->buff + i;
+      }
+      *ptr1 = '\n';
+      return 0;
     }
+    ptr1 = g_utf8_find_prev_char(start, ptr1);
   }
+  return -1;
 }
 
 /* insert a character at the current point (shift later
@@ -641,6 +651,13 @@
   return(ptr2 - e->buff);
 }
 
+/* We assume x,y are not set to point to a mid-char */
+gunichar _owl_editwin_get_char_at_xy(owl_editwin *e)
+{
+  return g_utf8_get_char(e->buff + _owl_editwin_get_index_from_xy(e));
+}
+
+
 void _owl_editwin_set_xy_by_index(owl_editwin *e, int index)
 {
   char *ptr1, *ptr2, *target;
@@ -686,6 +703,7 @@
   gunichar c;
   int x, i;
 
+  /* Find line */
   ptr1 = e->buff;
   ptr2 = strchr(ptr1, '\n');
   for (i = 0; ptr2 != NULL && i < e->buffy; i++) {
@@ -693,6 +711,8 @@
     ptr2 = strchr(ptr1, '\n');
   }
   ptr2 = ptr1;
+
+  /* Find char */
   x = 0;
   while (ptr2 != NULL && x < e->buffx) {
     if (*ptr2 == '\n') return 0;
@@ -700,6 +720,8 @@
     x += mk_wcwidth(c);
     ptr2 = g_utf8_next_char(ptr2);
   }
+  
+  /* calculate x offset */
   return x - e->buffx;
 }
 
@@ -804,8 +826,8 @@
 
 void owl_editwin_move_to_nextword(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
   int i, x;
+  gunichar c = '\0';
 
   /* if we're starting on a space, find the first non-space */
   i=_owl_editwin_get_index_from_xy(e);
@@ -818,20 +840,22 @@
     }
   }
 
-  /* find the next space, newline or end of line and go there, if
-     already at the end of the line, continue on to the next */
-  i=owl_editwin_get_numchars_on_line(e, e->buffy);
+  /* find the next space, newline or end of line and go
+     there, if already at the end of the line, continue on to the next */
+  i=owl_editwin_get_numcells_on_line(e, e->buffy);
+  c = _owl_editwin_get_char_at_xy(e);
   if (e->buffx < i) {
     /* move right till end of line */
     while (e->buffx < i) {
-      e->buffx++;
-      if (e->buff[_owl_editwin_get_index_from_xy(e)]==' ') return;
+      owl_editwin_key_right(e);
+      c = _owl_editwin_get_char_at_xy(e);
+      if (c == ' ') return;
       if (e->buffx == i) return;
     }
   } else if (e->buffx == i) {
     /* try to move down */
     if (e->style==OWL_EDITWIN_STYLE_MULTILINE) {
-      if (e->buffy+1 <  owl_editwin_get_numlines(e)) {
+      if (e->buffy+1 < owl_editwin_get_numlines(e)) {
 	e->buffx=0;
 	e->buffy++;
 	owl_editwin_move_to_nextword(e);
@@ -844,88 +868,96 @@
  */
 void owl_editwin_move_to_previousword(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
-  int i, x;
+  int i;
+  gunichar c;
+  char *ptr1, *ptr2;
 
   /* are we already at the beginning of the word? */
-  i=_owl_editwin_get_index_from_xy(e);
-  if ( (e->buff[i]!=' ' && e->buff[i]!='\n' && e->buff[i]!='\0') &&
-       (e->buff[i-1]==' ' || e->buff[i-1]=='\n') ) {
-    owl_editwin_key_left(e);
+  c = _owl_editwin_get_char_at_xy(e);
+  i = _owl_editwin_get_index_from_xy(e);
+  ptr1 = e->buff + i;
+  if (*ptr1 != ' ' && *ptr1 != '\n' && *ptr1 != '\0' ) {
+    ptr1 = g_utf8_find_prev_char(e->buff, ptr1);
+    c = g_utf8_get_char(ptr1);
+    if (c == ' ' || c == '\n') {
+      owl_editwin_key_left(e);      
+    }
   }
-    
+
   /* are we starting on a space character? */
-  i=_owl_editwin_get_index_from_xy(e);
-  if (e->buff[i]==' ' || e->buff[i]=='\n' || e->buff[i]=='\0') {
+  i = _owl_editwin_get_index_from_xy(e);
+  while (e->buff[i] == ' ' || e->buff[i] == '\n' || e->buff[i] == '\0') {
     /* find the first non-space */
-    for (x=i; x>=e->lock; x--) {
-      if (e->buff[x]!=' ' && e->buff[x]!='\n' && e->buff[x]!='\0') {
-	_owl_editwin_set_xy_by_index(e, x);
-	break;
-      }
-    }
+    owl_editwin_key_left(e);      
+    i = _owl_editwin_get_index_from_xy(e);
   }
 
   /* find the last non-space */
-  i=_owl_editwin_get_index_from_xy(e);
-  for (x=i; x>=e->lock; x--) {
-    if (e->buff[x-1]==' ' || e->buff[x-1]=='\n') {
-      _owl_editwin_set_xy_by_index(e, x);
+  owl_editwin_key_left(e);
+  ptr1 = e->buff + _owl_editwin_get_index_from_xy(e);
+  while (ptr1 >= e->buff + e->lock) {
+    ptr2 = g_utf8_find_prev_char(e->buff, ptr1);
+    if (!ptr2) break;
+    
+    c = g_utf8_get_char(ptr2);
+    if (c == ' ' || c == '\n'){
       break;
     }
+    owl_editwin_key_left(e);
+    ptr1 = e->buff + _owl_editwin_get_index_from_xy(e);
   }
-  _owl_editwin_set_xy_by_index(e, x);
 }
 
 
 void owl_editwin_delete_nextword(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
-  int z;
+  char *ptr1, *start;
+  gunichar c;
 
   if (e->bufflen==0) return;
 
-  /* if we start out on a space character then gobble all the spaces
-     up first */
-  while (1) {
-    z=_owl_editwin_get_index_from_xy(e);
-    if (e->buff[z]==' ' || e->buff[z]=='\n') {
-      owl_editwin_delete_char(e);
-    } else {
-      break;
-    }
+  start = ptr1 = e->buff + _owl_editwin_get_index_from_xy(e);
+  /* if we start out on a space character then jump past all the
+     spaces up first */
+  while (*ptr1 == ' ' || *ptr1 == '\n') {
+    ++ptr1;
   }
 
-  /* then nuke the next word */
-  while (1) {
-    z=_owl_editwin_get_index_from_xy(e);
-    /* z == e->bufflen check added to prevent a hang I (nelhage) have
-       seen repeatedly while using owl. I'm not sure precisely what
-       conditions lead to it. */
-    if (z == e->bufflen
-        || e->buff[z+1]==' ' || e->buff[z+1]=='\n' || e->buff[z+1]=='\0') break;
-    owl_editwin_delete_char(e);
+  /* then jump past the next word */
+  
+  while (ptr1 && ptr1 - e->buff < e->bufflen) {
+    c = g_utf8_get_char(ptr1);
+    if (c == ' ' || c == '\n' || c == '\0') break;
+    ptr1 = g_utf8_find_next_char(ptr1, NULL);
   }
-  owl_editwin_delete_char(e);
+
+  if (ptr1) { /* We broke on a space, */
+    ptr1 = g_utf8_find_next_char(ptr1, NULL);
+    if (ptr1) { /* and there's a character after it, */
+      /* nuke everything back to our starting point. */
+      _owl_editwin_remove_bytes(e, ptr1 - start);
+      return;
+    }
+  }
+  
+  /* If we get here, we ran out of string, drop what's left. */
+  *start = '\0';
+  e->bufflen = start - e->buff;
 }
 
 void owl_editwin_delete_previousword(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
   /* go backwards to the last non-space character, then delete chars */
-  int i, startpos, endpos;
+  int startpos, endpos;
 
   startpos = _owl_editwin_get_index_from_xy(e);
   owl_editwin_move_to_previousword(e);
   endpos = _owl_editwin_get_index_from_xy(e);
-  for (i=0; i<startpos-endpos; i++) {
-    owl_editwin_delete_char(e);
-  }
+  _owl_editwin_remove_bytes(e, startpos-endpos);
 }
 
 void owl_editwin_delete_to_endofline(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
   int i;
 
   if (owl_editwin_get_numchars_on_line(e, e->buffy) > e->buffx) {
@@ -986,7 +1018,6 @@
 
 void owl_editwin_fill_paragraph(owl_editwin *e)
 {
-  /* asedeno: needs fixing for utf-8*/
   int i, save;
 
   /* save our starting point */
@@ -1017,6 +1048,9 @@
     }
 
     /* did we hit the end of a line too soon? */
+    /* asedeno: Here we replace a newline with a space. We may want to
+       consider removing the space if the characters to either side
+       are CJK ideograms.*/
     i = _owl_editwin_get_index_from_xy(e);
     if (e->buff[i] == '\n' && e->buffx < e->fillcol - 1) {
       /* ********* we need to make sure we don't pull in a word that's too long ***********/
@@ -1030,7 +1064,9 @@
 	owl_editwin_key_right(e);
       } else {
 	owl_editwin_delete_char(e);
-	/* if we did this ahead of the save point, adjust it */
+	/* if we did this ahead of the save point, adjust it. Changing
+           by one is fine here because we're only removing an ASCII
+           space. */
 	if (i < save) save--;
       }
     } else {

Modified: branches/barnowl_unicode/owl/util.c
===================================================================
--- branches/barnowl_unicode/owl/util.c	2008-01-17 03:24:41 UTC (rev 891)
+++ branches/barnowl_unicode/owl/util.c	2008-01-17 06:23:53 UTC (rev 892)
@@ -831,6 +831,27 @@
   return out;
 }
 
+/* This is based on _extract() and _isCJ() from perl's Text::WrapI18N */
+int owl_util_can_break_after(gunichar c)
+{
+  
+  if (c == ' ') return 1;
+  if (c >= 0x3000 && c <= 0x312f) {
+    /* CJK punctuations, Hiragana, Katakana, Bopomofo */
+    if (c == 0x300a || c == 0x300c || c == 0x300e ||
+        c == 0x3010 || c == 0x3014 || c == 0x3016 ||
+        c == 0x3018 || c == 0x301a)
+      return 0;
+    return 1;
+  }
+  if (c >= 0x31a0 && c <= 0x31bf) {return 1;}  /* Bopomofo */
+  if (c >= 0x31f0 && c <= 0x31ff) {return 1;}  /* Katakana extension */
+  if (c >= 0x3400 && c <= 0x9fff) {return 1;}  /* Han Ideogram */
+  if (c >= 0xf900 && c <= 0xfaff) {return 1;}  /* Han Ideogram */
+  if (c >= 0x20000 && c <= 0x2ffff) {return 1;}  /* Han Ideogram */
+  return 0;
+}
+
 /**************************************************************************/
 /************************* REGRESSION TESTS *******************************/
 /**************************************************************************/


home help back first fref pref prev next nref lref last post