[774] in BarnOwl Developers
[D-O-H] r780 - branches/barnowl_unicode/owl
daemon@ATHENA.MIT.EDU (asedeno@MIT.EDU)
Thu Oct 29 18:09:34 2009
Resent-From: nelhage@mit.edu
Resent-To: barnowl-dev-mtg@charon.mit.edu
To: dirty-owl-hackers@mit.edu
From: asedeno@MIT.EDU
Reply-to: dirty-owl-hackers@MIT.EDU
Date: Mon, 24 Dec 2007 02:53:11 -0500 (EST)
Author: asedeno
Date: 2007-12-24 02:53:11 -0500 (Mon, 24 Dec 2007)
New Revision: 780
Modified:
branches/barnowl_unicode/owl/fmtext.c
branches/barnowl_unicode/owl/functions.c
branches/barnowl_unicode/owl/logging.c
branches/barnowl_unicode/owl/text.c
branches/barnowl_unicode/owl/util.c
Log:
UTF-8 - first pass
unicode changes:
* remove downstr() from text.c, replace on site with calls to g_utf8_strdown.
In-place downcasing is not a good idea, so the downstr() contract is unfulfillable.
* make owl_text_truncate_cols() and owl_fmtext_truncate_cols() understand character width.
This may need more work. Some code duplication - see if we can refactor.
* stristr() rewritten to use g_utf8_casefold() instead of downstr(), and restructured to have a single return.
* only_whitespace() rewritten for unicode.
glib changes:
* rewrite owl_sprintf() in terms of g_strdup_vprintf()
WARNING: THIS IS NOT SAFE YET. Network data is not yet sanitized.
Non-UTF-8 inputs may do horrible things to you. This phase is just
working on rendering.
Modified: branches/barnowl_unicode/owl/fmtext.c
===================================================================
--- branches/barnowl_unicode/owl/fmtext.c 2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/fmtext.c 2007-12-24 07:53:11 UTC (rev 780)
@@ -314,45 +314,69 @@
*/
void owl_fmtext_truncate_cols(owl_fmtext *in, int acol, int bcol, owl_fmtext *out)
{
- char *ptr1, *ptr2, *last;
- int len, offset;
+ char *ptr_s, *ptr_e, *ptr_c, *last;
+ int col, cnt;
last=in->textbuff+in->textlen-1;
- ptr1=in->textbuff;
- while (ptr1<=last) {
- ptr2=strchr(ptr1, '\n');
- if (!ptr2) {
+ ptr_s=in->textbuff;
+ while (ptr_s<=last) {
+ ptr_e=strchr(ptr_s, '\n');
+ if (!ptr_e) {
/* but this shouldn't happen if we end in a \n */
break;
}
- if (ptr2==ptr1) {
+ if (ptr_e==ptr_s) {
owl_fmtext_append_normal(out, "\n");
- ptr1++;
+ ptr_s++;
continue;
}
+ col = 0;
+ cnt = 0;
+ ptr_c = ptr_s;
+ while(col < bcol && ptr_c < ptr_e) {
+ gunichar c = g_utf8_get_char(ptr_c);
+ if (g_unichar_iswide(c)) {
+ if (col + 2 > bcol) break;
+ else col += 2;
+ }
+ else if (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK) ; /*do nothing*/
+ /* We may need more special cases here... unicode spacing is hard. */
+ else {
+ if (col + 1 > bcol) break;
+ else ++col;
+ }
+ ptr_c = g_utf8_next_char(ptr_c);
+ if (col >= acol) ++cnt;
+ if (col <= acol) ptr_s = ptr_c;
+ }
+ _owl_fmtext_append_fmtext(out, in, ptr_s - in->textbuff, ptr_c - in->textbuff);
+ ptr_s=ptr_e+1;
+
+#if 0
/* we need to check that we won't run over here */
len=bcol-acol;
- if (len > (ptr2-(ptr1+acol))) {
+ if (len > (ptr_e-(ptr_s+acol))) {
/* the whole line fits with room to spare, don't take a full 'len' */
- len=ptr2-(ptr1+acol);
+ len=ptr_e-(ptr_s+acol);
}
- if (len>last-ptr1) {
+ if (len>last-ptr_s) {
/* the whole rest of the text fits with room to spare, adjust for it */
- len-=(last-ptr1);
+ len-=(last-ptr_s);
}
if (len<=0) {
/* saftey check */
owl_fmtext_append_normal(out, "\n");
- ptr1=ptr2+1;
+ ptr_s=ptr_e+1;
continue;
}
- offset=ptr1-in->textbuff;
+ offset = ptr_s - in->textbuff;
_owl_fmtext_append_fmtext(out, in, offset+acol, offset+acol+len);
- ptr1=ptr2+1;
+ ptr_s=ptr_e+1;
+#endif
}
}
Modified: branches/barnowl_unicode/owl/functions.c
===================================================================
--- branches/barnowl_unicode/owl/functions.c 2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/functions.c 2007-12-24 07:53:11 UTC (rev 780)
@@ -2597,7 +2597,13 @@
sprintf(filtname, "class-%s-instance-%s", class, instance);
}
/* downcase it */
- downstr(filtname);
+ {
+ char *temp = g_utf8_strdown(filtname, -1);
+ if (temp) {
+ owl_free(filtname);
+ filtname = temp;
+ }
+ }
/* turn spaces, single quotes, and double quotes into dots */
owl_text_tr(filtname, ' ', '.');
owl_text_tr(filtname, '\'', '.');
Modified: branches/barnowl_unicode/owl/logging.c
===================================================================
--- branches/barnowl_unicode/owl/logging.c 2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/logging.c 2007-12-24 07:53:11 UTC (rev 780)
@@ -154,9 +154,11 @@
} else if (owl_message_is_type_jabber(m)) {
to = owl_sprintf("jabber:%s", owl_message_get_recipient(m));
} else if (owl_message_is_type_aim(m)) {
+ char *temp2;
temp = owl_aim_normalize_screenname(owl_message_get_recipient(m));
- downstr(temp);
- to = owl_sprintf("aim:%s", temp);
+ temp2 = g_utf8_strdown(temp,-1);
+ to = owl_sprintf("aim:%s", temp2);
+ owl_free(temp2);
owl_free(temp);
} else {
to = owl_sprintf("loopback");
@@ -266,11 +268,12 @@
}
} else if (owl_message_is_type_aim(m)) {
/* we do not yet handle chat rooms */
- char *normalto;
- normalto=owl_aim_normalize_screenname(owl_message_get_sender(m));
- downstr(normalto);
+ char *normalto, *temp;
+ temp = owl_aim_normalize_screenname(owl_message_get_sender(m));
+ normalto = g_utf8_strdown(temp, -1);
from=frombuff=owl_sprintf("aim:%s", normalto);
owl_free(normalto);
+ owl_free(temp);
} else if (owl_message_is_type_loopback(m)) {
from=frombuff=owl_strdup("loopback");
} else if (owl_message_is_type_jabber(m)) {
@@ -289,7 +292,7 @@
if (strchr(frombuff, '/')) from="weird";
ch=frombuff[0];
- if (!isalnum(ch)) from="weird";
+ if (!g_ascii_isalnum(ch)) from="weird";
for (i=0; i<len; i++) {
if (frombuff[i]<'!' || frombuff[i]>='~') from="weird";
@@ -298,7 +301,13 @@
if (!strcmp(frombuff, ".") || !strcasecmp(frombuff, "..")) from="weird";
if (!personal) {
- if (strcmp(from, "weird")) downstr(from);
+ if (strcmp(from, "weird")) {
+ char* temp = g_utf8_strdown(frombuff, -1);
+ if (temp) {
+ owl_free(frombuff);
+ from = frombuff = temp;
+ }
+ }
}
/* create the filename (expanding ~ in path names) */
Modified: branches/barnowl_unicode/owl/text.c
===================================================================
--- branches/barnowl_unicode/owl/text.c 2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/text.c 2007-12-24 07:53:11 UTC (rev 780)
@@ -49,41 +49,65 @@
* new line for now */
void owl_text_truncate_cols(char *out, char *in, int acol, int bcol)
{
- char *ptr1, *ptr2, *tmpbuff, *last;
- int len;
-
+ char *ptr_s, *ptr_e, *ptr_c, *tmpbuff, *last;
+ int col, cnt;
+
tmpbuff=owl_malloc(strlen(in)+20);
strcpy(tmpbuff, "");
last=in+strlen(in)-1;
- ptr1=in;
- while (ptr1<last) {
- ptr2=strchr(ptr1, '\n');
- if (!ptr2) {
+ ptr_s=in;
+ while (ptr_s<last) {
+ ptr_e=strchr(ptr_s, '\n');
+ if (!ptr_e) {
/* but this shouldn't happen if we end in a \n */
break;
}
- if (ptr2==ptr1) {
+ if (ptr_e==ptr_s) {
strcat(tmpbuff, "\n");
- ptr1++;
+ ptr_s++;
continue;
}
+ col = 0;
+ cnt = 0;
+ ptr_c = ptr_s;
+ while(col < bcol && ptr_c < ptr_e) {
+ gunichar c = g_utf8_get_char(ptr_c);
+ if (g_unichar_iswide(c)) {
+ if (col + 2 > bcol) break;
+ else col += 2;
+ }
+ else if (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK) ; /*do nothing*/
+ /* We may need more special cases here... unicode spacing is hard. */
+ else {
+ if (col + 1 > bcol) break;
+ else ++col;
+ }
+ ptr_c = g_utf8_next_char(ptr_c);
+ if (col >= acol) ++cnt;
+ if (col <= acol) ptr_s = ptr_c;
+ }
+ strncat(tmpbuff, ptr_s, ptr_c - ptr_s - 1);
+ strcat(tmpbuff, "\n");
+ ptr_s = ptr_e + 1;
+#if 0
/* we need to check that we won't run over here */
- if ( (ptr2-ptr1) < (bcol-acol) ) {
- len=ptr2-(ptr1+acol);
+ if ( (ptr_e-ptr_s) < (bcol-acol) ) {
+ len=ptr_e-(ptr_s+acol);
} else {
len=bcol-acol;
}
- if ((ptr1+len)>=last) {
- len-=last-(ptr1+len);
+ if ((ptr_s+len)>=last) {
+ len-=last-(ptr_s+len);
}
- strncat(tmpbuff, ptr1+acol, len);
+ strncat(tmpbuff, ptr_s+acol, len);
strcat(tmpbuff, "\n");
- ptr1=ptr2+1;
+ ptr_s=ptr_e+1;
+#endif
}
strcpy(out, tmpbuff);
owl_free(tmpbuff);
@@ -274,31 +298,36 @@
/* exactly like strstr but case insensitive */
char *stristr(char *a, char *b)
{
- char *x, *y, *ret;
-
- if ((x=owl_strdup(a))==NULL) return(NULL);
- if ((y=owl_strdup(b))==NULL) return(NULL);
- downstr(x);
- downstr(y);
- ret=strstr(x, y);
- if (ret==NULL) {
- owl_free(x);
- owl_free(y);
- return(NULL);
+ char *x, *y;
+ char *ret = NULL;
+ if ((x = g_utf8_casefold(a, -1)) != NULL) {
+ if ((y = g_utf8_casefold(b, -1)) != NULL) {
+ ret = strstr(x, y);
+ if (ret != NULL) {
+ ret = ret - x + a;
+ }
+ g_free(y);
+ }
+ g_free(x);
}
- ret=ret-x+a;
- owl_free(x);
- owl_free(y);
return(ret);
}
/* return 1 if a string is only whitespace, otherwise 0 */
int only_whitespace(char *s)
{
- int i;
- for (i=0; s[i]; i++) {
- if (!isspace((int) s[i])) return(0);
+ if (g_utf8_validate(s,-1,NULL)) {
+ char *p;
+ for(p = s; p[0]; p=g_utf8_next_char(p)) {
+ if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
+ }
}
+ else {
+ int i;
+ for (i=0; s[i]; i++) {
+ if (!isspace((int) s[i])) return(0);
+ }
+ }
return(1);
}
Modified: branches/barnowl_unicode/owl/util.c
===================================================================
--- branches/barnowl_unicode/owl/util.c 2007-12-24 02:19:08 UTC (rev 779)
+++ branches/barnowl_unicode/owl/util.c 2007-12-24 07:53:11 UTC (rev 780)
@@ -398,15 +398,6 @@
return(i);
}
-/* downcase the string 'foo' */
-void downstr(char *foo)
-{
- int i;
- for (i=0; foo[i]!='\0'; i++) {
- foo[i]=tolower(foo[i]);
- }
-}
-
/* Caller must free response.
* Takes in strings which are space-separated lists of tokens
* and returns a single string containing no token more than once.
@@ -465,32 +456,18 @@
/* allocates memory and returns the string or null.
* caller must free the string.
- * from Linux sprintf man page.
*/
char *owl_sprintf(const char *fmt, ...)
{
- int n, size = 100;
- char *p;
va_list ap;
- if ((p = owl_malloc (size)) == NULL) return (NULL);
- while (1) {
- /* Try to print in the allocated space. */
- va_start(ap, fmt);
- n = vsnprintf (p, size, fmt, ap);
- va_end(ap);
- /* If that worked, return the string. */
- if (n > -1 && n < size)
- return p;
- /* Else try again with more space. */
- if (n > -1) /* glibc 2.1 */
- size = n+1; /* precisely what is needed */
- else /* glibc 2.0 */
- size *= 2; /* twice the old size */
- if ((p = owl_realloc (p, size)) == NULL)
- return NULL;
- }
+ char *ret = NULL;
+ va_start(ap, fmt);
+ ret = g_strdup_vprintf(fmt, ap);
+ va_end(ap);
+ return ret;
}
+
/* Return the owl color associated with the named color. Return -1
* if the named color is not available
*/