From 36f38afb959f19490a38ba50949ba62e7c85c3af Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Sun, 3 Jun 2012 14:08:55 +0200 Subject: [PATCH] UTF-8 support for para_gui. This adds two public helper functions to string.c which operate on multibyte strings if the character encoding used in the selected locale is UTF-8. The first new helper, strwidth(), computes the width of the UTF-8 string while skip_cells() determines the number of bytes the given multibyte string must be advanced in order to skip the given number of cells. para_gui is changed to use the new functions to properly display UTF-8 encoded data in its top and bottom windows. --- gui.c | 53 ++++++++++++++------ string.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ string.h | 2 + 3 files changed, 189 insertions(+), 15 deletions(-) diff --git a/gui.c b/gui.c index 696ea1b2..ac9ee19f 100644 --- a/gui.c +++ b/gui.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "gui.cmdline.h" #include "para.h" @@ -305,11 +306,18 @@ static void add_spaces(WINDOW* win, unsigned int num) static int align_str(WINDOW* win, char *str, unsigned int len, unsigned int align) { - int i, num; /* of spaces */ + int ret, i, num; /* of spaces */ + size_t width; if (!win || !str) - return -1; - num = len - strlen(str); + return 0; + ret = strwidth(str, &width); + if (ret < 0) { + PARA_ERROR_LOG("%s\n", para_strerror(-ret)); + width = 0; + str[0] = '\0'; + } + num = len - width; if (num < 0) { str[len] = '\0'; num = 0; @@ -391,11 +399,14 @@ static int first_visible_rbe(unsigned *lines) return RINGBUFFER_SIZE - 1; } +/* +returns number of first visible rbe, *lines is the number of lines drawn. 
+ */ static int draw_top_rbe(unsigned *lines) { - unsigned len; - int offset, fvr = first_visible_rbe(lines); + int ret, fvr = first_visible_rbe(lines); struct rb_entry *rbe; + size_t bytes_to_skip, cells_to_skip, width; if (fvr < 0) return -1; @@ -403,16 +414,22 @@ static int draw_top_rbe(unsigned *lines) rbe = ringbuffer_get(bot_win_rb, fvr); if (!rbe) return -1; - len = rbe->len; if (*lines > bot.lines) { - /* first rbe is only partially visible */ - offset = (*lines - bot.lines) * bot.cols; - assert(offset <= len); - } else - offset = 0; + /* rbe is partially visible multi-line */ + cells_to_skip = (*lines - bot.lines) * bot.cols; + ret = skip_cells(rbe->msg, cells_to_skip, &bytes_to_skip); + if (ret < 0) + return ret; + ret = strwidth(rbe->msg + bytes_to_skip, &width); + if (ret < 0) + return ret; + } else { + bytes_to_skip = 0; + width = rbe->len; + } wattron(bot.win, COLOR_PAIR(rbe->color)); - waddstr(bot.win, rbe->msg + offset); - *lines = NUM_LINES(len - offset); + waddstr(bot.win, rbe->msg + bytes_to_skip); + *lines = NUM_LINES(width); return fvr; } @@ -444,10 +461,15 @@ out: static void rb_add_entry(int color, char *msg) { - struct rb_entry *old, *new = para_malloc(sizeof(struct rb_entry)); + struct rb_entry *old, *new; int x, y; + size_t len; + + if (strwidth(msg, &len) < 0) + return; + new = para_malloc(sizeof(struct rb_entry)); new->color = color; - new->len = strlen(msg); + new->len = len; new->msg = msg; old = ringbuffer_add(bot_win_rb, new); // fprintf(stderr, "added: %s\n", new->msg); @@ -1514,6 +1536,7 @@ int main(int argc, char *argv[]) top.lines = theme.top_lines_default; setup_signal_handling(); bot_win_rb = ringbuffer_new(RINGBUFFER_SIZE); + setlocale(LC_CTYPE, ""); initscr(); /* needed only once, always successful */ init_curses(); print_welcome(); diff --git a/string.c b/string.c index 283c46e2..21c674ec 100644 --- a/string.c +++ b/string.c @@ -6,12 +6,19 @@ /** \file string.c Memory allocation and string handling functions. 
*/ +#define _GNU_SOURCE + #include /* gettimeofday */ #include #include /* uname() */ + #include #include +#include +#include +#include + #include "para.h" #include "string.h" #include "error.h" @@ -982,3 +989,145 @@ char *key_value_copy(const char *src, size_t len, const char *key) return NULL; return safe_strdup(src + keylen + 1, len - keylen - 1); } + +static bool utf8_mode(void) +{ + static bool initialized, have_utf8; + + if (!initialized) { + char *info = nl_langinfo(CODESET); + have_utf8 = (info && strcmp(info, "UTF-8") == 0); + initialized = true; + PARA_INFO_LOG("%susing UTF-8 character encoding\n", + have_utf8? "" : "not "); + } + return have_utf8; +} + +/* + * glibc's wcswidth returns -1 if the string contains a tab character, which + * makes the function next to useless. The two functions below are taken from + * mutt. + */ + +#define IsWPrint(wc) (iswprint(wc) || wc >= 0xa0) + +static int mutt_wcwidth(wchar_t wc, size_t pos) +{ + int n; + + if (wc == 0x09) /* tab */ + return (pos | 7) + 1 - pos; + n = wcwidth(wc); + if (IsWPrint(wc) && n > 0) + return n; + if (!(wc & ~0x7f)) + return 2; + if (!(wc & ~0xffff)) + return 6; + return 10; +} + +static size_t mutt_wcswidth(const wchar_t *s, size_t n) +{ + size_t w = 0; + + while (n--) + w += mutt_wcwidth(*s++, w); + return w; +} + +/** + * Skip a given number of cells at the beginning of a string. + * + * \param s The input string. + * \param cells_to_skip Desired number of cells that should be skipped. + * \param bytes_to_skip Result. + * + * This function computes how many input bytes must be skipped to advance a + * string by the given width. If the current character encoding is not UTF-8, + * this is simply the given number of cells, i.e. \a cells_to_skip. 
Otherwise, + * \a s is treated as a multibyte string and on successful return, \a s + + * bytes_to_skip points to the start of a multibyte string such that the total + * width of the multibyte characters that are skipped by advancing \a s that + * many bytes equals at least \a cells_to_skip. + * + * \return Standard. + */ +int skip_cells(const char *s, size_t cells_to_skip, size_t *bytes_to_skip) +{ + wchar_t wc; + mbstate_t ps; + size_t n, bytes_parsed, cells_skipped; + + *bytes_to_skip = 0; + if (cells_to_skip == 0) + return 0; + if (!utf8_mode()) { + *bytes_to_skip = cells_to_skip; + return 0; + } + bytes_parsed = cells_skipped = 0; + memset(&ps, 0, sizeof(ps)); + n = strlen(s); + while (cells_to_skip > cells_skipped) { + size_t mbret; + + mbret = mbrtowc(&wc, s + bytes_parsed, n - bytes_parsed, &ps); + assert(mbret != 0); + if (mbret == (size_t)-1 || mbret == (size_t)-2) + return -ERRNO_TO_PARA_ERROR(EILSEQ); + bytes_parsed += mbret; + cells_skipped += mutt_wcwidth(wc, cells_skipped); + } + *bytes_to_skip = bytes_parsed; + return 1; +} + +/** + * Compute the width of a UTF-8 string. + * + * \param s The string. + * \param result The width of \a s is returned here. + * + * If not in UTF-8 mode, this function is just a wrapper for strlen(3). + * Otherwise \a s is treated as a UTF-8 string and its display width is + * computed. Note that this function may fail if the underlying call to + * mbsrtowcs(3) fails, so the caller must check the return value. + * + * \sa nl_langinfo(3), wcswidth(3). + * + * \return Standard. + */ +__must_check int strwidth(const char *s, size_t *result) +{ + const char *src = s; + mbstate_t state; + static wchar_t *dest; + size_t num_wchars; + + /* + * Never call any log function here. This may result in an endless loop + * as para_gui's para_log() calls this function. 
+ */ + + if (!utf8_mode()) { + *result = strlen(s); + return 0; + } + memset(&state, 0, sizeof(state)); + *result = 0; + num_wchars = mbsrtowcs(NULL, &src, 0, &state); + if (num_wchars == (size_t)-1) + return -ERRNO_TO_PARA_ERROR(errno); + if (num_wchars == 0) + return 0; + dest = para_malloc(num_wchars * sizeof(*dest)); + src = s; + memset(&state, 0, sizeof(state)); + num_wchars = mbsrtowcs(dest, &src, num_wchars, &state); + assert(num_wchars > 0 && num_wchars != (size_t)-1); + *result = mutt_wcswidth(dest, num_wchars); + free(dest); + return 1; +} diff --git a/string.h b/string.h index cf02d978..fc55451a 100644 --- a/string.h +++ b/string.h @@ -91,3 +91,5 @@ void freep(void *arg); int compute_word_num(const char *buf, const char *delim, int offset); char *safe_strdup(const char *src, size_t len); char *key_value_copy(const char *src, size_t len, const char *key); +int skip_cells(const char *s, size_t cells_to_skip, size_t *result); +__must_check int strwidth(const char *s, size_t *result); -- 2.39.5