From: Bruno Haible Date: Wed, 6 Feb 2002 12:57:08 +0000 (+0000) Subject: New subroutines, for the Python backend. X-Git-Tag: v0.11.1~90 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c77882966e39bb5e625863a75f78fb4a9b84336b;p=thirdparty%2Fgettext.git New subroutines, for the Python backend. --- diff --git a/lib/ChangeLog b/lib/ChangeLog index 87b4f1dbe..7cb29fbb8 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,9 @@ +2002-02-02 Bruno Haible + + * ucs4-utf8.h: New file. + * ucs4-utf16.h: New file. + * Makefile.am (libgettextlib_la_SOURCES): Add them. + 2002-02-03 Bruno Haible * xerror.c (multiline_warning): Remove temporary hack. diff --git a/lib/Makefile.am b/lib/Makefile.am index d6b5594ce..36ad91162 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -54,6 +54,7 @@ libgettextlib_la_SOURCES = \ sh-quote.h sh-quote.c \ strtoul.c \ tmpdir.h tmpdir.c \ + ucs4-utf8.h ucs4-utf16.h \ wait-process.h wait-process.c \ xerror.h xerror.c \ xmalloc.h xmalloc.c xstrdup.c \ diff --git a/lib/ucs4-utf16.h b/lib/ucs4-utf16.h new file mode 100644 index 000000000..7431386af --- /dev/null +++ b/lib/ucs4-utf16.h @@ -0,0 +1,68 @@ +/* Conversion UCS-4 to UTF-16. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + + +#include + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite the K&R C function definition syntax. */ +static int +u16_uctomb_aux PARAMS ((unsigned short *s, unsigned int uc, int n)); +static inline int +u16_uctomb PARAMS ((unsigned short *s, unsigned int uc, int n)); + +/* Return the length (number of units) of the UTF-16 representation of uc, + after storing it at S. Return -1 upon failure (uc >= 0x110000), -2 if the + number of available units, N, is too small. */ +static int +u16_uctomb_aux (s, uc, n) + unsigned short *s; + unsigned int uc; + int n; +{ + if (uc >= 0x10000) + { + if (uc < 0x110000) + { + if (n >= 2) + { + s[0] = 0xd800 + ((uc - 0x10000) >> 10); + s[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff); + return 2; + } + } + else + return -1; + } + return -2; +} + +static inline int +u16_uctomb (s, uc, n) + unsigned short *s; + unsigned int uc; + int n; +{ + if (uc < 0x10000 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u16_uctomb_aux (s, uc, n); +} diff --git a/lib/ucs4-utf8.h b/lib/ucs4-utf8.h new file mode 100644 index 000000000..965d92a15 --- /dev/null +++ b/lib/ucs4-utf8.h @@ -0,0 +1,90 @@ +/* Conversion UCS-4 to UTF-8. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Bruno Haible , 2002. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + + +#include + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite the K&R C function definition syntax. */ +static int +u8_uctomb_aux PARAMS ((unsigned char *s, unsigned int uc, int n)); +static inline int +u8_uctomb PARAMS ((unsigned char *s, unsigned int uc, int n)); + +/* Return the length (number of units) of the UTF-8 representation of uc, + after storing it at S. Return -1 upon failure (uc >= 0x110000), -2 if the + number of available units, N, is too small. */ +static int +u8_uctomb_aux (s, uc, n) + unsigned char *s; + unsigned int uc; + int n; +{ + int count; + + if (uc < 0x80) + count = 1; + else if (uc < 0x800) + count = 2; + else if (uc < 0x10000) + count = 3; +#if 0 + else if (uc < 0x200000) + count = 4; + else if (uc < 0x4000000) + count = 5; + else if (uc <= 0x7fffffff) + count = 6; +#else + else if (uc < 0x110000) + count = 4; +#endif + else + return -1; + + if (n < count) + return -2; + + switch (count) /* note: each case deliberately falls through to the next, so the bytes are stored from last to first */ + { +#if 0 + case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000; + case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000; +#endif + case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000; + case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800; + case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0; + case 1: s[0] = uc; + } + return count; +} + +static inline int +u8_uctomb (s, uc, n) + unsigned char *s; + unsigned int uc; + int n; +{ + if (uc < 0x80 && n > 0) + { + s[0] = uc; + return 1; + } + else + return u8_uctomb_aux (s, uc, n); +}