git.ipfire.org Git - thirdparty/vim.git/commitdiff
patch 8.2.1536: cannot get the class of a character; emoji widths are wrong (tag: v8.2.1536)
author: Bram Moolenaar <Bram@vim.org>
Fri, 28 Aug 2020 20:24:57 +0000 (22:24 +0200)
committer: Bram Moolenaar <Bram@vim.org>
Fri, 28 Aug 2020 20:24:57 +0000 (22:24 +0200)
Problem:    Cannot get the class of a character; emoji widths are wrong in
            some environments.
Solution:   Add charclass(). Update some emoji widths.  Add script to check
            emoji widths.

Filelist
runtime/doc/eval.txt
runtime/doc/usr_41.txt
src/evalfunc.c
src/mbyte.c
src/proto/mbyte.pro
src/testdir/emoji_list.vim [new file with mode: 0644]
src/testdir/test_functions.vim
src/version.c

index 42eaa1d3edd7079a997c83c40fbc35664a3ae3df..e44c1fd3a9ff7074089f664f9753dded430c9bff 100644 (file)
--- a/Filelist
+++ b/Filelist
@@ -197,6 +197,7 @@ SRC_ALL =   \
                src/testdir/samples/*.txt \
                src/testdir/samples/test000 \
                src/testdir/color_ramp.vim \
+               src/testdir/emoji_list.vim \
                src/testdir/silent.wav \
                src/testdir/popupbounce.vim \
                src/proto.h \
index 129c1e2e2c9c032eb5098e3cd819b8f715daf8bc..e0c84730380ea1901ce09a4f8382fe6fcb5f1e1b 100644 (file)
@@ -2425,6 +2425,7 @@ ch_status({handle} [, {options}])
                                String  status of channel {handle}
 changenr()                     Number  current change number
 char2nr({expr} [, {utf8}])     Number  ASCII/UTF8 value of first char in {expr}
+charclass({string})            Number  character class of {string}
 chdir({dir})                   String  change current working directory
 cindent({lnum})                        Number  C indent for line {lnum}
 clearmatches([{win}])          none    clear all matches
@@ -3520,6 +3521,18 @@ char2nr({expr} [, {utf8}])                                       *char2nr()*
                Can also be used as a |method|: >
                        GetChar()->char2nr()
 
+
+charclass({string})                                    *charclass()*
+               Return the character class of the first character in {string}.
+               The character class is one of:
+                       0       blank
+                       1       punctuation
+                       2       word character
+                       3       emoji
+                       other   specific Unicode class
+               The class is used in patterns and word motions.
+
+
 chdir({dir})                                           *chdir()*
                Change the current working directory to {dir}.  The scope of
                the directory change depends on the directory of the current
index bcd58c9fec070ffa2041c4a71c66fd692687276a..10e53baf7233f7a2ad0e0aa296a5f1fa251cb0cd 100644 (file)
@@ -600,6 +600,7 @@ String manipulation:                                        *string-functions*
        strtrans()              translate a string to make it printable
        tolower()               turn a string to lowercase
        toupper()               turn a string to uppercase
+       charclass()             class of a character
        match()                 position where a pattern matches in a string
        matchend()              position where a pattern match ends in a string
        matchstr()              match of a pattern in a string
index 567bbdfaacc7b93c0e25aa9072e0ed87e30b746a..62bbb8888d52d4ac1599beae0271d9f1a46cff8e 100644 (file)
@@ -564,6 +564,7 @@ static funcentry_T global_functions[] =
     {"ch_status",      1, 2, FEARG_1,    ret_string,   JOB_FUNC(f_ch_status)},
     {"changenr",       0, 0, 0,          ret_number,   f_changenr},
     {"char2nr",                1, 2, FEARG_1,    ret_number,   f_char2nr},
+    {"charclass",      1, 1, FEARG_1,    ret_number,   f_charclass},
     {"chdir",          1, 1, FEARG_1,    ret_string,   f_chdir},
     {"cindent",                1, 1, FEARG_1,    ret_number,   f_cindent},
     {"clearmatches",   0, 1, FEARG_1,    ret_void,     f_clearmatches},
index d72fb9191b49a0834752c3017205ee7639a3f71a..3faefa6af66f15ded4bed917f6458309fe0540fb 100644 (file)
@@ -132,7 +132,9 @@ static int dbcs_char2cells(int c);
 static int dbcs_ptr2cells_len(char_u *p, int size);
 static int dbcs_ptr2char(char_u *p);
 static int dbcs_head_off(char_u *base, char_u *p);
+#ifdef FEAT_EVAL
 static int cw_value(int c);
+#endif
 
 /*
  * Lookup table to quickly get the length in bytes of a UTF-8 character from
@@ -1388,8 +1390,7 @@ utf_char2cells(int c)
        {0x26ce, 0x26ce},
        {0x26d4, 0x26d4},
        {0x26ea, 0x26ea},
-       {0x26f2, 0x26f3},
-       {0x26f5, 0x26f5},
+       {0x26f2, 0x26f5},
        {0x26fa, 0x26fa},
        {0x26fd, 0x26fd},
        {0x2705, 0x2705},
@@ -1490,6 +1491,21 @@ utf_char2cells(int c)
     // based on http://unicode.org/emoji/charts/emoji-list.html
     static struct interval emoji_wide[] =
     {
+       {0x23ed, 0x23ef},
+       {0x23f1, 0x23f2},
+       {0x23f8, 0x23fa},
+       {0x24c2, 0x24c2},
+       {0x261d, 0x261d},
+       {0x26c8, 0x26c8},
+       {0x26cf, 0x26cf},
+       {0x26d1, 0x26d1},
+       {0x26d3, 0x26d3},
+       {0x26e9, 0x26e9},
+       {0x26f0, 0x26f1},
+       {0x26f7, 0x26f9},
+       {0x270c, 0x270d},
+       {0x2934, 0x2935},
+       {0x1f170, 0x1f189},
        {0x1f1e6, 0x1f1ff},
        {0x1f321, 0x1f321},
        {0x1f324, 0x1f32c},
@@ -1533,11 +1549,15 @@ utf_char2cells(int c)
 
     if (c >= 0x100)
     {
+#if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS)
        int     n;
+#endif
 
+#ifdef FEAT_EVAL
        n = cw_value(c);
        if (n != 0)
            return n;
+#endif
 
 #ifdef USE_WCHAR_FUNCTIONS
        /*
@@ -2667,8 +2687,7 @@ static struct interval emoji_all[] =
     {0x3299, 0x3299},
     {0x1f004, 0x1f004},
     {0x1f0cf, 0x1f0cf},
-    {0x1f170, 0x1f171},
-    {0x1f17e, 0x1f17f},
+    {0x1f170, 0x1f189},
     {0x1f18e, 0x1f18e},
     {0x1f191, 0x1f19a},
     {0x1f1e6, 0x1f1ff},
@@ -2835,6 +2854,10 @@ utf_class_buf(int c, buf_T *buf)
        return 1;           // punctuation
     }
 
+    // emoji
+    if (intable(emoji_all, sizeof(emoji_all), c))
+       return 3;
+
     // binary search in table
     while (top >= bot)
     {
@@ -2847,10 +2870,6 @@ utf_class_buf(int c, buf_T *buf)
            return (int)classes[mid].class;
     }
 
-    // emoji
-    if (intable(emoji_all, sizeof(emoji_all), c))
-       return 3;
-
     // most other characters are "word" characters
     return 2;
 }
@@ -5352,6 +5371,8 @@ string_convert_ext(
     return retval;
 }
 
+#if defined(FEAT_EVAL) || defined(PROTO)
+
 /*
  * Table set by setcellwidths().
  */
@@ -5525,3 +5546,17 @@ f_setcellwidths(typval_T *argvars, typval_T *rettv UNUSED)
     cw_table = table;
     cw_table_size = l->lv_len;
 }
+
+    void
+f_charclass(typval_T *argvars, typval_T *rettv UNUSED) // "charclass({string})" function; NOTE(review): rettv is marked UNUSED but is written below
+{
+    if (argvars[0].v_type != VAR_STRING // only a String argument is accepted
+           || argvars[0].vval.v_string == NULL // a NULL string is rejected as well
+           || *argvars[0].vval.v_string == NUL) // and so is an empty string: there is no first character
+    {
+       emsg(_(e_stringreq)); // report the "string required" error
+       return; // rettv is not written on this error path
+    }
+    rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string); // result is whatever mb_get_class() reports for the string (presumably the class of its first character, per the help text)
+}
+#endif
index 9385d856d8cd05758d682b4616c6abc60c1cfed5..7a6009ed6c610f8965901b4a34f2fab1561b5e3f 100644 (file)
@@ -85,4 +85,5 @@ int convert_input_safe(char_u *ptr, int len, int maxlen, char_u **restp, int *re
 char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp);
 char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp);
 void f_setcellwidths(typval_T *argvars, typval_T *rettv);
+void f_charclass(typval_T *argvars, typval_T *rettv);
 /* vim: set ft=c : */
diff --git a/src/testdir/emoji_list.vim b/src/testdir/emoji_list.vim
new file mode 100644 (file)
index 0000000..e6a73c3
--- /dev/null
@@ -0,0 +1,22 @@
+" Script to list every character with charclass() == 3 (emoji), one per line, as "|{char}| {strwidth}".
+
+if &modified
+  new
+else
+  enew
+endif
+
+" Use a compiled Vim9 function for speed; it checks code points 0x100 through 0x1ffff and writes matches with setline().
+def DoIt()
+  let lnum = 1
+  for c in range(0x100, 0x1ffff)
+    let cs = nr2char(c)
+    if charclass(cs) == 3
+      setline(lnum, '|' .. cs .. '| ' .. strwidth(cs))
+      lnum += 1
+    endif
+  endfor
+enddef
+
+call DoIt()
+set nomodified
index e15199b786a219f346825ee7a1f88926786c383e..fd9057ddc9c09cab092f076c421fe306267a4e49 100644 (file)
@@ -2077,6 +2077,13 @@ func Test_char2nr()
   set encoding=utf-8
 endfunc
 
+func Test_charclass()
+  call assert_equal(0, charclass(' '))  " 0: blank
+  call assert_equal(1, charclass('.'))  " 1: punctuation
+  call assert_equal(2, charclass('x'))  " 2: word character
+  call assert_equal(3, charclass("\u203c"))  " 3: emoji (U+203C)
+endfunc
+
 func Test_eventhandler()
   call assert_equal(0, eventhandler())
 endfunc
index 88332a00fece25910d3680292d86d98a6c165e85..92e90fe21c1158e61e33afef42ef3beb208f2b92 100644 (file)
@@ -754,6 +754,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1536,
 /**/
     1535,
 /**/