patch 9.1.1669: Vim script: no support for URI de-/encoding

author Yegappan Lakshmanan <yegappan@yahoo.com>

Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)

committer Christian Brabandt <cb@256bit.org>

Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
author Yegappan Lakshmanan <yegappan@yahoo.com>
Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
committer Christian Brabandt <cb@256bit.org>
Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt

index 9ab97ecab415fd36927394ff410d336a9c35bb77..32cb40f1b77e7b4ddf1adc6da64a4a79f36f7faf 100644 (file)
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -747,6 +747,8 @@ undofile({name})            String  undo file name for {name}
  undotree([{buf}])              List    undo file tree for buffer {buf}
  uniq({list} [, {func} [, {dict}]])
                                 List    remove adjacent duplicates from a list
+uri_decode({string})           String  URI-decode a string
+uri_encode({string})           String  URI-encode a string
  utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
                                 Number  UTF-16 index of byte {idx} in {string}
  values({dict})                 List    values in {dict}
@@ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]])                     *uniq()* *E882*
                 Return type: list<{type}>
  
  
+uri_decode({string})                                   *uri_decode()*
+               Returns the URI-decoded form of {string}, reversing
+               percent-encoding (converting sequences like "%3D" back to
+               the corresponding character).
+
+               The decoding follows standard percent-decoding rules:
+                   - "%HH" is replaced with the character for the hex value
+                     HH.
+                   - If the decoded bytes form valid UTF-8, they are combined
+                     into the corresponding character(s).  Otherwise, the
+                     bytes are kept as-is.
+                   - Invalid or incomplete encodings (e.g. "%GZ", "%3", or a
+                     trailing "%") are left unchanged.
+
+               Returns an empty String if {string} is empty.
+
+               Example: >
+                       :echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar')
+                       c:\my\dir\foo bar
+                       :echo uri_decode('%CE%B1%CE%B2%CE%B3')
+                       αβγ
+<
+               Can also be used as a |method|: >
+                       mystr->uri_decode()
+<
+               Return type: |String|
+
+uri_encode({string})                                   *uri_encode()*
+               Returns the URI-encoded form of {string}.  URI encoding
+               replaces unsafe or reserved characters with percent-encoded
+               sequences.
+
+               The encoding follows standard percent-encoding rules:
+                    - Alphanumeric characters [0-9A-Za-z] remain unchanged.
+                    - The characters "-", "_", ".", and "~" also remain
+                      unchanged.
+                    - All other characters are replaced with "%HH", where HH
+                      is the two-digit uppercase hexadecimal value.
+                    - Existing percent-encoded sequences are not preserved:
+                      the "%" itself is encoded as "%25", so "%20" becomes
+                      "%2520".
+
+               Returns an empty String if {string} is empty.
+
+               Example: >
+                       :echo uri_encode('c:\my\dir\foo bar')
+                       c%3A%5Cmy%5Cdir%5Cfoo%20bar
+                       :echo uri_encode('key=value&name=αβγ')
+                       key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3
+<
+               Can also be used as a |method|: >
+                       mystr->uri_encode()
+<
+               Return type: |String|
+
                                                         *utf16idx()*
  utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
                 Same as |charidx()| but returns the UTF-16 code unit index of
diff --git a/runtime/doc/tags b/runtime/doc/tags

index d42cac486adaeb0ba6651ae0e027337ecb35fc85..2046d6f933c9e694d47a2c01c0dbe74dc152324c 100644 (file)
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -11101,6 +11101,8 @@ unix    os_unix.txt     /*unix*
  unlisted-buffer        windows.txt     /*unlisted-buffer*
  up-down-motions        motion.txt      /*up-down-motions*
  uppercase      change.txt      /*uppercase*
+uri_decode()   builtin.txt     /*uri_decode()*
+uri_encode()   builtin.txt     /*uri_encode()*
  urxvt-mouse    options.txt     /*urxvt-mouse*
  use-visual-cmds        version4.txt    /*use-visual-cmds*
  useful-mappings        tips.txt        /*useful-mappings*
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt

index ce05623b521d36d9a5c426db8e15b99f796ac3ce..813211e7c889f5fb9517f17231ff8dcb1a3fb926 100644 (file)
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -807,6 +807,8 @@ String manipulation:                                        *string-functions*
         str2blob()              convert a list of strings into a blob
         blob2str()              convert a blob into a list of strings
         items()                 get List of String index-character pairs
+       uri_encode()            URI-encode a string
+       uri_decode()            URI-decode a string
  
  List manipulation:                                     *list-functions*
         get()                   get an item without error for wrong index
diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt

index 6ea5257590d10591e90e5475e76aa1570afabfb8..ce8832b259d687f15b06460128a377bfb471059c 100644 (file)
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -41788,6 +41788,8 @@ Functions: ~
  |str2blob()|           convert a List of strings into a blob
  |test_null_tuple()|    return a null tuple
  |tuple2list()|         turn a Tuple of items into a List
+|uri_decode()|         URI-decode a string
+|uri_encode()|         URI-encode a string
  |wildtrigger()|                trigger wildcard expansion
  
  
diff --git a/src/evalfunc.c b/src/evalfunc.c

index 2dced8e3fc88d4e240ba8e757a9fb60824f857d5..91c7ca04e042abdc281bbe2517ddea6953484259 100644 (file)
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -3116,6 +3116,10 @@ static funcentry_T global_functions[] =
                         ret_dict_any,       f_undotree},
      {"uniq",           1, 3, FEARG_1,      arg13_sortuniq,
                         ret_first_arg,      f_uniq},
+    {"uri_decode",     1, 1, FEARG_1,      arg1_string,
+                       ret_string,         f_uridecode},
+    {"uri_encode",     1, 1, FEARG_1,      arg1_string,
+                       ret_string,         f_uriencode},
      {"utf16idx",       2, 4, FEARG_1,      arg4_string_number_bool_bool,
                         ret_number,         f_utf16idx},
      {"values",         1, 1, FEARG_1,      arg1_dict_any,
diff --git a/src/proto/strings.pro b/src/proto/strings.pro

index 8e7dc180d3a0b28be2bc50a00d7d7cbb5b3a5fcb..aeb2e641e3da3ca760b4cfaa1b8576f5eaf54899 100644 (file)
--- a/src/proto/strings.pro
+++ b/src/proto/strings.pro
@@ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv);
  void f_toupper(typval_T *argvars, typval_T *rettv);
  void f_tr(typval_T *argvars, typval_T *rettv);
  void f_trim(typval_T *argvars, typval_T *rettv);
+void f_uridecode(typval_T *argvars, typval_T *rettv);
+void f_uriencode(typval_T *argvars, typval_T *rettv);
  /* vim: set ft=c : */
diff --git a/src/strings.c b/src/strings.c

index 87868b90c4cc61c0617008f0a441d99c999d3017..fbaf5dc27a96cb6fad5716ec47f17f22a2138d31 100644 (file)
--- a/src/strings.c
+++ b/src/strings.c
@@ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv)
      rettv->vval.v_string = vim_strnsave(head, tail - head);
  }
  
+/*
+ * Decodes a URI-encoded (percent-encoded) string.
+ *
+ * Parameters:
+ *   str - The URI-encoded input string (may contain %XX sequences).
+ *
+ * Returns:
+ *   A newly allocated string with URI encoding decoded:
+ *     - %XX sequences (two hex digits) are converted to the corresponding
+ *       byte.
+ *     - A '%' that is not followed by two hex digits is copied literally and
+ *       scanning resumes at the very next byte, so an escape that starts
+ *       right after it is still decoded ("%%41" becomes "%A").
+ *     - "%00" is copied literally: a decoded NUL byte would terminate the
+ *       result early and silently drop the rest of the input.
+ *     - All other bytes, including '+', are copied unchanged.
+ *   The output string will never be longer than the input string.
+ *   The caller is responsible for freeing the returned string.
+ *
+ * Returns NULL if input is NULL or memory allocation fails.
+ */
+    static char_u *
+uri_decode(char_u *str)
+{
+    if (str == NULL)
+       return NULL;
+
+    size_t len = STRLEN(str);
+
+    char_u *decoded = alloc(len + 1);
+    if (decoded == NULL)
+       return NULL;
+
+    char_u     *p = decoded;
+    size_t     i = 0;
+
+    while (i < len)
+    {
+       // Only try to decode when two more bytes follow the '%'.
+       if (str[i] == '%' && i + 2 < len)
+       {
+           int val = hexhex2nr(&str[i + 1]);
+
+           // -1 means invalid hex digits, 0 means "%00"; neither is
+           // decoded, the '%' is then copied like any other byte below.
+           if (val > 0)
+           {
+               *p++ = (char_u)val;
+               i += 3;
+               continue;
+           }
+       }
+
+       // Ordinary byte, or a '%' that does not start a decodable sequence.
+       // Advance by one byte only, so the following bytes are re-examined.
+       *p++ = str[i++];
+    }
+
+    *p = NUL;
+
+    return decoded;
+}
+
+/*
+ * "uri_decode({str})" function
+ *
+ * The result type is always String.  The result value stays NULL when the
+ * argument check fails, otherwise it is whatever uri_decode() returns.
+ */
+    void
+f_uridecode(typval_T *argvars, typval_T *rettv)
+{
+    char_u *result = NULL;
+
+    rettv->v_type = VAR_STRING;
+
+    if (check_for_string_arg(argvars, 0) != FAIL)
+       result = uri_decode(tv_get_string(&argvars[0]));
+
+    rettv->vval.v_string = result;
+}
+
+/*
+ * Percent-encodes a string so that it can be used safely in a URI.
+ *
+ * Parameters:
+ *   str - The input string to encode.
+ *
+ * Returns:
+ *   A newly allocated string in which:
+ *     - ASCII letters, digits and the characters '-', '_', '.', '~' are
+ *       copied unchanged.
+ *     - Every other byte, including '%', is written as "%XX" with two
+ *       uppercase hex digits.
+ *   The caller is responsible for freeing the returned string.
+ *
+ *   Returns NULL if input is NULL or memory allocation fails.
+ */
+    static char_u *
+uri_encode(char_u *str)
+{
+    static const char hexdigits[] = "0123456789ABCDEF";
+
+    if (str == NULL)
+       return NULL;
+
+    size_t srclen = STRLEN(str);
+
+    // Each input byte expands to at most three output bytes ("%XX"); one
+    // more byte is needed for the terminating NUL.
+    char_u *result = alloc(srclen * 3 + 1);
+    if (result == NULL)
+       return NULL;
+
+    char_u *dst = result;
+
+    for (char_u *src = str; src < str + srclen; ++src)
+    {
+       char_u ch = *src;
+       int    unreserved = ASCII_ISALNUM(ch)
+                       || ch == '-' || ch == '_' || ch == '.' || ch == '~';
+
+       if (unreserved)
+       {
+           *dst++ = ch;
+           continue;
+       }
+
+       // High nibble first, then low nibble.
+       *dst++ = '%';
+       *dst++ = (char_u)hexdigits[ch >> 4];
+       *dst++ = (char_u)hexdigits[ch & 0x0F];
+    }
+
+    *dst = NUL;
+
+    return result;
+}
+
+/*
+ * "uri_encode({str})" function
+ *
+ * The result type is always String.  The result value stays NULL when the
+ * argument check fails, otherwise it is whatever uri_encode() returns.
+ */
+    void
+f_uriencode(typval_T *argvars, typval_T *rettv)
+{
+    char_u *result = NULL;
+
+    rettv->v_type = VAR_STRING;
+
+    if (check_for_string_arg(argvars, 0) != FAIL)
+       result = uri_encode(tv_get_string(&argvars[0]));
+
+    rettv->vval.v_string = result;
+}
+
  static char *e_printf = N_(e_insufficient_arguments_for_printf);
  
  /*
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim

index ca54d3729144e8b8d587a2fb15aa00871cf284fb..9dd96018102e300d4524c78c09d087b17d094def 100644 (file)
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -4503,4 +4503,61 @@ func Test_blob2str()
    call v9.CheckLegacyAndVim9Success(lines)
  endfunc
  
+" Test for uri_encode() and uri_decode() functions
+"
+" The assertions live in a heredoc that is handed to
+" v9.CheckLegacyAndVim9Success() (presumably so the same lines run as both
+" legacy and Vim9 script - confirm against the v9 helper).  They cover:
+"   - plain ASCII, reserved and unsafe characters
+"   - multi-byte UTF-8 text (Greek, accented Latin, CJK, emoji)
+"   - empty and null strings, and a non-String argument (E1174)
+"   - encoding an already encoded string ('%' becomes '%25') and decoding it
+"   - malformed input for uri_decode(): '%', '%3' and '%xy'
+"   - a round trip of the control characters 0x01 - 0x1F
+func Test_uriencoding()
+  let lines =<< trim END
+    #" uri encoding
+    call assert_equal('a1%20b2', uri_encode('a1 b2'))
+    call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-'))
+    call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}'))
+    call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode())
+    call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé'))
+    call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好'))
+    call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊'))
+    call assert_equal('-_.~', uri_encode('-_.~'))
+    call assert_equal('', uri_encode(''))
+    call assert_equal('%2520%2523', uri_encode('%20%23'))
+    call assert_equal('', uri_encode(test_null_string()))
+    call assert_equal('a', uri_encode('a'))
+    call assert_equal('%20', uri_encode(' '))
+    call assert_equal('%CE%B1', uri_encode('α'))
+    call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c'))
+    call assert_fails('call uri_encode([])', 'E1174: String required for argument 1')
+
+    #" uri decoding
+    call assert_equal('a1 b2', uri_decode('a1%20b2'))
+    call assert_equal('-?&/#+=:[]@-', uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-'))
+    call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D'))
+    call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode())
+    call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9'))
+    call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD'))
+    call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A'))
+    call assert_equal('a+b', uri_decode('a+b'))
+    call assert_equal('-_.~', uri_decode('-_.~'))
+    call assert_equal('', uri_decode(''))
+    call assert_equal('%20%23', uri_decode('%2520%2523'))
+    call assert_equal('', uri_decode(test_null_string()))
+    call assert_equal('a', uri_decode('a'))
+    call assert_equal(' ', uri_decode('%20'))
+    call assert_equal('α', uri_decode('%CE%B1'))
+    call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c'))
+    call assert_equal('%', uri_decode('%'))
+    call assert_equal('%3', uri_decode('%3'))
+    call assert_equal(';', uri_decode('%3b'))
+    call assert_equal('a%xyb', uri_decode('a%xyb'))
+    call assert_fails('call uri_decode([])', 'E1174: String required for argument 1')
+
+    #" control characters
+    VAR cstr = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
+    LET cstr ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+    VAR expected = ''
+    for i in range(1, 31)
+      LET expected ..= printf("%%%02X", i)
+    endfor
+    call assert_equal(expected, uri_encode(cstr))
+    call assert_equal(cstr, uri_decode(expected))
+  END
+  call v9.CheckLegacyAndVim9Success(lines)
+endfunc
+
  " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c

index aa3142c5bff51b2bc7c91062a48668361708da94..6fe0936c710195641fc7440e3289d307d7455c06 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -724,6 +724,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    1669,
  /**/
      1668,
  /**/
author	Yegappan Lakshmanan <yegappan@yahoo.com>
	Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
committer	Christian Brabandt <cb@256bit.org>
	Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
runtime/doc/builtin.txt		patch \| blob \| blame \| history
runtime/doc/tags		patch \| blob \| blame \| history
runtime/doc/usr_41.txt		patch \| blob \| blame \| history
runtime/doc/version9.txt		patch \| blob \| blame \| history
src/evalfunc.c		patch \| blob \| blame \| history
src/proto/strings.pro		patch \| blob \| blame \| history
src/strings.c		patch \| blob \| blame \| history
src/testdir/test_functions.vim		patch \| blob \| blame \| history
src/version.c		patch \| blob \| blame \| history