]> git.ipfire.org Git - thirdparty/vim.git/commitdiff
patch 9.1.1669: Vim script: no support for URI de-/encoding v9.1.1669
authorYegappan Lakshmanan <yegappan@yahoo.com>
Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
committerChristian Brabandt <cb@256bit.org>
Sat, 23 Aug 2025 10:26:16 +0000 (06:26 -0400)
Problem:  Vim script: no support for URI de-/encoding
          (ubaldot)
Solution: Add the uri_encode() and uri_decode() functions
          (Yegappan Lakshmanan)

fixes: #17861
closes: #18034

Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
runtime/doc/builtin.txt
runtime/doc/tags
runtime/doc/usr_41.txt
runtime/doc/version9.txt
src/evalfunc.c
src/proto/strings.pro
src/strings.c
src/testdir/test_functions.vim
src/version.c

index 9ab97ecab415fd36927394ff410d336a9c35bb77..32cb40f1b77e7b4ddf1adc6da64a4a79f36f7faf 100644 (file)
@@ -747,6 +747,8 @@ undofile({name})            String  undo file name for {name}
 undotree([{buf}])              List    undo file tree for buffer {buf}
 uniq({list} [, {func} [, {dict}]])
                                List    remove adjacent duplicates from a list
+uri_decode({string})           String  URI-decode a string
+uri_encode({string})           String  URI-encode a string
 utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
                                Number  UTF-16 index of byte {idx} in {string}
 values({dict})                 List    values in {dict}
@@ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]])                     *uniq()* *E882*
                Return type: list<{type}>
 
 
+uri_decode({string})                                   *uri_decode()*
+               Returns the URI-decoded form of {string}, reversing
+               percent-encoding (converting sequences like "%3D" back to
+               the corresponding character).
+
+               The decoding follows standard percent-decoding rules:
+                   - "%HH" is replaced with the character for the hex value
+                     HH.
+                   - If the decoded bytes form valid UTF-8, they are combined
+                     into the corresponding character(s).  Otherwise, the
+                     bytes are kept as-is.
+                   - Invalid or incomplete encodings (e.g. "%GZ", "%3", or a
+                     trailing "%") are left unchanged.
+
+               Returns an empty String if {string} is empty.
+
+               Example: >
+                       :echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar')
+                       c:\my\dir\foo bar
+                       :echo uri_decode('%CE%B1%CE%B2%CE%B3')
+                       αβγ
+<
+               Can also be used as a |method|: >
+                       mystr->uri_decode()
+<
+               Return type: |String|
+
+uri_encode({string})                                   *uri_encode()*
+               Returns the URI-encoded form of {string}.  URI encoding
+               replaces unsafe or reserved characters with percent-encoded
+               sequences.
+
+               The encoding follows standard percent-encoding rules:
+                    - Alphanumeric characters [0-9A-Za-z] remain unchanged.
+                    - The characters "-", "_", ".", and "~" also remain
+                      unchanged.
+                    - All other characters are replaced with "%HH", where HH
+                      is the two-digit uppercase hexadecimal value.
+                    - "%" itself is encoded, so "%20" becomes "%2520".
+
+               Returns an empty String if {string} is empty.
+
+               Example: >
+                       :echo uri_encode('c:\my\dir\foo bar')
+                       c%3A%5Cmy%5Cdir%5Cfoo%20bar
+                       :echo uri_encode('key=value&name=αβγ')
+                       key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3
+<
+               Can also be used as a |method|: >
+                       mystr->uri_encode()
+<
+               Return type: |String|
+
                                                        *utf16idx()*
 utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
                Same as |charidx()| but returns the UTF-16 code unit index of
index d42cac486adaeb0ba6651ae0e027337ecb35fc85..2046d6f933c9e694d47a2c01c0dbe74dc152324c 100644 (file)
@@ -11101,6 +11101,8 @@ unix    os_unix.txt     /*unix*
 unlisted-buffer        windows.txt     /*unlisted-buffer*
 up-down-motions        motion.txt      /*up-down-motions*
 uppercase      change.txt      /*uppercase*
+uri_decode()   builtin.txt     /*uri_decode()*
+uri_encode()   builtin.txt     /*uri_encode()*
 urxvt-mouse    options.txt     /*urxvt-mouse*
 use-visual-cmds        version4.txt    /*use-visual-cmds*
 useful-mappings        tips.txt        /*useful-mappings*
index ce05623b521d36d9a5c426db8e15b99f796ac3ce..813211e7c889f5fb9517f17231ff8dcb1a3fb926 100644 (file)
@@ -807,6 +807,8 @@ String manipulation:                                        *string-functions*
        str2blob()              convert a list of strings into a blob
        blob2str()              convert a blob into a list of strings
        items()                 get List of String index-character pairs
+       uri_encode()            URI-encode a string
+       uri_decode()            URI-decode a string
 
 List manipulation:                                     *list-functions*
        get()                   get an item without error for wrong index
index 6ea5257590d10591e90e5475e76aa1570afabfb8..ce8832b259d687f15b06460128a377bfb471059c 100644 (file)
@@ -41788,6 +41788,8 @@ Functions: ~
 |str2blob()|           convert a List of strings into a blob
 |test_null_tuple()|    return a null tuple
 |tuple2list()|         turn a Tuple of items into a List
+|uri_decode()|         URI-decode a string
+|uri_encode()|         URI-encode a string
 |wildtrigger()|                trigger wildcard expansion
 
 
index 2dced8e3fc88d4e240ba8e757a9fb60824f857d5..91c7ca04e042abdc281bbe2517ddea6953484259 100644 (file)
@@ -3116,6 +3116,10 @@ static funcentry_T global_functions[] =
                        ret_dict_any,       f_undotree},
     {"uniq",           1, 3, FEARG_1,      arg13_sortuniq,
                        ret_first_arg,      f_uniq},
+    {"uri_decode",     1, 1, FEARG_1,      arg1_string,
+                       ret_string,         f_uridecode},
+    {"uri_encode",     1, 1, FEARG_1,      arg1_string,
+                       ret_string,         f_uriencode},
     {"utf16idx",       2, 4, FEARG_1,      arg4_string_number_bool_bool,
                        ret_number,         f_utf16idx},
     {"values",         1, 1, FEARG_1,      arg1_dict_any,
index 8e7dc180d3a0b28be2bc50a00d7d7cbb5b3a5fcb..aeb2e641e3da3ca760b4cfaa1b8576f5eaf54899 100644 (file)
@@ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv);
 void f_toupper(typval_T *argvars, typval_T *rettv);
 void f_tr(typval_T *argvars, typval_T *rettv);
 void f_trim(typval_T *argvars, typval_T *rettv);
+void f_uridecode(typval_T *argvars, typval_T *rettv);
+void f_uriencode(typval_T *argvars, typval_T *rettv);
 /* vim: set ft=c : */
index 87868b90c4cc61c0617008f0a441d99c999d3017..fbaf5dc27a96cb6fad5716ec47f17f22a2138d31 100644 (file)
@@ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv)
     rettv->vval.v_string = vim_strnsave(head, tail - head);
 }
 
+/*
+ * Decode the percent-encoded ("%XX") sequences in a URI-encoded string.
+ *
+ * Parameters:
+ *   str - The URI-encoded input string (may contain %XX sequences).
+ *
+ * Returns:
+ *   A newly allocated, NUL-terminated string in which every "%XX" with two
+ *   hex digits is replaced by the byte with that value.  Everything else is
+ *   copied unchanged, including:
+ *     - a "%" that is not followed by two hex digits (e.g. "%GZ", "%3" or a
+ *       trailing "%").  Only the "%" itself is stepped over, so a valid
+ *       sequence that starts right after it (as in "%%20") is still decoded.
+ *     - "%00", because a NUL byte would silently truncate the result.
+ *     - '+', which is not treated as an encoded space.
+ *   The output string will never be longer than the input string.
+ *   The caller is responsible for freeing the returned string.
+ *
+ * Returns NULL if input is NULL or memory allocation fails.
+ */
+    static char_u *
+uri_decode(char_u *str)
+{
+    if (str == NULL)
+       return NULL;
+
+    size_t len = STRLEN(str);
+
+    // Decoding never grows the string: "%XX" (3 bytes) becomes 1 byte.
+    char_u *decoded = alloc(len + 1);
+    if (decoded == NULL)
+       return NULL;
+
+    char_u     *p = decoded;
+    size_t     i = 0;
+
+    while (i < len)
+    {
+       // A complete sequence needs two more bytes after the "%".
+       if (str[i] == '%' && i + 2 < len)
+       {
+           // -1 means the two bytes are not both hex digits; 0 would put a
+           // NUL in the middle of the result, so "%00" is kept literally.
+           int val = hexhex2nr(&str[i + 1]);
+
+           if (val > 0)
+           {
+               *p++ = (char_u)val;
+               i += 3;
+               continue;
+           }
+       }
+
+       // Ordinary byte, or a "%" that does not start a decodable sequence.
+       // Advancing by one byte lets the following bytes be examined again.
+       *p++ = str[i++];
+    }
+
+    *p = NUL;
+
+    return decoded;
+}
+
+/*
+ * "uri_decode({str})" function
+ *
+ * The result type is always a string.  Its value stays NULL when
+ * check_for_string_arg() rejects the first argument.
+ */
+    void
+f_uridecode(typval_T *argvars, typval_T *rettv)
+{
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = NULL;
+
+    if (check_for_string_arg(argvars, 0) != FAIL)
+    {
+       char_u *encoded = tv_get_string(&argvars[0]);
+
+       rettv->vval.v_string = uri_decode(encoded);
+    }
+}
+
+/*
+ * Percent-encode a string so that it can be used safely in a URI.
+ *
+ * Parameters:
+ *   str - The input string to encode.
+ *
+ * Returns:
+ *   A newly allocated string in which ASCII letters and digits and the
+ *   characters '-', '_', '.', '~' are copied as-is and every other byte is
+ *   written as "%XX" with uppercase hex digits.
+ *   The caller is responsible for freeing the returned string.
+ *
+ *   Returns NULL if input is NULL or memory allocation fails.
+ */
+    static char_u *
+uri_encode(char_u *str)
+{
+    static const char hexdigits[] = "0123456789ABCDEF";
+
+    if (str == NULL)
+       return NULL;
+
+    size_t srclen = STRLEN(str);
+
+    // Each input byte becomes at most three output bytes ("%XX"); one more
+    // byte is needed for the terminating NUL.
+    char_u *result = alloc(srclen * 3 + 1);
+    if (result == NULL)
+       return NULL;
+
+    char_u *dst = result;
+
+    for (char_u *src = str; src < str + srclen; ++src)
+    {
+       char_u  ch = *src;
+       int     keep = ASCII_ISALNUM(ch)
+                       || ch == '-' || ch == '_' || ch == '.' || ch == '~';
+
+       if (keep)
+       {
+           *dst++ = ch;
+           continue;
+       }
+
+       // '%' followed by the high and the low nibble as uppercase hex.
+       *dst++ = '%';
+       *dst++ = hexdigits[ch >> 4];
+       *dst++ = hexdigits[ch & 0x0F];
+    }
+
+    *dst = NUL;
+
+    return result;
+}
+
+/*
+ * "uri_encode({str})" function
+ *
+ * The result type is always a string.  Its value stays NULL when
+ * check_for_string_arg() rejects the first argument.
+ */
+    void
+f_uriencode(typval_T *argvars, typval_T *rettv)
+{
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = NULL;
+
+    if (check_for_string_arg(argvars, 0) != FAIL)
+    {
+       char_u *plain = tv_get_string(&argvars[0]);
+
+       rettv->vval.v_string = uri_encode(plain);
+    }
+}
+
 static char *e_printf = N_(e_insufficient_arguments_for_printf);
 
 /*
index ca54d3729144e8b8d587a2fb15aa00871cf284fb..9dd96018102e300d4524c78c09d087b17d094def 100644 (file)
@@ -4503,4 +4503,61 @@ func Test_blob2str()
   call v9.CheckLegacyAndVim9Success(lines)
 endfunc
 
+" Test for uri_encode() and uri_decode() functions
+" Covers reserved and unreserved ASCII characters, multi-byte UTF-8 text,
+" empty and null strings, a non-String argument (E1174), malformed "%"
+" sequences and a round trip of the control characters 0x01 - 0x1F.
+func Test_uriencoding()
+  " The checks are collected in a heredoc; everything between here and END is
+  " passed on verbatim as a list of lines.
+  let lines =<< trim END
+    #" uri encoding
+    call assert_equal('a1%20b2', uri_encode('a1 b2'))
+    call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-'))
+    call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}'))
+    call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode())
+    call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé'))
+    call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好'))
+    call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊'))
+    call assert_equal('-_.~', uri_encode('-_.~'))
+    call assert_equal('', uri_encode(''))
+    call assert_equal('%2520%2523', uri_encode('%20%23'))
+    call assert_equal('', uri_encode(test_null_string()))
+    call assert_equal('a', uri_encode('a'))
+    call assert_equal('%20', uri_encode(' '))
+    call assert_equal('%CE%B1', uri_encode('α'))
+    call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c'))
+    call assert_fails('call uri_encode([])', 'E1174: String required for argument 1')
+
+    #" uri decoding
+    call assert_equal('a1 b2', uri_decode('a1%20b2'))
+    call assert_equal('-?&/#+=:[]@-', uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-'))
+    call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D'))
+    call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode())
+    call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9'))
+    call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD'))
+    call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A'))
+    call assert_equal('a+b', uri_decode('a+b'))
+    call assert_equal('-_.~', uri_decode('-_.~'))
+    call assert_equal('', uri_decode(''))
+    call assert_equal('%20%23', uri_decode('%2520%2523'))
+    call assert_equal('', uri_decode(test_null_string()))
+    call assert_equal('a', uri_decode('a'))
+    call assert_equal(' ', uri_decode('%20'))
+    call assert_equal('α', uri_decode('%CE%B1'))
+    call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c'))
+    call assert_equal('%', uri_decode('%'))
+    call assert_equal('%3', uri_decode('%3'))
+    call assert_equal(';', uri_decode('%3b'))
+    call assert_equal('a%xyb', uri_decode('a%xyb'))
+    call assert_fails('call uri_decode([])', 'E1174: String required for argument 1')
+
+    #" control characters
+    VAR cstr = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10"
+    LET cstr ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+    VAR expected = ''
+    for i in range(1, 31)
+      LET expected ..= printf("%%%02X", i)
+    endfor
+    call assert_equal(expected, uri_encode(cstr))
+    call assert_equal(cstr, uri_decode(expected))
+  END
+  " Hand the collected lines to the v9 helper, which (judging by its name)
+  " checks that they succeed as both legacy and Vim9 script.
+  call v9.CheckLegacyAndVim9Success(lines)
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab
index aa3142c5bff51b2bc7c91062a48668361708da94..6fe0936c710195641fc7440e3289d307d7455c06 100644 (file)
@@ -724,6 +724,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1669,
 /**/
     1668,
 /**/