Replace %w with explicit A-Za-z0-9 ranges in URL encoding functions.
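The %w character class is locale-dependent (it follows the C library's
isalnum) and can match high bytes (0xE4, 0xE5, 0xE6) as alphanumeric
characters in locales such as en_GB.UTF-8. Because the patterns are negated
sets, any byte matched by %w is left unescaped, which breaks URL encoding of
non-ASCII characters.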
if str == nil then
return ''
end
- str = string.gsub(str, "([^%w _%%%-%.~])",
+ -- Use explicit ASCII ranges instead of %w which is locale-dependent
+ -- and may match non-ASCII bytes in UTF-8 locales
+ str = string.gsub(str, "([^A-Za-z0-9 _%%%-%.~])",
function(c)
return string.format("%%%02X", string.byte(c))
end)
-- Encode space and other problematic characters that are common in redirect URLs
-- We're conservative - only encode what http_parser_parse_url actually rejects
-- Don't encode already-encoded sequences (%XX)
- local encoded = url_str:gsub("([^%w%-%._~:/?#%[%]@!$&'()*+,;=%%])", function(c)
+ -- Use explicit ASCII ranges instead of %w which is locale-dependent
+ local encoded = url_str:gsub("([^A-Za-z0-9%-%._~:/?#%[%]@!$&'()*+,;=%%])", function(c)
-- Don't double-encode already encoded characters
if c == '%' then
return c