_word_re = re.compile(r"\w+")
_word_beginning_split_re = re.compile(r"([-\s({\[<]+)")
+# A URI scheme prefix as accepted for ``extra_uri_schemes``: two or more
+# word, ".", "+" or "-" characters, then a colon, then zero to two slashes
+# (for example ``tel:`` or ``ftp://``). Nothing may follow the prefix.
+_uri_scheme_re = re.compile(r"^([\w\.\+-]{2,}:(/){0,2})$")
def contextfilter(f):
@evalcontextfilter
def do_urlize(
- eval_ctx, value, trim_url_limit=None, nofollow=False, target=None, rel=None
+ eval_ctx,
+ value,
+ trim_url_limit=None,
+ nofollow=False,
+ target=None,
+ rel=None,
+ extra_uri_schemes=None,
):
"""Converts URLs in plain text into clickable links.
{{ mytext|urlize(40, target='_blank') }}
+ If *extra_uri_schemes* is given, links are also generated for words that
+ start with one of those prefixes, in addition to the http(s): and
+ mailto: schemes. When it is omitted, the ``urlize.extra_uri_schemes``
+ policy is used instead. Each entry must be a scheme prefix such as
+ ``tel:`` or ``ftp://``; anything else raises ``FilterArgumentError``.
+
+ .. sourcecode:: jinja
+
+ {{ mytext|urlize(extra_uri_schemes=['tel:', 'ftp://']) }}
+ links are also generated for ``tel:`` and ``ftp://`` URIs.
+
.. versionchanged:: 2.8
The ``target`` parameter was added.
+
+ .. versionchanged:: 3.0
+ The ``extra_uri_schemes`` parameter was added.
"""
policies = eval_ctx.environment.policies
+
+ # Merge the caller's rel tokens, "nofollow" when requested, and the
+ # tokens from the ``urlize.rel`` policy into one sorted attribute value.
rel = set((rel or "").split() or [])
if nofollow:
rel.add("nofollow")
rel.update((policies["urlize.rel"] or "").split())
+ rel = " ".join(sorted(rel)) or None
+
if target is None:
target = policies["urlize.target"]
- rel = " ".join(sorted(rel)) or None
- rv = urlize(value, trim_url_limit, rel=rel, target=target)
+
+ # No explicit schemes: fall back to the environment policy (may be empty).
+ if extra_uri_schemes is None:
+ extra_uri_schemes = policies["urlize.extra_uri_schemes"] or []
+ # Reject entries that are not a scheme prefix as defined by _uri_scheme_re.
+ for uri_scheme in extra_uri_schemes:
+ if _uri_scheme_re.fullmatch(uri_scheme) is None:
+ raise FilterArgumentError(f"{uri_scheme} is not a valid URI scheme prefix.")
+
+ rv = urlize(
+ value,
+ trim_url_limit,
+ rel=rel,
+ target=target,
+ extra_uri_schemes=extra_uri_schemes,
+ )
if eval_ctx.autoescape:
rv = Markup(rv)
return rv
_punctuation_re = re.compile(
fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$"
)
+# Matches a whole word that looks like a web address (case-insensitive).
+# The host part is one of three alternatives:
+#   1. "http://", "https://" or "www.", optional dotted subdomains, then a
+#      final label of 2-63 letters or an "xn--" label;
+#   2. no scheme: one or more dotted labels ending in one of a fixed set of
+#      TLDs (com, net, int, edu, gov, org, info, mil);
+#   3. "http://" or "https://" followed by four dot-separated groups of
+#      1-3 digits, or a bracketed colon-separated hex address.
+# After the host come an optional ":port" (1-5 digits) and an optional
+# path, query or fragment introduced by "/", "?" or "#".
+_simple_http_https_re = re.compile(
+ r"^((https?://|www\.)(([\w%-]+\.)+)?([a-z]{2,63}|xn--[\w%]{2,59})|"
+ r"([\w%-]{2,63}\.)+(com|net|int|edu|gov|org|info|mil)|"
+ r"(https?://)((([\d]{1,3})(\.[\d]{1,3}){3})|"
+ r"(\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}\])))"
+ r"(?::[\d]{1,5})?(?:[/?#]\S*)?$",
+ re.IGNORECASE,
+)
_simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
_entity_re = re.compile(r"&([^;]+);")
return pformat(obj)
-def urlize(text, trim_url_limit=None, rel=None, target=None):
+def urlize(text, trim_url_limit=None, rel=None, target=None, extra_uri_schemes=None):
"""Converts any URLs in text into clickable links. Works on http://,
- https:// and www. links. Links can have trailing punctuation (periods,
- commas, close-parens) and leading punctuation (opening parens) and
- it'll still do the right thing.
+ https://, www., mailto:, and email links. Links can have trailing
+ punctuation (periods, commas, close-parens) and leading punctuation
+ (opening parens) and it'll still do the right thing.
If trim_url_limit is not None, the URLs in link text will be limited
to trim_url_limit characters.
attribute.
If target is not None, a target attribute will be added to the link.
+
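+ If extra_uri_schemes is not None, words that start with one of the given
+ prefixes (for example ``tel:``) are turned into links as well.
+
+ Known limitations:
+ - Will not urlize emails or mailto: links if they include header fields
+ (for example, mailto:address@example.com?cc=copy@example.com).
+
+ .. versionchanged:: 3.0
+ Added limited support for mailto: links and the extra_uri_schemes
+ parameter.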
"""
def trim_url(x, limit=trim_url_limit):
match = _punctuation_re.match(word)
if match:
lead, middle, trail = match.groups()
- if middle.startswith("www.") or (
- "@" not in middle
- and not middle.startswith("http://")
- and not middle.startswith("https://")
- and len(middle) > 0
- and middle[0] in _letters + _digits
- and (
- middle.endswith(".org")
- or middle.endswith(".net")
- or middle.endswith(".com")
- )
- ):
- middle = (
- f'<a href="http://{middle}"{rel_attr}{target_attr}>'
- f"{trim_url(middle)}</a>"
- )
- if middle.startswith("http://") or middle.startswith("https://"):
- middle = (
- f'<a href="{middle}"{rel_attr}{target_attr}>{trim_url(middle)}</a>'
- )
+ # fix for mismatched opening and closing parentheses
+ # Each opener found in the word is paired with its closer; closers that
+ # were split off into ``trail`` are moved back until the word balances.
+ # The last pair is the HTML-escaped form of "<" / ">", which is why the
+ # slices below use len(end_char) instead of 1. NOTE(review): assumes
+ # _lead_pattern (defined outside this excerpt) can yield "&lt;" - confirm.
+ pairs = [("(", ")"), ("<", ">"), ("&lt;", "&gt;")]
+ for start_char in re.findall(_lead_pattern, middle):
+     end_char = next(c for o, c in pairs if o == start_char)
+     while (
+         middle.count(start_char) > middle.count(end_char)
+         and end_char in trail
+     ):
+         end_char_index = trail.index(end_char)
+         middle = middle + trail[: end_char_index + len(end_char)]
+         trail = trail[end_char_index + len(end_char) :]
+
+ if _simple_http_https_re.match(middle):
+ if middle.startswith("https://") or middle.startswith("http://"):
+ middle = (
+ f'<a href="{middle}"{rel_attr}{target_attr}>'
+ f"{trim_url(middle)}</a>"
+ )
+ else:
+ middle = (
+ f'<a href="https://{middle}"{rel_attr}{target_attr}>'
+ f"{trim_url(middle)}</a>"
+ )
+
if (
"@" in middle
and not middle.startswith("www.")
and _simple_email_re.match(middle)
):
middle = f'<a href="mailto:{middle}">{middle}</a>'
+ if middle.startswith("mailto:") and _simple_email_re.match(middle[7:]):
+ middle = f'<a href="{middle}">{middle[7:]}</a>'
+
+ if extra_uri_schemes is not None and any(
+     middle.startswith(uri_scheme) and len(middle) > len(uri_scheme)
+     for uri_scheme in extra_uri_schemes
+ ):
+     # Wrap the word exactly once. Wrapping once per matching prefix
+     # would nest one <a> inside another when two configured prefixes
+     # overlap (e.g. "ftp:" and "ftp://"). A bare prefix with nothing
+     # after it is not a link target and is left as plain text.
+     middle = (
+         f'<a href="{middle}"{rel_attr}{target_attr}>'
+         f"{middle}</a>"
+     )
+
if lead + middle + trail != word:
words[i] = lead + middle + trail
+
return "".join(words)
assert tmpl.render() == "FOO"
def test_urlize(self, env):
+ # A bare domain with a known TLD is linked with an https:// href.
+ tmpl = env.from_string('{{ "foo example.org bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="https://example.org" rel="noopener">' "example.org</a> bar"
+ )
tmpl = env.from_string('{{ "foo http://www.example.com/ bar"|urlize }}')
assert tmpl.render() == (
'foo <a href="http://www.example.com/" rel="noopener">'
"http://www.example.com/</a> bar"
)
+ # An explicit mailto: word keeps its href; the link text drops the scheme.
+ tmpl = env.from_string('{{ "foo mailto:email@example.com bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="mailto:email@example.com">email@example.com</a> bar'
+ )
+ # A bare email address gets a mailto: href; no rel attribute is expected.
+ tmpl = env.from_string('{{ "foo email@example.com bar"|urlize }}')
+ assert tmpl.render() == (
+ 'foo <a href="mailto:email@example.com">email@example.com</a> bar'
+ )
def test_urlize_rel_policy(self):
env = Environment()
"http://www.example.com/</a> bar"
)
+ def test_urlize_extra_uri_schemes_parameter(self, env):
+ # Schemes are compared as plain string prefixes, so "ftp:" also covers
+ # the "ftp://localhost" word; the link text keeps the full URI.
+ tmpl = env.from_string(
+ '{{ "foo tel:+1-514-555-1234 ftp://localhost bar"|'
+ 'urlize(extra_uri_schemes=["tel:", "ftp:"]) }}'
+ )
+ assert tmpl.render() == (
+ 'foo <a href="tel:+1-514-555-1234" rel="noopener">'
+ 'tel:+1-514-555-1234</a> <a href="ftp://localhost" rel="noopener">'
+ "ftp://localhost</a> bar"
+ )
+
def test_wordcount(self, env):
tmpl = env.from_string('{{ "foo bar baz"|wordcount }}')
assert tmpl.render() == "3"