_word_re = re.compile(r"\w+")
_word_beginning_split_re = re.compile(r"([-\s({\[<]+)")
-_uri_scheme_re = re.compile(r"^([\w\.\+-]{2,}:(/){0,2})$")
def contextfilter(f):
return pformat(value)
+_uri_scheme_re = re.compile(r"^([\w.+-]{2,}:(/){0,2})$")
+
+
@evalcontextfilter
def do_urlize(
eval_ctx,
nofollow=False,
target=None,
rel=None,
- extra_uri_schemes=None,
+ extra_schemes=None,
):
- """Converts URLs in plain text into clickable links.
-
- If you pass the filter an additional integer it will shorten the urls
- to that number. Also a third argument exists that makes the urls
- "nofollow":
-
- .. sourcecode:: jinja
-
- {{ mytext|urlize(40, true) }}
- links are shortened to 40 chars and defined with rel="nofollow"
-
- If *target* is specified, the ``target`` attribute will be added to the
- ``<a>`` tag:
-
- .. sourcecode:: jinja
-
- {{ mytext|urlize(40, target='_blank') }}
+ """Convert URLs in text into clickable links.
+
+ This may not recognize links in some situations. Usually, a more
+ comprehensive formatter, such as a Markdown library, is a better
+ choice.
+
+ Works on ``http://``, ``https://``, ``www.``, ``mailto:``, and email
+ addresses. Links with trailing punctuation (periods, commas, closing
+ parentheses) and leading punctuation (opening parentheses) are
+ recognized excluding the punctuation. Email addresses that include
+ header fields are not recognized (for example,
+ ``mailto:address@example.com?cc=copy@example.com``).
+
+ :param value: Original text containing URLs to link.
+ :param trim_url_limit: Shorten displayed URL values to this length.
+ :param nofollow: Add the ``rel=nofollow`` attribute to links.
+ :param target: Add the ``target`` attribute to links.
+ :param rel: Add the ``rel`` attribute to links.
+ :param extra_schemes: Recognize URLs that start with these schemes
+ in addition to the default behavior. Defaults to
+ ``env.policies["urlize.extra_schemes"]``, which defaults to no
+ extra schemes.
- If *extra_uri_schemes* are added then links will be generated for those
- in addition to http(s): and mailto: schemes.
+ .. versionchanged:: 3.0
+ The ``extra_schemes`` parameter was added.
- .. sourcecode:: jinja
+ .. versionchanged:: 3.0
+ Generate ``https://`` links for URLs without a scheme.
- {{ mytext|urlize(extra_uri_schemes=['tel:', 'ftp://']) }}
- links are generated for tel and ftp.
+ .. versionchanged:: 3.0
+ The parsing rules were updated. Recognize email addresses with
+ or without the ``mailto:`` scheme. Validate IP addresses. Ignore
+ parentheses and brackets in more cases.
.. versionchanged:: 2.8
The ``target`` parameter was added.
-
- .. versionchanged:: 3.0
- The ``extra_uri_schemes`` parameter was added.
"""
policies = eval_ctx.environment.policies
+ rel_parts = set((rel or "").split())
- rel = set((rel or "").split() or [])
if nofollow:
- rel.add("nofollow")
- rel.update((policies["urlize.rel"] or "").split())
- rel = " ".join(sorted(rel)) or None
+ rel_parts.add("nofollow")
+
+ rel_parts.update((policies["urlize.rel"] or "").split())
+ rel = " ".join(sorted(rel_parts)) or None
if target is None:
target = policies["urlize.target"]
- if extra_uri_schemes is None:
- extra_uri_schemes = policies["urlize.extra_uri_schemes"] or []
- for uri_scheme in extra_uri_schemes:
- if _uri_scheme_re.fullmatch(uri_scheme) is None:
- raise FilterArgumentError(f"{uri_scheme} is not a valid URI scheme prefix.")
+ if extra_schemes is None:
+ extra_schemes = policies["urlize.extra_schemes"] or ()
+
+ for scheme in extra_schemes:
+ if _uri_scheme_re.fullmatch(scheme) is None:
+ raise FilterArgumentError(f"{scheme!r} is not a valid URI scheme prefix.")
rv = urlize(
value,
- trim_url_limit,
+ trim_url_limit=trim_url_limit,
rel=rel,
target=target,
- extra_uri_schemes=extra_uri_schemes,
+ extra_schemes=extra_schemes,
)
+
if eval_ctx.autoescape:
rv = Markup(rv)
+
return rv
from markupsafe import escape
from markupsafe import Markup
-_word_split_re = re.compile(r"(\s+)")
-_lead_pattern = "|".join(map(re.escape, ("(", "<", "<")))
-_trail_pattern = "|".join(map(re.escape, (".", ",", ")", ">", "\n", ">")))
-_punctuation_re = re.compile(
- fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$"
-)
-_simple_http_https_re = re.compile(
- r"^((https?://|www\.)(([\w%-]+\.)+)?([a-z]{2,63}|xn--[\w%]{2,59})|"
- r"([\w%-]{2,63}\.)+(com|net|int|edu|gov|org|info|mil)|"
- r"(https?://)((([\d]{1,3})(\.[\d]{1,3}){3})|"
- r"(\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}\])))"
- r"(?::[\d]{1,5})?(?:[/?#]\S*)?$",
- re.IGNORECASE,
-)
-_simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
_entity_re = re.compile(r"&([^;]+);")
_letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
return pformat(obj)
-def urlize(text, trim_url_limit=None, rel=None, target=None, extra_uri_schemes=None):
- """Converts any URLs in text into clickable links. Works on http://,
- https://, www., mailto:, and email links. Links can have trailing
- punctuation (periods, commas, close-parens) and leading punctuation
- (opening parens) and it'll still do the right thing.
+_word_split_re = re.compile(r"(\s+)")
+_lead_pattern = "|".join(map(re.escape, ("(", "<", "<")))
+_trail_pattern = "|".join(map(re.escape, (".", ",", ")", ">", "\n", ">")))
+_punctuation_re = re.compile(
+ fr"^(?P<lead>(?:{_lead_pattern})*)(?P<middle>.*?)(?P<trail>(?:{_trail_pattern})*)$"
+)
+_http_re = re.compile(
+ r"""
+ ^
+ (
+ (https?://|www\.) # scheme or www
+ (([\w%-]+\.)+)? # subdomain
+ (
+ [a-z]{2,63} # basic tld
+ |
+ xn--[\w%]{2,59} # idna tld
+ )
+ |
+ ([\w%-]{2,63}\.)+ # basic domain
+ (com|net|int|edu|gov|org|info|mil) # basic tld
+ |
+ (https?://) # scheme
+ (
+ (([\d]{1,3})(\.[\d]{1,3}){3}) # IPv4
+ |
+ (\[([\da-f]{0,4}:){2}([\da-f]{0,4}:?){1,6}]) # IPv6
+ )
+ )
+ (?::[\d]{1,5})? # port
+ (?:[/?#]\S*)? # path, query, and fragment
+ $
+ """,
+ re.IGNORECASE | re.VERBOSE,
+)
+_simple_email_re = re.compile(r"^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$")
+
- If trim_url_limit is not None, the URLs in link text will be limited
- to trim_url_limit characters.
+def urlize(text, trim_url_limit=None, rel=None, target=None, extra_schemes=None):
+ """Convert URLs in text into clickable links.
- If nofollow is True, the URLs in link text will get a rel="nofollow"
- attribute.
+ This may not recognize links in some situations. Usually, a more
+ comprehensive formatter, such as a Markdown library, is a better
+ choice.
- If target is not None, a target attribute will be added to the link.
+ Works on ``http://``, ``https://``, ``www.``, ``mailto:``, and email
+ addresses. Links with trailing punctuation (periods, commas, closing
+ parentheses) and leading punctuation (opening parentheses) are
+ recognized excluding the punctuation. Email addresses that include
+ header fields are not recognized (for example,
+ ``mailto:address@example.com?cc=copy@example.com``).
- Known Limitations:
- - Will not urlize emails or mailto: links if they include header fields
- (for example, mailto:address@example.com?cc=copy@example.com).
+ :param text: Original text containing URLs to link.
+ :param trim_url_limit: Shorten displayed URL values to this length.
+ :param target: Add the ``target`` attribute to links.
+ :param rel: Add the ``rel`` attribute to links.
+ :param extra_schemes: Recognize URLs that start with these schemes
+ in addition to the default behavior.
.. versionchanged:: 3.0
- Adds limited support for mailto: links
+ The ``extra_schemes`` parameter was added.
+
+ .. versionchanged:: 3.0
+ Generate ``https://`` links for URLs without a scheme.
+
+ .. versionchanged:: 3.0
+ The parsing rules were updated. Recognize email addresses with
+ or without the ``mailto:`` scheme. Validate IP addresses. Ignore
+ parentheses and brackets in more cases.
"""
def trim_url(x, limit=trim_url_limit):
for i, word in enumerate(words):
match = _punctuation_re.match(word)
+
if match:
lead, middle, trail = match.groups()
# fix for mismatched opening and closing parentheses
pairs = [("(", ")"), ("<", ">"), ("<", ">")]
+
for start_char in re.findall(_lead_pattern, middle):
end_char = next(c for o, c in pairs if o == start_char)
+
while (
middle.count(start_char) > middle.count(end_char)
and end_char in trail
middle = middle + trail[: end_char_index + len(end_char)]
trail = trail[end_char_index + len(end_char) :]
- if _simple_http_https_re.match(middle):
+ if _http_re.match(middle):
if middle.startswith("https://") or middle.startswith("http://"):
middle = (
f'<a href="{middle}"{rel_attr}{target_attr}>'
and _simple_email_re.match(middle)
):
middle = f'<a href="mailto:{middle}">{middle}</a>'
+
if middle.startswith("mailto:") and _simple_email_re.match(middle[7:]):
middle = f'<a href="{middle}">{middle[7:]}</a>'
- if extra_uri_schemes is not None:
- schemes = {x for x in extra_uri_schemes if middle.startswith(x)}
+ if extra_schemes is not None:
+ schemes = {x for x in extra_schemes if middle.startswith(x)}
+
for uri_scheme in schemes:
if len(middle) > len(uri_scheme):
middle = (