2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 #ifndef SQUID_SRC_ANYP_URI_H
10 #define SQUID_SRC_ANYP_URI_H
12 #include "anyp/forward.h"
13 #include "anyp/UriScheme.h"
14 #include "ip/Address.h"
16 #include "sbuf/SBuf.h"
20 class HttpRequestMethod
;
26 * Represents a Uniform Resource Identifier.
27 * Can store both URL or URN representations.
29 * Governed by RFC 3986
36 Uri(): hostIsNumeric_(false) { *host_
= 0; }
37 Uri(AnyP::UriScheme
const &aScheme
);
38 Uri(const Uri
&) = default;
39 Uri(Uri
&&) = default;
40 Uri
&operator =(const Uri
&) = default;
41 Uri
&operator =(Uri
&&) = default;
44 scheme_
=AnyP::PROTO_NONE
;
45 hostIsNumeric_
= false;
51 void touch(); ///< clear the cached URI display forms
53 bool parse(const HttpRequestMethod
&, const SBuf
&url
);
55 /// \return a new URI that honors uri_whitespace
56 static char *cleanup(const char *uri
);
58 AnyP::UriScheme
const & getScheme() const {return scheme_
;}
// Scheme setters.
// NOTE(review): only the opening part of each body is visible in this view;
// whatever follows the last visible statement of each overload is unknown here.
//
// First overload: stores a new AnyP::UriScheme, built from p and str, in scheme_.
60 /// convert the URL scheme to that given
61 void setScheme(const AnyP::ProtocolType
&p
, const char *str
) {
62 scheme_
= AnyP::UriScheme(p
, str
);
// Second overload: takes an already constructed AnyP::UriScheme.
65 void setScheme(const AnyP::UriScheme
&s
) {
70 void userInfo(const SBuf
&s
) {userInfo_
=s
; touch();}
71 /// \returns raw userinfo subcomponent (or an empty string)
72 /// the caller is responsible for caller-specific encoding
73 const SBuf
&userInfo() const {return userInfo_
;}
75 void host(const char *src
);
76 const char *host(void) const {return host_
;}
77 int hostIsNumeric(void) const {return hostIsNumeric_
;}
78 Ip::Address
const & hostIP(void) const {return hostAddr_
;}
80 /// Successfully interpreted non-empty host subcomponent of the authority
81 /// component (if any). XXX: Remove hostOrIp() and print Host instead.
82 std::optional
<Host
> parsedHost() const;
84 /// \returns the host subcomponent of the authority component
85 /// If the host is an IPv6 address, returns that IP address with
86 /// [brackets]. See RFC 3986 Section 3.2.2.
87 SBuf
hostOrIp() const;
89 /// reset authority port subcomponent
90 void port(const Port p
) { port_
= p
; touch(); }
92 Port
port() const { return port_
; }
93 /// reset the port to the default port number for the current scheme
94 void defaultPort() { port(getScheme().defaultPort()); }
96 void path(const char *p
) {path_
=p
; touch();}
97 void path(const SBuf
&p
) {path_
=p
; touch();}
98 const SBuf
&path() const;
/**
 * Merge a relative-path URL into the existing URI details.
 * Implements RFC 3986 section 5.2.3
 *
 * The caller must ensure relUrl is a valid relative-path.
 *
 * NP: absolute-path are also accepted, but path() method
 *     should be used instead when possible.
 */
void addRelativePath(const char *relUrl);
111 /// the static '/' default URL-path
112 static const SBuf
&SlashPath();
114 /// the static '*' pseudo-URI
115 static const SBuf
&Asterisk();
117 /// %-encode characters in a buffer which do not conform to
118 /// the provided set of expected characters.
119 static SBuf
Encode(const SBuf
&, const CharacterSet
&expected
);
121 /// %-decode the given buffer
122 static SBuf
Decode(const SBuf
&);
125 * The authority-form URI for currently stored values.
127 * As defined by RFC 7230 section 5.3.3 this form omits the
128 * userinfo@ field from RFC 3986 defined authority segment.
130 * \param requirePort when true the port will be included, otherwise
131 * port will be elided when it is the default for
132 * the current scheme.
134 SBuf
&authority(bool requirePort
= false) const;
137 * The absolute-form URI for currently stored values.
139 * As defined by RFC 7230 section 5.3.3 this form omits the
140 * userinfo@ field from RFC 3986 defined authority segments
141 * when the protocol scheme is http: or https:.
143 SBuf
&absolute() const;
146 void parseUrn(Parser::Tokenizer
&);
148 SBuf
parseHost(Parser::Tokenizer
&) const;
149 int parsePort(Parser::Tokenizer
&) const;
153 * The scheme of this URL. This has the 'type code' smell about it.
154 * In future we may want to make the methods that dispatch based on
155 * the scheme virtual and have a class per protocol.
157 * On the other hand, having Protocol as an explicit concept is useful,
158 * see for instance the ACLProtocol acl type. One way to represent this
159 * is to have one prototype URL with no host etc for each scheme,
160 * another is to have an explicit scheme class, and then each URL class
161 * could be a subclass of the scheme. Another way is one instance of
162 * a AnyP::UriScheme class instance for each URL scheme we support, and one
163 * class for each manner of treating the scheme : a Hierarchical URL, a
164 * non-hierarchical URL etc.
166 * Deferring the decision, it's a type code for now. RBC 20060507.
168 * In order to make taking any of these routes easy, scheme is private,
169 * only settable at construction time, or with explicit setter
// URI scheme; this is the value getScheme() returns.
171 AnyP::UriScheme scheme_
;
173 SBuf userInfo_
; // aka 'URL-login'
175 // XXX: uses char[] instead of SBuf to reduce performance regressions
176 // from c_str() since most code using this is not yet using SBuf
177 char host_
[SQUIDHOSTNAMELEN
]; ///< string representation of the URI authority name or IP
178 bool hostIsNumeric_
; ///< whether the authority 'host' is a raw-IP
179 Ip::Address hostAddr_
; ///< binary representation of the URI authority if it is a raw-IP
181 Port port_
; ///< authority port subcomponent
183 // XXX: for now includes query-string.
184 SBuf path_
; ///< URI path segment
186 // pre-assembled URI forms
// NOTE(review): these are mutable, presumably so that the const
// authority()/absolute() accessors can fill them -- confirm in the definitions.
187 mutable SBuf authorityHttp_
; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
188 mutable SBuf authorityWithPort_
; ///< RFC 7230 section 5.3.3 authority with explicit port
189 mutable SBuf absolute_
; ///< RFC 7230 section 5.3.2 absolute-URI
// Stream insertion for Uri.
// The visible statements write the scheme image unless the scheme equals
// PROTO_NONE, and write "//" followed by authority() unless the scheme
// equals PROTO_URN.
// NOTE(review): the braces and the remaining statements of this body are not
// visible in this view, so what is written for the path and what is returned
// cannot be confirmed from here.
192 inline std::ostream
&
193 operator <<(std::ostream
&os
, const Uri
&url
)
195 // none means explicit empty string for scheme.
196 if (url
.getScheme() != PROTO_NONE
)
197 os
<< url
.getScheme().image();
200 // no authority section on URN
201 if (url
.getScheme() != PROTO_URN
)
202 os
<< "//" << url
.authority();
204 // path is what it is - including absent
211 /* Deprecated functions for Legacy code handling URLs */
215 void urlInitialize(void);
216 /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
217 /// \returns a pointer to a local static buffer containing request URI
218 /// that honors strip_query_terms and %-encodes unsafe URI characters
219 char *urlCanonicalCleanWithoutRequest(const SBuf
&url
, const HttpRequestMethod
&, const AnyP::UriScheme
&);
220 const char *urlCanonicalFakeHttps(const HttpRequest
* request
);
221 bool urlIsRelative(const char *);
222 char *urlRInternal(const char *host
, unsigned short port
, const char *dir
, const char *name
);
223 char *urlInternal(const char *dir
, const char *name
);
224 bool urlAppendDomain(char *host
); ///< apply append_domain config to the given hostname
// Flags accepted by matchDomainName(); each visible enumerator occupies its
// own bit (bit 0 and bit 1).
// NOTE(review): the enumerator list is incomplete in this view --
// matchDomainName() defaults its flags argument to mdnNone, which is not among
// the enumerators visible here, and the closing brace is missing as well.
226 enum MatchDomainNameFlags
{
228 mdnHonorWildcards
= 1 << 0,
229 mdnRejectSubsubDomains
= 1 << 1
233 * matchDomainName() matches a hostname (usually extracted from traffic)
234 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
235 * according to the following rules:
237 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
238 * -------------|-------------|-----------|-----------------------
239 * foo.com | foo.com | YES | YES
240 * .foo.com | foo.com | YES | YES
241 * x.foo.com | foo.com | NO | NO
242 * foo.com | .foo.com | YES | YES
243 * .foo.com | .foo.com | YES | YES
244 * x.foo.com | .foo.com | YES | YES
245 * .x.foo.com | .foo.com | YES | NO
246 * y.x.foo.com | .foo.com | YES | NO
248 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
249 * optional wildcards on hostname:
251 * HOST | DOMAIN | MATCH?
252 * -------------|--------------|-------
253 * *.foo.com | x.foo.com | YES
254 * *.foo.com | .x.foo.com | YES
255 * *.foo.com | .foo.com | YES
256 * *.foo.com | foo.com | NO
258 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
261 * \retval 0 means the host matches the domain
262 * \retval 1 means the host is greater than the domain
263 * \retval -1 means the host is less than the domain
265 int matchDomainName(const char *host
, const char *domain
, MatchDomainNameFlags flags
= mdnNone
);
266 bool urlCheckRequest(const HttpRequest
*);
267 void urlExtMethodConfigure(void);
269 #endif /* SQUID_SRC_ANYP_URI_H */