2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 #ifndef SQUID_SRC_ANYP_URI_H
10 #define SQUID_SRC_ANYP_URI_H
12 #include "anyp/UriScheme.h"
13 #include "ip/Address.h"
15 #include "sbuf/SBuf.h"
19 class HttpRequestMethod
;
25 * Represents a Uniform Resource Identifier.
26 * Can store either a URL or a URN representation.
28 * Governed by RFC 3986
35 Uri(): hostIsNumeric_(false) { *host_
= 0; }
36 Uri(AnyP::UriScheme
const &aScheme
);
37 Uri(const Uri
&) = default;
38 Uri(Uri
&&) = default;
39 Uri
&operator =(const Uri
&) = default;
40 Uri
&operator =(Uri
&&) = default;
43 scheme_
=AnyP::PROTO_NONE
;
44 hostIsNumeric_
= false;
50 void touch(); ///< clear the cached URI display forms
52 bool parse(const HttpRequestMethod
&, const SBuf
&url
);
54 /// \return a new URI that honors uri_whitespace
55 static char *cleanup(const char *uri
);
57 AnyP::UriScheme
const & getScheme() const {return scheme_
;}
59 /// convert the URL scheme to that given
60 void setScheme(const AnyP::ProtocolType
&p
, const char *str
) {
61 scheme_
= AnyP::UriScheme(p
, str
);
64 void setScheme(const AnyP::UriScheme
&s
) {
69 void userInfo(const SBuf
&s
) {userInfo_
=s
; touch();}
70 /// \returns raw userinfo subcomponent (or an empty string)
71 /// the caller is responsible for caller-specific encoding
72 const SBuf
&userInfo() const {return userInfo_
;}
74 void host(const char *src
);
75 const char *host(void) const {return host_
;}
76 int hostIsNumeric(void) const {return hostIsNumeric_
;}
77 Ip::Address
const & hostIP(void) const {return hostAddr_
;}
79 /// \returns the host subcomponent of the authority component
80 /// If the host is an IPv6 address, returns that IP address with
81 /// [brackets]. See RFC 3986 Section 3.2.2.
82 SBuf
hostOrIp() const;
84 /// reset authority port subcomponent
85 void port(const Port p
) { port_
= p
; touch(); }
87 Port
port() const { return port_
; }
88 /// reset the port to the default port number for the current scheme
89 void defaultPort() { port(getScheme().defaultPort()); }
91 void path(const char *p
) {path_
=p
; touch();}
92 void path(const SBuf
&p
) {path_
=p
; touch();}
93 const SBuf
&path() const;
/**
 * Merge a relative-path URL into the existing URI details.
 * Implements RFC 3986 section 5.2.3
 *
 * The caller must ensure relUrl is a valid relative-path.
 *
 * NP: absolute-paths are also accepted, but the path() method
 *     should be used instead when possible.
 */
void addRelativePath(const char *relUrl);
106 /// the static '/' default URL-path
107 static const SBuf
&SlashPath();
109 /// the static '*' pseudo-URI
110 static const SBuf
&Asterisk();
112 /// %-encode characters in a buffer which do not conform to
113 /// the provided set of expected characters.
114 static SBuf
Encode(const SBuf
&, const CharacterSet
&expected
);
116 /// %-decode the given buffer
117 static SBuf
Decode(const SBuf
&);
120 * The authority-form URI for currently stored values.
122 * As defined by RFC 7230 section 5.3.3 this form omits the
123 * userinfo@ field from RFC 3986 defined authority segment.
125 * \param requirePort when true the port will be included, otherwise
126 * port will be elided when it is the default for
127 * the current scheme.
129 SBuf
&authority(bool requirePort
= false) const;
132 * The absolute-form URI for currently stored values.
134 * As defined by RFC 7230 section 5.3.3 this form omits the
135 * userinfo@ field from RFC 3986 defined authority segments
136 * when the protocol scheme is http: or https:.
138 SBuf
&absolute() const;
141 void parseUrn(Parser::Tokenizer
&);
143 SBuf
parseHost(Parser::Tokenizer
&) const;
144 int parsePort(Parser::Tokenizer
&) const;
148 * The scheme of this URL. This has the 'type code' smell about it.
149 * In future we may want to make the methods that dispatch based on
150 * the scheme virtual and have a class per protocol.
152 * On the other hand, having Protocol as an explicit concept is useful,
153 * see for instance the ACLProtocol acl type. One way to represent this
154 * is to have one prototype URL with no host etc for each scheme,
155 * another is to have an explicit scheme class, and then each URL class
156 * could be a subclass of the scheme. Another way is one instance of
157 * an AnyP::UriScheme class for each URL scheme we support, and one
158 * class for each manner of treating the scheme : a Hierarchical URL, a
159 * non-hierarchical URL etc.
161 * Deferring the decision, it's a type code for now. RBC 20060507.
163 * In order to make taking any of these routes easy, scheme is private,
164 * only settable at construction time, or with explicit setter
166 AnyP::UriScheme scheme_
;
168 SBuf userInfo_
; // aka 'URL-login'
170 // XXX: uses char[] instead of SBUf to reduce performance regressions
171 // from c_str() since most code using this is not yet using SBuf
172 char host_
[SQUIDHOSTNAMELEN
]; ///< string representation of the URI authority name or IP
173 bool hostIsNumeric_
; ///< whether the authority 'host' is a raw-IP
174 Ip::Address hostAddr_
; ///< binary representation of the URI authority if it is a raw-IP
176 Port port_
; ///< authority port subcomponent
178 // XXX: for now includes query-string.
179 SBuf path_
; ///< URI path segment
181 // pre-assembled URI forms
182 mutable SBuf authorityHttp_
; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
183 mutable SBuf authorityWithPort_
; ///< RFC 7230 section 5.3.3 authority with explicit port
184 mutable SBuf absolute_
; ///< RFC 7230 section 5.3.2 absolute-URI
187 inline std::ostream
&
188 operator <<(std::ostream
&os
, const Uri
&url
)
190 // none means explicit empty string for scheme.
191 if (url
.getScheme() != PROTO_NONE
)
192 os
<< url
.getScheme().image();
195 // no authority section on URN
196 if (url
.getScheme() != PROTO_URN
)
197 os
<< "//" << url
.authority();
199 // path is what it is - including absent
206 /* Deprecated functions for Legacy code handling URLs */
210 void urlInitialize(void);
211 /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
212 /// \returns a pointer to a local static buffer containing request URI
213 /// that honors strip_query_terms and %-encodes unsafe URI characters
214 char *urlCanonicalCleanWithoutRequest(const SBuf
&url
, const HttpRequestMethod
&, const AnyP::UriScheme
&);
215 const char *urlCanonicalFakeHttps(const HttpRequest
* request
);
216 bool urlIsRelative(const char *);
217 char *urlRInternal(const char *host
, unsigned short port
, const char *dir
, const char *name
);
218 char *urlInternal(const char *dir
, const char *name
);
219 bool urlAppendDomain(char *host
); ///< apply append_domain config to the given hostname
221 enum MatchDomainNameFlags
{
223 mdnHonorWildcards
= 1 << 0,
224 mdnRejectSubsubDomains
= 1 << 1
228 * matchDomainName() matches a hostname (usually extracted from traffic)
229 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
230 * according to the following rules:
232 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
233 * -------------|-------------|-----------|-----------------------
234 * foo.com | foo.com | YES | YES
235 * .foo.com | foo.com | YES | YES
236 * x.foo.com | foo.com | NO | NO
237 * foo.com | .foo.com | YES | YES
238 * .foo.com | .foo.com | YES | YES
239 * x.foo.com | .foo.com | YES | YES
240 * .x.foo.com | .foo.com | YES | NO
241 * y.x.foo.com | .foo.com | YES | NO
243 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
244 * optional wildcards on hostname:
246 * HOST | DOMAIN | MATCH?
247 * -------------|--------------|-------
248 * *.foo.com | x.foo.com | YES
249 * *.foo.com | .x.foo.com | YES
250 * *.foo.com | .foo.com | YES
251 * *.foo.com | foo.com | NO
253 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
256 * \retval 0 means the host matches the domain
257 * \retval 1 means the host is greater than the domain
258 * \retval -1 means the host is less than the domain
260 int matchDomainName(const char *host
, const char *domain
, MatchDomainNameFlags flags
= mdnNone
);
261 bool urlCheckRequest(const HttpRequest
*);
262 void urlExtMethodConfigure(void);
264 #endif /* SQUID_SRC_ANYP_URI_H */