]>
Commit | Line | Data |
---|---|---|
985c86bc | 1 | /* |
f70aedc4 | 2 | * Copyright (C) 1996-2021 The Squid Software Foundation and contributors |
985c86bc | 3 | * |
bbc27441 AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
985c86bc | 7 | */ |
8 | ||
c8ab5ec6 AJ |
9 | #ifndef SQUID_SRC_ANYP_URI_H |
10 | #define SQUID_SRC_ANYP_URI_H | |
985c86bc | 11 | |
1ca54a54 | 12 | #include "anyp/UriScheme.h" |
5c51bffb AJ |
13 | #include "ip/Address.h" |
14 | #include "rfc2181.h" | |
65e41a45 | 15 | #include "sbuf/SBuf.h" |
985c86bc | 16 | |
51b5dcf5 AJ |
17 | #include <iosfwd> |
18 | ||
c8ab5ec6 AJ |
19 | class HttpRequestMethod; |
20 | ||
21 | namespace AnyP | |
22 | { | |
23 | ||
63be0a78 | 24 | /** |
c8ab5ec6 AJ |
25 | * Represents a Uniform Resource Identifier. |
26 | * Can store both URL or URN representations. | |
5c51bffb AJ |
27 | * |
28 | * Governed by RFC 3986 | |
63be0a78 | 29 | */ |
c8ab5ec6 | 30 | class Uri |
985c86bc | 31 | { |
c8ab5ec6 | 32 | MEMPROXY_CLASS(Uri); |
741c2986 AJ |
33 | |
34 | public: | |
c8ab5ec6 AJ |
35 | Uri() : hostIsNumeric_(false), port_(0) {*host_=0;} |
36 | Uri(AnyP::UriScheme const &aScheme); | |
37 | Uri(const Uri &other) { | |
d31d59d8 AJ |
38 | this->operator =(other); |
39 | } | |
c8ab5ec6 | 40 | Uri &operator =(const Uri &o) { |
d31d59d8 AJ |
41 | scheme_ = o.scheme_; |
42 | userInfo_ = o.userInfo_; | |
43 | memcpy(host_, o.host_, sizeof(host_)); | |
44 | hostIsNumeric_ = o.hostIsNumeric_; | |
45 | hostAddr_ = o.hostAddr_; | |
46 | port_ = o.port_; | |
47 | path_ = o.path_; | |
48 | touch(); | |
49 | return *this; | |
50 | } | |
4e3f4dc7 AJ |
51 | |
52 | void clear() { | |
53 | scheme_=AnyP::PROTO_NONE; | |
5c51bffb AJ |
54 | hostIsNumeric_ = false; |
55 | *host_ = 0; | |
56 | hostAddr_.setEmpty(); | |
57 | port_ = 0; | |
58 | touch(); | |
4e3f4dc7 | 59 | } |
5c51bffb | 60 | void touch(); ///< clear the cached URI display forms |
4e3f4dc7 | 61 | |
6c880a16 | 62 | bool parse(const HttpRequestMethod &, const SBuf &url); |
db59367a | 63 | |
bec110e4 EB |
64 | /// \return a new URI that honors uri_whitespace |
65 | static char *cleanup(const char *uri); | |
66 | ||
1ca54a54 | 67 | AnyP::UriScheme const & getScheme() const {return scheme_;} |
985c86bc | 68 | |
4e3f4dc7 | 69 | /// convert the URL scheme to that given |
d31d59d8 AJ |
70 | void setScheme(const AnyP::ProtocolType &p, const char *str) { |
71 | scheme_ = AnyP::UriScheme(p, str); | |
72 | touch(); | |
73 | } | |
6c880a16 AJ |
74 | void setScheme(const AnyP::UriScheme &s) { |
75 | scheme_ = s; | |
76 | touch(); | |
77 | } | |
4e3f4dc7 | 78 | |
5c51bffb | 79 | void userInfo(const SBuf &s) {userInfo_=s; touch();} |
614bd511 AJ |
80 | /// \returns raw userinfo subcomponent (or an empty string) |
81 | /// the caller is responsible for caller-specific encoding | |
92d6986d AJ |
82 | const SBuf &userInfo() const {return userInfo_;} |
83 | ||
5c51bffb AJ |
84 | void host(const char *src); |
85 | const char *host(void) const {return host_;} | |
86 | int hostIsNumeric(void) const {return hostIsNumeric_;} | |
87 | Ip::Address const & hostIP(void) const {return hostAddr_;} | |
88 | ||
9ce4a1eb CT |
89 | /// \returns the host subcomponent of the authority component |
90 | /// If the host is an IPv6 address, returns that IP address without | |
91 | /// [brackets]! See RFC 3986 Section 3.2.2. | |
92 | SBuf hostOrIp() const; | |
93 | ||
5c51bffb AJ |
94 | void port(unsigned short p) {port_=p; touch();} |
95 | unsigned short port() const {return port_;} | |
d754c7af EB |
96 | /// reset the port to the default port number for the current scheme |
97 | void defaultPort() { port(getScheme().defaultPort()); } | |
5c51bffb | 98 | |
51b5dcf5 AJ |
99 | void path(const char *p) {path_=p; touch();} |
100 | void path(const SBuf &p) {path_=p; touch();} | |
101 | const SBuf &path() const; | |
102 | ||
614bd511 AJ |
103 | /** |
104 | * Merge a relative-path URL into the existing URI details. | |
105 | * Implements RFC 3986 section 5.2.3 | |
106 | * | |
107 | * The caller must ensure relUrl is a valid relative-path. | |
108 | * | |
109 | * NP: absolute-path are also accepted, but path() method | |
110 | * should be used instead when possible. | |
111 | */ | |
112 | void addRelativePath(const char *relUrl); | |
113 | ||
51b5dcf5 AJ |
114 | /// the static '/' default URL-path |
115 | static const SBuf &SlashPath(); | |
116 | ||
c8ab5ec6 | 117 | /// the static '*' pseudo-URI |
2e260208 AJ |
118 | static const SBuf &Asterisk(); |
119 | ||
614bd511 AJ |
120 | /// %-encode characters in a buffer which do not conform to |
121 | /// the provided set of expected characters. | |
122 | static SBuf Encode(const SBuf &, const CharacterSet &expected); | |
123 | ||
5c51bffb AJ |
124 | /** |
125 | * The authority-form URI for currently stored values. | |
126 | * | |
127 | * As defined by RFC 7230 section 5.3.3 this form omits the | |
128 | * userinfo@ field from RFC 3986 defined authority segment. | |
129 | * | |
130 | * \param requirePort when true the port will be included, otherwise | |
131 | * port will be elided when it is the default for | |
132 | * the current scheme. | |
133 | */ | |
134 | SBuf &authority(bool requirePort = false) const; | |
135 | ||
c823e2da AJ |
136 | /** |
137 | * The absolute-form URI for currently stored values. | |
138 | * | |
139 | * As defined by RFC 7230 section 5.3.3 this form omits the | |
140 | * userinfo@ field from RFC 3986 defined authority segments | |
141 | * when the protocol scheme is http: or https:. | |
142 | */ | |
143 | SBuf &absolute() const; | |
144 | ||
985c86bc | 145 | private: |
6c880a16 | 146 | void parseUrn(Parser::Tokenizer&); |
91489e45 | 147 | |
63be0a78 | 148 | /** |
149 | \par | |
150 | * The scheme of this URL. This has the 'type code' smell about it. | |
26ac0430 AJ |
151 | * In future we may want to make the methods that dispatch based on |
152 | * the scheme virtual and have a class per protocol. | |
63be0a78 | 153 | \par |
154 | * On the other hand, having Protocol as an explicit concept is useful, | |
985c86bc | 155 | * see for instance the ACLProtocol acl type. One way to represent this |
26ac0430 | 156 | * is to have one prototype URL with no host etc for each scheme, |
985c86bc | 157 | * another is to have an explicit scheme class, and then each URL class |
26ac0430 | 158 | * could be a subclass of the scheme. Another way is one instance of |
c8ab5ec6 | 159 | * a AnyP::UriScheme class instance for each URL scheme we support, and one |
985c86bc | 160 | * class for each manner of treating the scheme : a Hierarchical URL, a |
63be0a78 | 161 | * non-hierarchical URL etc. |
162 | \par | |
985c86bc | 163 | * Deferring the decision, its a type code for now. RBC 20060507. |
63be0a78 | 164 | \par |
c8ab5ec6 AJ |
165 | * In order to make taking any of these routes easy, scheme is private, |
166 | * only settable at construction time, or with explicit setter | |
985c86bc | 167 | */ |
4e3f4dc7 | 168 | AnyP::UriScheme scheme_; |
92d6986d AJ |
169 | |
170 | SBuf userInfo_; // aka 'URL-login' | |
5c51bffb AJ |
171 | |
172 | // XXX: uses char[] instead of SBUf to reduce performance regressions | |
173 | // from c_str() since most code using this is not yet using SBuf | |
174 | char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP | |
175 | bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP | |
176 | Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP | |
177 | ||
178 | unsigned short port_; ///< URL port | |
179 | ||
51b5dcf5 | 180 | // XXX: for now includes query-string. |
c8ab5ec6 | 181 | SBuf path_; ///< URI path segment |
51b5dcf5 | 182 | |
c8ab5ec6 | 183 | // pre-assembled URI forms |
5c51bffb AJ |
184 | mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port |
185 | mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port | |
c823e2da | 186 | mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI |
985c86bc | 187 | }; |
188 | ||
c8ab5ec6 AJ |
189 | } // namespace AnyP |
190 | ||
51b5dcf5 | 191 | inline std::ostream & |
c8ab5ec6 | 192 | operator <<(std::ostream &os, const AnyP::Uri &url) |
51b5dcf5 | 193 | { |
d31d59d8 AJ |
194 | // none means explicit empty string for scheme. |
195 | if (url.getScheme() != AnyP::PROTO_NONE) | |
196 | os << url.getScheme().image(); | |
197 | os << ":"; | |
198 | ||
199 | // no authority section on URN | |
200 | if (url.getScheme() != AnyP::PROTO_URN) | |
201 | os << "//" << url.authority(); | |
202 | ||
203 | // path is what it is - including absent | |
204 | os << url.path(); | |
51b5dcf5 AJ |
205 | return os; |
206 | } | |
207 | ||
c8ab5ec6 AJ |
208 | /* Deprecated functions for Legacy code handling URLs */ |
209 | ||
fc54b8d2 | 210 | class HttpRequest; |
fc54b8d2 | 211 | |
8a648e8d | 212 | void urlInitialize(void); |
bec110e4 EB |
213 | /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest |
214 | /// \returns a pointer to a local static buffer containing request URI | |
215 | /// that honors strip_query_terms and %-encodes unsafe URI characters | |
216 | char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &); | |
8a648e8d FC |
217 | const char *urlCanonicalFakeHttps(const HttpRequest * request); |
218 | bool urlIsRelative(const char *); | |
8a648e8d FC |
219 | char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name); |
220 | char *urlInternal(const char *dir, const char *name); | |
38aa10ef | 221 | bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname |
69f69080 | 222 | |
abbd7825 CT |
223 | enum MatchDomainNameFlags { |
224 | mdnNone = 0, | |
225 | mdnHonorWildcards = 1 << 0, | |
226 | mdnRejectSubsubDomains = 1 << 1 | |
227 | }; | |
228 | ||
69f69080 | 229 | /** |
abbd7825 CT |
230 | * matchDomainName() matches a hostname (usually extracted from traffic) |
231 | * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used | |
232 | * according to the following rules: | |
69f69080 | 233 | * |
abbd7825 CT |
234 | * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains |
235 | * -------------|-------------|-----------|----------------------- | |
236 | * foo.com | foo.com | YES | YES | |
237 | * .foo.com | foo.com | YES | YES | |
238 | * x.foo.com | foo.com | NO | NO | |
239 | * foo.com | .foo.com | YES | YES | |
240 | * .foo.com | .foo.com | YES | YES | |
241 | * x.foo.com | .foo.com | YES | YES | |
242 | * .x.foo.com | .foo.com | YES | NO | |
243 | * y.x.foo.com | .foo.com | YES | NO | |
69f69080 | 244 | * |
abbd7825 | 245 | * if mdnHonorWildcards flag is set then the matchDomainName() also accepts |
69f69080 CT |
246 | * optional wildcards on hostname: |
247 | * | |
248 | * HOST | DOMAIN | MATCH? | |
249 | * -------------|--------------|------- | |
250 | * *.foo.com | x.foo.com | YES | |
251 | * *.foo.com | .x.foo.com | YES | |
252 | * *.foo.com | .foo.com | YES | |
253 | * *.foo.com | foo.com | NO | |
254 | * | |
abbd7825 CT |
255 | * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is |
256 | * supported. | |
257 | * | |
69f69080 CT |
258 | * \retval 0 means the host matches the domain |
259 | * \retval 1 means the host is greater than the domain | |
260 | * \retval -1 means the host is less than the domain | |
261 | */ | |
6c1219b9 | 262 | int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone); |
8a648e8d | 263 | int urlCheckRequest(const HttpRequest *); |
8a648e8d | 264 | void urlExtMethodConfigure(void); |
fc54b8d2 | 265 | |
c8ab5ec6 | 266 | #endif /* SQUID_SRC_ANYP_URI_H */ |
f53969cc | 267 |