]> git.ipfire.org Git - thirdparty/squid.git/blame_incremental - src/anyp/Uri.h
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / anyp / Uri.h
... / ...
CommitLineData
1/*
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#ifndef SQUID_SRC_ANYP_URI_H
10#define SQUID_SRC_ANYP_URI_H
11
12#include "anyp/UriScheme.h"
13#include "ip/Address.h"
14#include "rfc2181.h"
15#include "sbuf/SBuf.h"
16
17#include <iosfwd>
18
19class HttpRequestMethod;
20
21namespace AnyP
22{
23
24/**
25 * Represents a Uniform Resource Identifier.
26 * Can store both URL or URN representations.
27 *
28 * Governed by RFC 3986
29 */
30class Uri
31{
32 MEMPROXY_CLASS(Uri);
33
34public:
35 Uri() : hostIsNumeric_(false), port_(0) {*host_=0;}
36 Uri(AnyP::UriScheme const &aScheme);
37 Uri(const Uri &other) {
38 this->operator =(other);
39 }
40 Uri &operator =(const Uri &o) {
41 scheme_ = o.scheme_;
42 userInfo_ = o.userInfo_;
43 memcpy(host_, o.host_, sizeof(host_));
44 hostIsNumeric_ = o.hostIsNumeric_;
45 hostAddr_ = o.hostAddr_;
46 port_ = o.port_;
47 path_ = o.path_;
48 touch();
49 return *this;
50 }
51
52 void clear() {
53 scheme_=AnyP::PROTO_NONE;
54 hostIsNumeric_ = false;
55 *host_ = 0;
56 hostAddr_.setEmpty();
57 port_ = 0;
58 touch();
59 }
60 void touch(); ///< clear the cached URI display forms
61
62 bool parse(const HttpRequestMethod &, const SBuf &url);
63
64 /// \return a new URI that honors uri_whitespace
65 static char *cleanup(const char *uri);
66
67 AnyP::UriScheme const & getScheme() const {return scheme_;}
68
69 /// convert the URL scheme to that given
70 void setScheme(const AnyP::ProtocolType &p, const char *str) {
71 scheme_ = AnyP::UriScheme(p, str);
72 touch();
73 }
74 void setScheme(const AnyP::UriScheme &s) {
75 scheme_ = s;
76 touch();
77 }
78
79 void userInfo(const SBuf &s) {userInfo_=s; touch();}
80 /// \returns raw userinfo subcomponent (or an empty string)
81 /// the caller is responsible for caller-specific encoding
82 const SBuf &userInfo() const {return userInfo_;}
83
84 void host(const char *src);
85 const char *host(void) const {return host_;}
86 int hostIsNumeric(void) const {return hostIsNumeric_;}
87 Ip::Address const & hostIP(void) const {return hostAddr_;}
88
89 /// \returns the host subcomponent of the authority component
90 /// If the host is an IPv6 address, returns that IP address without
91 /// [brackets]! See RFC 3986 Section 3.2.2.
92 SBuf hostOrIp() const;
93
94 void port(unsigned short p) {port_=p; touch();}
95 unsigned short port() const {return port_;}
96 /// reset the port to the default port number for the current scheme
97 void defaultPort() { port(getScheme().defaultPort()); }
98
99 void path(const char *p) {path_=p; touch();}
100 void path(const SBuf &p) {path_=p; touch();}
101 const SBuf &path() const;
102
103 /**
104 * Merge a relative-path URL into the existing URI details.
105 * Implements RFC 3986 section 5.2.3
106 *
107 * The caller must ensure relUrl is a valid relative-path.
108 *
109 * NP: absolute-path are also accepted, but path() method
110 * should be used instead when possible.
111 */
112 void addRelativePath(const char *relUrl);
113
114 /// the static '/' default URL-path
115 static const SBuf &SlashPath();
116
117 /// the static '*' pseudo-URI
118 static const SBuf &Asterisk();
119
120 /// %-encode characters in a buffer which do not conform to
121 /// the provided set of expected characters.
122 static SBuf Encode(const SBuf &, const CharacterSet &expected);
123
124 /**
125 * The authority-form URI for currently stored values.
126 *
127 * As defined by RFC 7230 section 5.3.3 this form omits the
128 * userinfo@ field from RFC 3986 defined authority segment.
129 *
130 * \param requirePort when true the port will be included, otherwise
131 * port will be elided when it is the default for
132 * the current scheme.
133 */
134 SBuf &authority(bool requirePort = false) const;
135
136 /**
137 * The absolute-form URI for currently stored values.
138 *
139 * As defined by RFC 7230 section 5.3.3 this form omits the
140 * userinfo@ field from RFC 3986 defined authority segments
141 * when the protocol scheme is http: or https:.
142 */
143 SBuf &absolute() const;
144
145private:
146 void parseUrn(Parser::Tokenizer&);
147
148 /**
149 \par
150 * The scheme of this URL. This has the 'type code' smell about it.
151 * In future we may want to make the methods that dispatch based on
152 * the scheme virtual and have a class per protocol.
153 \par
154 * On the other hand, having Protocol as an explicit concept is useful,
155 * see for instance the ACLProtocol acl type. One way to represent this
156 * is to have one prototype URL with no host etc for each scheme,
157 * another is to have an explicit scheme class, and then each URL class
158 * could be a subclass of the scheme. Another way is one instance of
159 * a AnyP::UriScheme class instance for each URL scheme we support, and one
160 * class for each manner of treating the scheme : a Hierarchical URL, a
161 * non-hierarchical URL etc.
162 \par
163 * Deferring the decision, its a type code for now. RBC 20060507.
164 \par
165 * In order to make taking any of these routes easy, scheme is private,
166 * only settable at construction time, or with explicit setter
167 */
168 AnyP::UriScheme scheme_;
169
170 SBuf userInfo_; // aka 'URL-login'
171
172 // XXX: uses char[] instead of SBUf to reduce performance regressions
173 // from c_str() since most code using this is not yet using SBuf
174 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
175 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
176 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
177
178 unsigned short port_; ///< URL port
179
180 // XXX: for now includes query-string.
181 SBuf path_; ///< URI path segment
182
183 // pre-assembled URI forms
184 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
185 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
186 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
187};
188
189} // namespace AnyP
190
191inline std::ostream &
192operator <<(std::ostream &os, const AnyP::Uri &url)
193{
194 // none means explicit empty string for scheme.
195 if (url.getScheme() != AnyP::PROTO_NONE)
196 os << url.getScheme().image();
197 os << ":";
198
199 // no authority section on URN
200 if (url.getScheme() != AnyP::PROTO_URN)
201 os << "//" << url.authority();
202
203 // path is what it is - including absent
204 os << url.path();
205 return os;
206}
207
208/* Deprecated functions for Legacy code handling URLs */
209
210class HttpRequest;
211
212void urlInitialize(void);
213/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
214/// \returns a pointer to a local static buffer containing request URI
215/// that honors strip_query_terms and %-encodes unsafe URI characters
216char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
217const char *urlCanonicalFakeHttps(const HttpRequest * request);
218bool urlIsRelative(const char *);
219char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
220char *urlInternal(const char *dir, const char *name);
221bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
222
/// option bits accepted by matchDomainName()
enum MatchDomainNameFlags {
    mdnNone = 0,                    ///< no option bits set
    mdnHonorWildcards = 0x1,        ///< bit 0
    mdnRejectSubsubDomains = 0x2    ///< bit 1
};
228
229/**
230 * matchDomainName() matches a hostname (usually extracted from traffic)
231 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
232 * according to the following rules:
233 *
234 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
235 * -------------|-------------|-----------|-----------------------
236 * foo.com | foo.com | YES | YES
237 * .foo.com | foo.com | YES | YES
238 * x.foo.com | foo.com | NO | NO
239 * foo.com | .foo.com | YES | YES
240 * .foo.com | .foo.com | YES | YES
241 * x.foo.com | .foo.com | YES | YES
242 * .x.foo.com | .foo.com | YES | NO
243 * y.x.foo.com | .foo.com | YES | NO
244 *
245 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
246 * optional wildcards on hostname:
247 *
248 * HOST | DOMAIN | MATCH?
249 * -------------|--------------|-------
250 * *.foo.com | x.foo.com | YES
251 * *.foo.com | .x.foo.com | YES
252 * *.foo.com | .foo.com | YES
253 * *.foo.com | foo.com | NO
254 *
255 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
256 * supported.
257 *
258 * \retval 0 means the host matches the domain
259 * \retval 1 means the host is greater than the domain
260 * \retval -1 means the host is less than the domain
261 */
262int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
263int urlCheckRequest(const HttpRequest *);
264void urlExtMethodConfigure(void);
265
266#endif /* SQUID_SRC_ANYP_URI_H */
267