]> git.ipfire.org Git - thirdparty/squid.git/blob - src/anyp/Uri.h
Source Format Enforcement (#532)
[thirdparty/squid.git] / src / anyp / Uri.h
1 /*
2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef SQUID_SRC_ANYP_URI_H
10 #define SQUID_SRC_ANYP_URI_H
11
12 #include "anyp/UriScheme.h"
13 #include "ip/Address.h"
14 #include "rfc2181.h"
15 #include "sbuf/SBuf.h"
16
17 #include <iosfwd>
18
19 class HttpRequestMethod;
20
21 namespace AnyP
22 {
23
24 /**
25 * Represents a Uniform Resource Identifier.
26 * Can store both URL or URN representations.
27 *
28 * Governed by RFC 3986
29 */
30 class Uri
31 {
32 MEMPROXY_CLASS(Uri);
33
34 public:
35 Uri() : hostIsNumeric_(false), port_(0) {*host_=0;}
36 Uri(AnyP::UriScheme const &aScheme);
37 Uri(const Uri &other) {
38 this->operator =(other);
39 }
40 Uri &operator =(const Uri &o) {
41 scheme_ = o.scheme_;
42 userInfo_ = o.userInfo_;
43 memcpy(host_, o.host_, sizeof(host_));
44 hostIsNumeric_ = o.hostIsNumeric_;
45 hostAddr_ = o.hostAddr_;
46 port_ = o.port_;
47 path_ = o.path_;
48 touch();
49 return *this;
50 }
51
52 void clear() {
53 scheme_=AnyP::PROTO_NONE;
54 hostIsNumeric_ = false;
55 *host_ = 0;
56 hostAddr_.setEmpty();
57 port_ = 0;
58 touch();
59 }
60 void touch(); ///< clear the cached URI display forms
61
62 bool parse(const HttpRequestMethod &, const SBuf &url);
63
64 /// \return a new URI that honors uri_whitespace
65 static char *cleanup(const char *uri);
66
67 AnyP::UriScheme const & getScheme() const {return scheme_;}
68
69 /// convert the URL scheme to that given
70 void setScheme(const AnyP::ProtocolType &p, const char *str) {
71 scheme_ = AnyP::UriScheme(p, str);
72 touch();
73 }
74 void setScheme(const AnyP::UriScheme &s) {
75 scheme_ = s;
76 touch();
77 }
78
79 void userInfo(const SBuf &s) {userInfo_=s; touch();}
80 const SBuf &userInfo() const {return userInfo_;}
81
82 void host(const char *src);
83 const char *host(void) const {return host_;}
84 int hostIsNumeric(void) const {return hostIsNumeric_;}
85 Ip::Address const & hostIP(void) const {return hostAddr_;}
86
87 /// \returns the host subcomponent of the authority component
88 /// If the host is an IPv6 address, returns that IP address without
89 /// [brackets]! See RFC 3986 Section 3.2.2.
90 SBuf hostOrIp() const;
91
92 void port(unsigned short p) {port_=p; touch();}
93 unsigned short port() const {return port_;}
94 /// reset the port to the default port number for the current scheme
95 void defaultPort() { port(getScheme().defaultPort()); }
96
97 void path(const char *p) {path_=p; touch();}
98 void path(const SBuf &p) {path_=p; touch();}
99 const SBuf &path() const;
100
101 /// the static '/' default URL-path
102 static const SBuf &SlashPath();
103
104 /// the static '*' pseudo-URI
105 static const SBuf &Asterisk();
106
107 /**
108 * The authority-form URI for currently stored values.
109 *
110 * As defined by RFC 7230 section 5.3.3 this form omits the
111 * userinfo@ field from RFC 3986 defined authority segment.
112 *
113 * \param requirePort when true the port will be included, otherwise
114 * port will be elided when it is the default for
115 * the current scheme.
116 */
117 SBuf &authority(bool requirePort = false) const;
118
119 /**
120 * The absolute-form URI for currently stored values.
121 *
122 * As defined by RFC 7230 section 5.3.3 this form omits the
123 * userinfo@ field from RFC 3986 defined authority segments
124 * when the protocol scheme is http: or https:.
125 */
126 SBuf &absolute() const;
127
128 private:
129 void parseUrn(Parser::Tokenizer&);
130
131 /**
132 \par
133 * The scheme of this URL. This has the 'type code' smell about it.
134 * In future we may want to make the methods that dispatch based on
135 * the scheme virtual and have a class per protocol.
136 \par
137 * On the other hand, having Protocol as an explicit concept is useful,
138 * see for instance the ACLProtocol acl type. One way to represent this
139 * is to have one prototype URL with no host etc for each scheme,
140 * another is to have an explicit scheme class, and then each URL class
141 * could be a subclass of the scheme. Another way is one instance of
142 * a AnyP::UriScheme class instance for each URL scheme we support, and one
143 * class for each manner of treating the scheme : a Hierarchical URL, a
144 * non-hierarchical URL etc.
145 \par
146 * Deferring the decision, its a type code for now. RBC 20060507.
147 \par
148 * In order to make taking any of these routes easy, scheme is private,
149 * only settable at construction time, or with explicit setter
150 */
151 AnyP::UriScheme scheme_;
152
153 SBuf userInfo_; // aka 'URL-login'
154
155 // XXX: uses char[] instead of SBUf to reduce performance regressions
156 // from c_str() since most code using this is not yet using SBuf
157 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
158 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
159 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
160
161 unsigned short port_; ///< URL port
162
163 // XXX: for now includes query-string.
164 SBuf path_; ///< URI path segment
165
166 // pre-assembled URI forms
167 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
168 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
169 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
170 };
171
172 } // namespace AnyP
173
174 inline std::ostream &
175 operator <<(std::ostream &os, const AnyP::Uri &url)
176 {
177 // none means explicit empty string for scheme.
178 if (url.getScheme() != AnyP::PROTO_NONE)
179 os << url.getScheme().image();
180 os << ":";
181
182 // no authority section on URN
183 if (url.getScheme() != AnyP::PROTO_URN)
184 os << "//" << url.authority();
185
186 // path is what it is - including absent
187 os << url.path();
188 return os;
189 }
190
191 /* Deprecated functions for Legacy code handling URLs */
192
193 class HttpRequest;
194
195 void urlInitialize(void);
196 /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
197 /// \returns a pointer to a local static buffer containing request URI
198 /// that honors strip_query_terms and %-encodes unsafe URI characters
199 char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
200 const char *urlCanonicalFakeHttps(const HttpRequest * request);
201 bool urlIsRelative(const char *);
202 char *urlMakeAbsolute(const HttpRequest *, const char *);
203 char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
204 char *urlInternal(const char *dir, const char *name);
205 bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
206
/// Flags accepted by matchDomainName(); each non-zero flag is a distinct bit.
enum MatchDomainNameFlags {
    mdnNone = 0x0,                ///< no flag set
    mdnHonorWildcards = 0x1,      ///< bit 0
    mdnRejectSubsubDomains = 0x2  ///< bit 1
};
212
213 /**
214 * matchDomainName() matches a hostname (usually extracted from traffic)
215 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
216 * according to the following rules:
217 *
218 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
219 * -------------|-------------|-----------|-----------------------
220 * foo.com | foo.com | YES | YES
221 * .foo.com | foo.com | YES | YES
222 * x.foo.com | foo.com | NO | NO
223 * foo.com | .foo.com | YES | YES
224 * .foo.com | .foo.com | YES | YES
225 * x.foo.com | .foo.com | YES | YES
226 * .x.foo.com | .foo.com | YES | NO
227 * y.x.foo.com | .foo.com | YES | NO
228 *
229 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
230 * optional wildcards on hostname:
231 *
232 * HOST | DOMAIN | MATCH?
233 * -------------|--------------|-------
234 * *.foo.com | x.foo.com | YES
235 * *.foo.com | .x.foo.com | YES
236 * *.foo.com | .foo.com | YES
237 * *.foo.com | foo.com | NO
238 *
239 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
240 * supported.
241 *
242 * \retval 0 means the host matches the domain
243 * \retval 1 means the host is greater than the domain
244 * \retval -1 means the host is less than the domain
245 */
246 int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
247 int urlCheckRequest(const HttpRequest *);
248 char *urlHostname(const char *url);
249 void urlExtMethodConfigure(void);
250
251 #endif /* SQUID_SRC_ANYP_URI_H */
252