]> git.ipfire.org Git - thirdparty/squid.git/blob - src/anyp/Uri.h
CI: Upgrade GitHub Setup Node and CodeQL actions to Node 20 (#1845)
[thirdparty/squid.git] / src / anyp / Uri.h
1 /*
2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef SQUID_SRC_ANYP_URI_H
10 #define SQUID_SRC_ANYP_URI_H
11
12 #include "anyp/UriScheme.h"
13 #include "ip/Address.h"
14 #include "rfc2181.h"
15 #include "sbuf/SBuf.h"
16
17 #include <iosfwd>
18
19 class HttpRequestMethod;
20
21 namespace AnyP
22 {
23
24 /**
25 * Represents a Uniform Resource Identifier.
26 * Can store both URL or URN representations.
27 *
28 * Governed by RFC 3986
29 */
30 class Uri
31 {
32 MEMPROXY_CLASS(Uri);
33
34 public:
35 Uri(): hostIsNumeric_(false) { *host_ = 0; }
36 Uri(AnyP::UriScheme const &aScheme);
37 Uri(const Uri &) = default;
38 Uri(Uri &&) = default;
39 Uri &operator =(const Uri &) = default;
40 Uri &operator =(Uri &&) = default;
41
42 void clear() {
43 scheme_=AnyP::PROTO_NONE;
44 hostIsNumeric_ = false;
45 *host_ = 0;
46 hostAddr_.setEmpty();
47 port_ = std::nullopt;
48 touch();
49 }
50 void touch(); ///< clear the cached URI display forms
51
52 bool parse(const HttpRequestMethod &, const SBuf &url);
53
54 /// \return a new URI that honors uri_whitespace
55 static char *cleanup(const char *uri);
56
57 AnyP::UriScheme const & getScheme() const {return scheme_;}
58
59 /// convert the URL scheme to that given
60 void setScheme(const AnyP::ProtocolType &p, const char *str) {
61 scheme_ = AnyP::UriScheme(p, str);
62 touch();
63 }
64 void setScheme(const AnyP::UriScheme &s) {
65 scheme_ = s;
66 touch();
67 }
68
69 void userInfo(const SBuf &s) {userInfo_=s; touch();}
70 /// \returns raw userinfo subcomponent (or an empty string)
71 /// the caller is responsible for caller-specific encoding
72 const SBuf &userInfo() const {return userInfo_;}
73
74 void host(const char *src);
75 const char *host(void) const {return host_;}
76 int hostIsNumeric(void) const {return hostIsNumeric_;}
77 Ip::Address const & hostIP(void) const {return hostAddr_;}
78
79 /// \returns the host subcomponent of the authority component
80 /// If the host is an IPv6 address, returns that IP address with
81 /// [brackets]. See RFC 3986 Section 3.2.2.
82 SBuf hostOrIp() const;
83
84 /// reset authority port subcomponent
85 void port(const Port p) { port_ = p; touch(); }
86 /// \copydoc port_
87 Port port() const { return port_; }
88 /// reset the port to the default port number for the current scheme
89 void defaultPort() { port(getScheme().defaultPort()); }
90
91 void path(const char *p) {path_=p; touch();}
92 void path(const SBuf &p) {path_=p; touch();}
93 const SBuf &path() const;
94
95 /**
96 * Merge a relative-path URL into the existing URI details.
97 * Implements RFC 3986 section 5.2.3
98 *
99 * The caller must ensure relUrl is a valid relative-path.
100 *
101 * NP: absolute-path are also accepted, but path() method
102 * should be used instead when possible.
103 */
104 void addRelativePath(const char *relUrl);
105
106 /// the static '/' default URL-path
107 static const SBuf &SlashPath();
108
109 /// the static '*' pseudo-URI
110 static const SBuf &Asterisk();
111
112 /// %-encode characters in a buffer which do not conform to
113 /// the provided set of expected characters.
114 static SBuf Encode(const SBuf &, const CharacterSet &expected);
115
116 /// %-decode the given buffer
117 static SBuf Decode(const SBuf &);
118
119 /**
120 * The authority-form URI for currently stored values.
121 *
122 * As defined by RFC 7230 section 5.3.3 this form omits the
123 * userinfo@ field from RFC 3986 defined authority segment.
124 *
125 * \param requirePort when true the port will be included, otherwise
126 * port will be elided when it is the default for
127 * the current scheme.
128 */
129 SBuf &authority(bool requirePort = false) const;
130
131 /**
132 * The absolute-form URI for currently stored values.
133 *
134 * As defined by RFC 7230 section 5.3.3 this form omits the
135 * userinfo@ field from RFC 3986 defined authority segments
136 * when the protocol scheme is http: or https:.
137 */
138 SBuf &absolute() const;
139
140 private:
141 void parseUrn(Parser::Tokenizer&);
142
143 SBuf parseHost(Parser::Tokenizer &) const;
144 int parsePort(Parser::Tokenizer &) const;
145
146 /**
147 \par
148 * The scheme of this URL. This has the 'type code' smell about it.
149 * In future we may want to make the methods that dispatch based on
150 * the scheme virtual and have a class per protocol.
151 \par
152 * On the other hand, having Protocol as an explicit concept is useful,
153 * see for instance the ACLProtocol acl type. One way to represent this
154 * is to have one prototype URL with no host etc for each scheme,
155 * another is to have an explicit scheme class, and then each URL class
156 * could be a subclass of the scheme. Another way is one instance of
157 * a AnyP::UriScheme class instance for each URL scheme we support, and one
158 * class for each manner of treating the scheme : a Hierarchical URL, a
159 * non-hierarchical URL etc.
160 \par
161 * Deferring the decision, its a type code for now. RBC 20060507.
162 \par
163 * In order to make taking any of these routes easy, scheme is private,
164 * only settable at construction time, or with explicit setter
165 */
166 AnyP::UriScheme scheme_;
167
168 SBuf userInfo_; // aka 'URL-login'
169
170 // XXX: uses char[] instead of SBUf to reduce performance regressions
171 // from c_str() since most code using this is not yet using SBuf
172 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
173 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
174 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
175
176 Port port_; ///< authority port subcomponent
177
178 // XXX: for now includes query-string.
179 SBuf path_; ///< URI path segment
180
181 // pre-assembled URI forms
182 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
183 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
184 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
185 };
186
187 inline std::ostream &
188 operator <<(std::ostream &os, const Uri &url)
189 {
190 // none means explicit empty string for scheme.
191 if (url.getScheme() != PROTO_NONE)
192 os << url.getScheme().image();
193 os << ":";
194
195 // no authority section on URN
196 if (url.getScheme() != PROTO_URN)
197 os << "//" << url.authority();
198
199 // path is what it is - including absent
200 os << url.path();
201 return os;
202 }
203
204 } // namespace AnyP
205
206 /* Deprecated functions for Legacy code handling URLs */
207
208 class HttpRequest;
209
210 void urlInitialize(void);
211 /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
212 /// \returns a pointer to a local static buffer containing request URI
213 /// that honors strip_query_terms and %-encodes unsafe URI characters
214 char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
215 const char *urlCanonicalFakeHttps(const HttpRequest * request);
216 bool urlIsRelative(const char *);
217 char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
218 char *urlInternal(const char *dir, const char *name);
219 bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
220
/// Bit flags accepted by matchDomainName(); values are distinct bits so that
/// they can be combined.
enum MatchDomainNameFlags {
    mdnNone = 0,                    ///< no optional behavior requested
    mdnHonorWildcards = 1 << 0,     ///< bit 0
    mdnRejectSubsubDomains = 1 << 1 ///< bit 1
};
226
227 /**
228 * matchDomainName() matches a hostname (usually extracted from traffic)
229 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
230 * according to the following rules:
231 *
232 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
233 * -------------|-------------|-----------|-----------------------
234 * foo.com | foo.com | YES | YES
235 * .foo.com | foo.com | YES | YES
236 * x.foo.com | foo.com | NO | NO
237 * foo.com | .foo.com | YES | YES
238 * .foo.com | .foo.com | YES | YES
239 * x.foo.com | .foo.com | YES | YES
240 * .x.foo.com | .foo.com | YES | NO
241 * y.x.foo.com | .foo.com | YES | NO
242 *
243 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
244 * optional wildcards on hostname:
245 *
246 * HOST | DOMAIN | MATCH?
247 * -------------|--------------|-------
248 * *.foo.com | x.foo.com | YES
249 * *.foo.com | .x.foo.com | YES
250 * *.foo.com | .foo.com | YES
251 * *.foo.com | foo.com | NO
252 *
253 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
254 * supported.
255 *
256 * \retval 0 means the host matches the domain
257 * \retval 1 means the host is greater than the domain
258 * \retval -1 means the host is less than the domain
259 */
260 int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
261 bool urlCheckRequest(const HttpRequest *);
262 void urlExtMethodConfigure(void);
263
264 #endif /* SQUID_SRC_ANYP_URI_H */
265