]> git.ipfire.org Git - thirdparty/squid.git/blob - src/anyp/Uri.h
Fix SQUID_YESNO 'syntax error near unexpected token' (#2117)
[thirdparty/squid.git] / src / anyp / Uri.h
1 /*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef SQUID_SRC_ANYP_URI_H
10 #define SQUID_SRC_ANYP_URI_H
11
12 #include "anyp/forward.h"
13 #include "anyp/UriScheme.h"
14 #include "ip/Address.h"
15 #include "rfc2181.h"
16 #include "sbuf/SBuf.h"
17
18 #include <iosfwd>
19
20 class HttpRequestMethod;
21
22 namespace AnyP
23 {
24
25 /**
26 * Represents a Uniform Resource Identifier.
27 * Can store both URL or URN representations.
28 *
29 * Governed by RFC 3986
30 */
31 class Uri
32 {
33 MEMPROXY_CLASS(Uri);
34
35 public:
36 Uri(): hostIsNumeric_(false) { *host_ = 0; }
37 Uri(AnyP::UriScheme const &aScheme);
38 Uri(const Uri &) = default;
39 Uri(Uri &&) = default;
40 Uri &operator =(const Uri &) = default;
41 Uri &operator =(Uri &&) = default;
42
43 void clear() {
44 scheme_=AnyP::PROTO_NONE;
45 hostIsNumeric_ = false;
46 *host_ = 0;
47 hostAddr_.setEmpty();
48 port_ = std::nullopt;
49 touch();
50 }
51 void touch(); ///< clear the cached URI display forms
52
53 bool parse(const HttpRequestMethod &, const SBuf &url);
54
55 /// \return a new URI that honors uri_whitespace
56 static char *cleanup(const char *uri);
57
58 AnyP::UriScheme const & getScheme() const {return scheme_;}
59
60 /// convert the URL scheme to that given
61 void setScheme(const AnyP::ProtocolType &p, const char *str) {
62 scheme_ = AnyP::UriScheme(p, str);
63 touch();
64 }
65 void setScheme(const AnyP::UriScheme &s) {
66 scheme_ = s;
67 touch();
68 }
69
70 void userInfo(const SBuf &s) {userInfo_=s; touch();}
71 /// \returns raw userinfo subcomponent (or an empty string)
72 /// the caller is responsible for caller-specific encoding
73 const SBuf &userInfo() const {return userInfo_;}
74
75 void host(const char *src);
76 const char *host(void) const {return host_;}
77 int hostIsNumeric(void) const {return hostIsNumeric_;}
78 Ip::Address const & hostIP(void) const {return hostAddr_;}
79
80 /// Successfully interpreted non-empty host subcomponent of the authority
81 /// component (if any). XXX: Remove hostOrIp() and print Host instead.
82 std::optional<Host> parsedHost() const;
83
84 /// \returns the host subcomponent of the authority component
85 /// If the host is an IPv6 address, returns that IP address with
86 /// [brackets]. See RFC 3986 Section 3.2.2.
87 SBuf hostOrIp() const;
88
89 /// reset authority port subcomponent
90 void port(const Port p) { port_ = p; touch(); }
91 /// \copydoc port_
92 Port port() const { return port_; }
93 /// reset the port to the default port number for the current scheme
94 void defaultPort() { port(getScheme().defaultPort()); }
95
96 void path(const char *p) {path_=p; touch();}
97 void path(const SBuf &p) {path_=p; touch();}
98 const SBuf &path() const;
99
100 /**
101 * Merge a relative-path URL into the existing URI details.
102 * Implements RFC 3986 section 5.2.3
103 *
104 * The caller must ensure relUrl is a valid relative-path.
105 *
106 * NP: absolute-path are also accepted, but path() method
107 * should be used instead when possible.
108 */
109 void addRelativePath(const char *relUrl);
110
111 /// the static '/' default URL-path
112 static const SBuf &SlashPath();
113
114 /// the static '*' pseudo-URI
115 static const SBuf &Asterisk();
116
117 /// %-encode characters in a buffer which do not conform to
118 /// the provided set of expected characters.
119 static SBuf Encode(const SBuf &, const CharacterSet &expected);
120
121 /// %-decode the given buffer
122 static SBuf Decode(const SBuf &);
123
124 /**
125 * The authority-form URI for currently stored values.
126 *
127 * As defined by RFC 7230 section 5.3.3 this form omits the
128 * userinfo@ field from RFC 3986 defined authority segment.
129 *
130 * \param requirePort when true the port will be included, otherwise
131 * port will be elided when it is the default for
132 * the current scheme.
133 */
134 SBuf &authority(bool requirePort = false) const;
135
136 /**
137 * The absolute-form URI for currently stored values.
138 *
139 * As defined by RFC 7230 section 5.3.3 this form omits the
140 * userinfo@ field from RFC 3986 defined authority segments
141 * when the protocol scheme is http: or https:.
142 */
143 SBuf &absolute() const;
144
145 private:
146 void parseUrn(Parser::Tokenizer&);
147
148 SBuf parseHost(Parser::Tokenizer &) const;
149 int parsePort(Parser::Tokenizer &) const;
150
151 /**
152 \par
153 * The scheme of this URL. This has the 'type code' smell about it.
154 * In future we may want to make the methods that dispatch based on
155 * the scheme virtual and have a class per protocol.
156 \par
157 * On the other hand, having Protocol as an explicit concept is useful,
158 * see for instance the ACLProtocol acl type. One way to represent this
159 * is to have one prototype URL with no host etc for each scheme,
160 * another is to have an explicit scheme class, and then each URL class
161 * could be a subclass of the scheme. Another way is one instance of
162 * a AnyP::UriScheme class instance for each URL scheme we support, and one
163 * class for each manner of treating the scheme : a Hierarchical URL, a
164 * non-hierarchical URL etc.
165 \par
166 * Deferring the decision, its a type code for now. RBC 20060507.
167 \par
168 * In order to make taking any of these routes easy, scheme is private,
169 * only settable at construction time, or with explicit setter
170 */
171 AnyP::UriScheme scheme_;
172
173 SBuf userInfo_; // aka 'URL-login'
174
175 // XXX: uses char[] instead of SBUf to reduce performance regressions
176 // from c_str() since most code using this is not yet using SBuf
177 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
178 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
179 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
180
181 Port port_; ///< authority port subcomponent
182
183 // XXX: for now includes query-string.
184 SBuf path_; ///< URI path segment
185
186 // pre-assembled URI forms
187 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
188 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
189 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
190 };
191
192 inline std::ostream &
193 operator <<(std::ostream &os, const Uri &url)
194 {
195 // none means explicit empty string for scheme.
196 if (url.getScheme() != PROTO_NONE)
197 os << url.getScheme().image();
198 os << ":";
199
200 // no authority section on URN
201 if (url.getScheme() != PROTO_URN)
202 os << "//" << url.authority();
203
204 // path is what it is - including absent
205 os << url.path();
206 return os;
207 }
208
209 } // namespace AnyP
210
211 /* Deprecated functions for Legacy code handling URLs */
212
213 class HttpRequest;
214
215 void urlInitialize(void);
216 /// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
217 /// \returns a pointer to a local static buffer containing request URI
218 /// that honors strip_query_terms and %-encodes unsafe URI characters
219 char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
220 const char *urlCanonicalFakeHttps(const HttpRequest * request);
221 bool urlIsRelative(const char *);
222 char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
223 char *urlInternal(const char *dir, const char *name);
224 bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
225
/// Bit flags accepted by matchDomainName(); each non-zero flag is a single bit.
enum MatchDomainNameFlags {
    mdnNone = 0x0,                ///< no flag bits set
    mdnHonorWildcards = 0x1,      ///< bit 0
    mdnRejectSubsubDomains = 0x2  ///< bit 1
};
231
232 /**
233 * matchDomainName() matches a hostname (usually extracted from traffic)
234 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
235 * according to the following rules:
236 *
237 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
238 * -------------|-------------|-----------|-----------------------
239 * foo.com | foo.com | YES | YES
240 * .foo.com | foo.com | YES | YES
241 * x.foo.com | foo.com | NO | NO
242 * foo.com | .foo.com | YES | YES
243 * .foo.com | .foo.com | YES | YES
244 * x.foo.com | .foo.com | YES | YES
245 * .x.foo.com | .foo.com | YES | NO
246 * y.x.foo.com | .foo.com | YES | NO
247 *
248 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
249 * optional wildcards on hostname:
250 *
251 * HOST | DOMAIN | MATCH?
252 * -------------|--------------|-------
253 * *.foo.com | x.foo.com | YES
254 * *.foo.com | .x.foo.com | YES
255 * *.foo.com | .foo.com | YES
256 * *.foo.com | foo.com | NO
257 *
258 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
259 * supported.
260 *
261 * \retval 0 means the host matches the domain
262 * \retval 1 means the host is greater than the domain
263 * \retval -1 means the host is less than the domain
264 */
265 int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
266 bool urlCheckRequest(const HttpRequest *);
267 void urlExtMethodConfigure(void);
268
269 #endif /* SQUID_SRC_ANYP_URI_H */
270