]> git.ipfire.org Git - thirdparty/squid.git/blame - src/anyp/Uri.h
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / anyp / Uri.h
CommitLineData
985c86bc 1/*
f70aedc4 2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
985c86bc 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
985c86bc 7 */
8
c8ab5ec6
AJ
9#ifndef SQUID_SRC_ANYP_URI_H
10#define SQUID_SRC_ANYP_URI_H
985c86bc 11
1ca54a54 12#include "anyp/UriScheme.h"
5c51bffb
AJ
13#include "ip/Address.h"
14#include "rfc2181.h"
65e41a45 15#include "sbuf/SBuf.h"
985c86bc 16
51b5dcf5
AJ
17#include <iosfwd>
18
c8ab5ec6
AJ
19class HttpRequestMethod;
20
21namespace AnyP
22{
23
63be0a78 24/**
c8ab5ec6
AJ
25 * Represents a Uniform Resource Identifier.
26 * Can store both URL or URN representations.
5c51bffb
AJ
27 *
28 * Governed by RFC 3986
63be0a78 29 */
c8ab5ec6 30class Uri
985c86bc 31{
c8ab5ec6 32 MEMPROXY_CLASS(Uri);
741c2986
AJ
33
34public:
c8ab5ec6
AJ
35 Uri() : hostIsNumeric_(false), port_(0) {*host_=0;}
36 Uri(AnyP::UriScheme const &aScheme);
37 Uri(const Uri &other) {
d31d59d8
AJ
38 this->operator =(other);
39 }
c8ab5ec6 40 Uri &operator =(const Uri &o) {
d31d59d8
AJ
41 scheme_ = o.scheme_;
42 userInfo_ = o.userInfo_;
43 memcpy(host_, o.host_, sizeof(host_));
44 hostIsNumeric_ = o.hostIsNumeric_;
45 hostAddr_ = o.hostAddr_;
46 port_ = o.port_;
47 path_ = o.path_;
48 touch();
49 return *this;
50 }
4e3f4dc7
AJ
51
52 void clear() {
53 scheme_=AnyP::PROTO_NONE;
5c51bffb
AJ
54 hostIsNumeric_ = false;
55 *host_ = 0;
56 hostAddr_.setEmpty();
57 port_ = 0;
58 touch();
4e3f4dc7 59 }
5c51bffb 60 void touch(); ///< clear the cached URI display forms
4e3f4dc7 61
6c880a16 62 bool parse(const HttpRequestMethod &, const SBuf &url);
db59367a 63
bec110e4
EB
64 /// \return a new URI that honors uri_whitespace
65 static char *cleanup(const char *uri);
66
1ca54a54 67 AnyP::UriScheme const & getScheme() const {return scheme_;}
985c86bc 68
4e3f4dc7 69 /// convert the URL scheme to that given
d31d59d8
AJ
70 void setScheme(const AnyP::ProtocolType &p, const char *str) {
71 scheme_ = AnyP::UriScheme(p, str);
72 touch();
73 }
6c880a16
AJ
74 void setScheme(const AnyP::UriScheme &s) {
75 scheme_ = s;
76 touch();
77 }
4e3f4dc7 78
5c51bffb 79 void userInfo(const SBuf &s) {userInfo_=s; touch();}
614bd511
AJ
80 /// \returns raw userinfo subcomponent (or an empty string)
81 /// the caller is responsible for caller-specific encoding
92d6986d
AJ
82 const SBuf &userInfo() const {return userInfo_;}
83
5c51bffb
AJ
84 void host(const char *src);
85 const char *host(void) const {return host_;}
86 int hostIsNumeric(void) const {return hostIsNumeric_;}
87 Ip::Address const & hostIP(void) const {return hostAddr_;}
88
9ce4a1eb
CT
89 /// \returns the host subcomponent of the authority component
90 /// If the host is an IPv6 address, returns that IP address without
91 /// [brackets]! See RFC 3986 Section 3.2.2.
92 SBuf hostOrIp() const;
93
5c51bffb
AJ
94 void port(unsigned short p) {port_=p; touch();}
95 unsigned short port() const {return port_;}
d754c7af
EB
96 /// reset the port to the default port number for the current scheme
97 void defaultPort() { port(getScheme().defaultPort()); }
5c51bffb 98
51b5dcf5
AJ
99 void path(const char *p) {path_=p; touch();}
100 void path(const SBuf &p) {path_=p; touch();}
101 const SBuf &path() const;
102
614bd511
AJ
103 /**
104 * Merge a relative-path URL into the existing URI details.
105 * Implements RFC 3986 section 5.2.3
106 *
107 * The caller must ensure relUrl is a valid relative-path.
108 *
109 * NP: absolute-path are also accepted, but path() method
110 * should be used instead when possible.
111 */
112 void addRelativePath(const char *relUrl);
113
51b5dcf5
AJ
114 /// the static '/' default URL-path
115 static const SBuf &SlashPath();
116
c8ab5ec6 117 /// the static '*' pseudo-URI
2e260208
AJ
118 static const SBuf &Asterisk();
119
614bd511
AJ
120 /// %-encode characters in a buffer which do not conform to
121 /// the provided set of expected characters.
122 static SBuf Encode(const SBuf &, const CharacterSet &expected);
123
5c51bffb
AJ
124 /**
125 * The authority-form URI for currently stored values.
126 *
127 * As defined by RFC 7230 section 5.3.3 this form omits the
128 * userinfo@ field from RFC 3986 defined authority segment.
129 *
130 * \param requirePort when true the port will be included, otherwise
131 * port will be elided when it is the default for
132 * the current scheme.
133 */
134 SBuf &authority(bool requirePort = false) const;
135
c823e2da
AJ
136 /**
137 * The absolute-form URI for currently stored values.
138 *
139 * As defined by RFC 7230 section 5.3.3 this form omits the
140 * userinfo@ field from RFC 3986 defined authority segments
141 * when the protocol scheme is http: or https:.
142 */
143 SBuf &absolute() const;
144
985c86bc 145private:
6c880a16 146 void parseUrn(Parser::Tokenizer&);
91489e45 147
63be0a78 148 /**
149 \par
150 * The scheme of this URL. This has the 'type code' smell about it.
26ac0430
AJ
151 * In future we may want to make the methods that dispatch based on
152 * the scheme virtual and have a class per protocol.
63be0a78 153 \par
154 * On the other hand, having Protocol as an explicit concept is useful,
985c86bc 155 * see for instance the ACLProtocol acl type. One way to represent this
26ac0430 156 * is to have one prototype URL with no host etc for each scheme,
985c86bc 157 * another is to have an explicit scheme class, and then each URL class
26ac0430 158 * could be a subclass of the scheme. Another way is one instance of
c8ab5ec6 159 * a AnyP::UriScheme class instance for each URL scheme we support, and one
985c86bc 160 * class for each manner of treating the scheme : a Hierarchical URL, a
63be0a78 161 * non-hierarchical URL etc.
162 \par
985c86bc 163 * Deferring the decision, its a type code for now. RBC 20060507.
63be0a78 164 \par
c8ab5ec6
AJ
165 * In order to make taking any of these routes easy, scheme is private,
166 * only settable at construction time, or with explicit setter
985c86bc 167 */
4e3f4dc7 168 AnyP::UriScheme scheme_;
92d6986d
AJ
169
170 SBuf userInfo_; // aka 'URL-login'
5c51bffb
AJ
171
172 // XXX: uses char[] instead of SBUf to reduce performance regressions
173 // from c_str() since most code using this is not yet using SBuf
174 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
175 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
176 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
177
178 unsigned short port_; ///< URL port
179
51b5dcf5 180 // XXX: for now includes query-string.
c8ab5ec6 181 SBuf path_; ///< URI path segment
51b5dcf5 182
c8ab5ec6 183 // pre-assembled URI forms
5c51bffb
AJ
184 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
185 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
c823e2da 186 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
985c86bc 187};
188
c8ab5ec6
AJ
189} // namespace AnyP
190
51b5dcf5 191inline std::ostream &
c8ab5ec6 192operator <<(std::ostream &os, const AnyP::Uri &url)
51b5dcf5 193{
d31d59d8
AJ
194 // none means explicit empty string for scheme.
195 if (url.getScheme() != AnyP::PROTO_NONE)
196 os << url.getScheme().image();
197 os << ":";
198
199 // no authority section on URN
200 if (url.getScheme() != AnyP::PROTO_URN)
201 os << "//" << url.authority();
202
203 // path is what it is - including absent
204 os << url.path();
51b5dcf5
AJ
205 return os;
206}
207
c8ab5ec6
AJ
208/* Deprecated functions for Legacy code handling URLs */
209
fc54b8d2 210class HttpRequest;
fc54b8d2 211
8a648e8d 212void urlInitialize(void);
bec110e4
EB
213/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
214/// \returns a pointer to a local static buffer containing request URI
215/// that honors strip_query_terms and %-encodes unsafe URI characters
216char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
8a648e8d
FC
217const char *urlCanonicalFakeHttps(const HttpRequest * request);
218bool urlIsRelative(const char *);
8a648e8d
FC
219char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
220char *urlInternal(const char *dir, const char *name);
38aa10ef 221bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
69f69080 222
abbd7825
CT
/// Flags accepted by matchDomainName(). Each non-zero flag occupies its own
/// bit; mdnNone is the default used when no flags are given.
enum MatchDomainNameFlags {
    mdnNone = 0x0,
    mdnHonorWildcards = 0x1,      // bit 0
    mdnRejectSubsubDomains = 0x2  // bit 1
};
228
69f69080 229/**
abbd7825
CT
230 * matchDomainName() matches a hostname (usually extracted from traffic)
231 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
232 * according to the following rules:
69f69080 233 *
abbd7825
CT
234 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
235 * -------------|-------------|-----------|-----------------------
236 * foo.com | foo.com | YES | YES
237 * .foo.com | foo.com | YES | YES
238 * x.foo.com | foo.com | NO | NO
239 * foo.com | .foo.com | YES | YES
240 * .foo.com | .foo.com | YES | YES
241 * x.foo.com | .foo.com | YES | YES
242 * .x.foo.com | .foo.com | YES | NO
243 * y.x.foo.com | .foo.com | YES | NO
69f69080 244 *
abbd7825 245 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
69f69080
CT
246 * optional wildcards on hostname:
247 *
248 * HOST | DOMAIN | MATCH?
249 * -------------|--------------|-------
250 * *.foo.com | x.foo.com | YES
251 * *.foo.com | .x.foo.com | YES
252 * *.foo.com | .foo.com | YES
253 * *.foo.com | foo.com | NO
254 *
abbd7825
CT
255 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
256 * supported.
257 *
69f69080
CT
258 * \retval 0 means the host matches the domain
259 * \retval 1 means the host is greater than the domain
260 * \retval -1 means the host is less than the domain
261 */
6c1219b9 262int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
8a648e8d 263int urlCheckRequest(const HttpRequest *);
8a648e8d 264void urlExtMethodConfigure(void);
fc54b8d2 265
c8ab5ec6 266#endif /* SQUID_SRC_ANYP_URI_H */
f53969cc 267