]> git.ipfire.org Git - thirdparty/squid.git/blame - src/anyp/Uri.h
Source Format Enforcement (#532)
[thirdparty/squid.git] / src / anyp / Uri.h
CommitLineData
985c86bc 1/*
77b1029d 2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
985c86bc 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
985c86bc 7 */
8
c8ab5ec6
AJ
9#ifndef SQUID_SRC_ANYP_URI_H
10#define SQUID_SRC_ANYP_URI_H
985c86bc 11
1ca54a54 12#include "anyp/UriScheme.h"
5c51bffb
AJ
13#include "ip/Address.h"
14#include "rfc2181.h"
65e41a45 15#include "sbuf/SBuf.h"
985c86bc 16
51b5dcf5
AJ
17#include <iosfwd>
18
c8ab5ec6
AJ
19class HttpRequestMethod;
20
21namespace AnyP
22{
23
63be0a78 24/**
c8ab5ec6
AJ
25 * Represents a Uniform Resource Identifier.
26 * Can store either URL or URN representations.
5c51bffb
AJ
27 *
28 * Governed by RFC 3986
63be0a78 29 */
c8ab5ec6 30class Uri
985c86bc 31{
c8ab5ec6 32 MEMPROXY_CLASS(Uri);
741c2986
AJ
33
34public:
c8ab5ec6
AJ
/// creates a URI with an empty host string, a non-numeric host flag, and port 0
35 Uri() : hostIsNumeric_(false), port_(0) {*host_=0;}
36 Uri(AnyP::UriScheme const &aScheme);
/// copy-constructs by delegating to the copy assignment operator
37 Uri(const Uri &other) {
d31d59d8
AJ
38 this->operator =(other);
39 }
/// copies scheme, userinfo, host (text, numeric flag, and address), port,
/// and path from o; the pre-assembled display forms are not copied,
/// touch() is called instead
c8ab5ec6 40 Uri &operator =(const Uri &o) {
d31d59d8
AJ
41 scheme_ = o.scheme_;
42 userInfo_ = o.userInfo_;
43 memcpy(host_, o.host_, sizeof(host_));
44 hostIsNumeric_ = o.hostIsNumeric_;
45 hostAddr_ = o.hostAddr_;
46 port_ = o.port_;
47 path_ = o.path_;
48 touch();
49 return *this;
50 }
4e3f4dc7
AJ
51
/// resets the scheme to PROTO_NONE, empties the host fields, zeroes the
/// port, and calls touch(); userInfo_ and path_ are not reset directly here
52 void clear() {
53 scheme_=AnyP::PROTO_NONE;
5c51bffb
AJ
54 hostIsNumeric_ = false;
55 *host_ = 0;
56 hostAddr_.setEmpty();
57 port_ = 0;
58 touch();
4e3f4dc7 59 }
5c51bffb 60 void touch(); ///< clear the cached URI display forms
4e3f4dc7 61
6c880a16 62 bool parse(const HttpRequestMethod &, const SBuf &url);
db59367a 63
bec110e4
EB
64 /// \return a new URI that honors uri_whitespace
65 static char *cleanup(const char *uri);
66
1ca54a54 67 AnyP::UriScheme const & getScheme() const {return scheme_;}
985c86bc 68
4e3f4dc7 69 /// convert the URL scheme to that given
d31d59d8
AJ
70 void setScheme(const AnyP::ProtocolType &p, const char *str) {
71 scheme_ = AnyP::UriScheme(p, str);
72 touch();
73 }
6c880a16
AJ
74 void setScheme(const AnyP::UriScheme &s) {
75 scheme_ = s;
76 touch();
77 }
4e3f4dc7 78
5c51bffb 79 void userInfo(const SBuf &s) {userInfo_=s; touch();}
92d6986d
AJ
80 const SBuf &userInfo() const {return userInfo_;}
81
5c51bffb
AJ
82 void host(const char *src);
83 const char *host(void) const {return host_;}
84 int hostIsNumeric(void) const {return hostIsNumeric_;}
85 Ip::Address const & hostIP(void) const {return hostAddr_;}
86
9ce4a1eb
CT
87 /// \returns the host subcomponent of the authority component
88 /// If the host is an IPv6 address, returns that IP address without
89 /// [brackets]! See RFC 3986 Section 3.2.2.
90 SBuf hostOrIp() const;
91
5c51bffb
AJ
92 void port(unsigned short p) {port_=p; touch();}
93 unsigned short port() const {return port_;}
d754c7af
EB
94 /// reset the port to the default port number for the current scheme
95 void defaultPort() { port(getScheme().defaultPort()); }
5c51bffb 96
51b5dcf5
AJ
97 void path(const char *p) {path_=p; touch();}
98 void path(const SBuf &p) {path_=p; touch();}
99 const SBuf &path() const;
100
101 /// the static '/' default URL-path
102 static const SBuf &SlashPath();
103
c8ab5ec6 104 /// the static '*' pseudo-URI
2e260208
AJ
105 static const SBuf &Asterisk();
106
5c51bffb
AJ
107 /**
108 * The authority-form URI for currently stored values.
109 *
110 * As defined by RFC 7230 section 5.3.3 this form omits the
111 * userinfo@ field from RFC 3986 defined authority segment.
112 *
113 * \param requirePort when true the port will be included, otherwise
114 * port will be elided when it is the default for
115 * the current scheme.
116 */
117 SBuf &authority(bool requirePort = false) const;
118
c823e2da
AJ
119 /**
120 * The absolute-form URI for currently stored values.
121 *
122 * As defined by RFC 7230 section 5.3.3 this form omits the
123 * userinfo@ field from RFC 3986 defined authority segments
124 * when the protocol scheme is http: or https:.
125 */
126 SBuf &absolute() const;
127
985c86bc 128private:
6c880a16 129 void parseUrn(Parser::Tokenizer&);
91489e45 130
63be0a78 131 /**
132 \par
133 * The scheme of this URL. This has the 'type code' smell about it.
26ac0430
AJ
134 * In future we may want to make the methods that dispatch based on
135 * the scheme virtual and have a class per protocol.
63be0a78 136 \par
137 * On the other hand, having Protocol as an explicit concept is useful,
985c86bc 138 * see for instance the ACLProtocol acl type. One way to represent this
26ac0430 139 * is to have one prototype URL with no host etc for each scheme,
985c86bc 140 * another is to have an explicit scheme class, and then each URL class
26ac0430 141 * could be a subclass of the scheme. Another way is one instance of
c8ab5ec6 142 * an AnyP::UriScheme class instance for each URL scheme we support, and one
985c86bc 143 * class for each manner of treating the scheme : a Hierarchical URL, a
63be0a78 144 * non-hierarchical URL etc.
145 \par
985c86bc 146 * Deferring the decision, it's a type code for now. RBC 20060507.
63be0a78 147 \par
c8ab5ec6
AJ
148 * In order to make taking any of these routes easy, scheme is private,
149 * only settable at construction time, or with explicit setter
985c86bc 150 */
4e3f4dc7 151 AnyP::UriScheme scheme_;
92d6986d
AJ
152
153 SBuf userInfo_; // aka 'URL-login'
5c51bffb
AJ
154
155 // XXX: uses char[] instead of SBuf to reduce performance regressions
156 // from c_str() since most code using this is not yet using SBuf
157 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
158 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
159 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
160
161 unsigned short port_; ///< URL port
162
51b5dcf5 163 // XXX: for now includes query-string.
c8ab5ec6 164 SBuf path_; ///< URI path segment
51b5dcf5 165
c8ab5ec6 166 // pre-assembled URI forms
5c51bffb
AJ
167 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
168 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
c823e2da 169 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
985c86bc 170};
171
c8ab5ec6
AJ
172} // namespace AnyP
173
/// Writes url to os as: the scheme image (skipped for PROTO_NONE), a ":",
/// then "//" followed by authority() unless the scheme is PROTO_URN, and
/// finally path(). Returns os to allow chaining.
51b5dcf5 174inline std::ostream &
c8ab5ec6 175operator <<(std::ostream &os, const AnyP::Uri &url)
51b5dcf5 176{
d31d59d8
AJ
177 // PROTO_NONE means an explicit empty string for the scheme;
// the ":" separator below is written in either case.
178 if (url.getScheme() != AnyP::PROTO_NONE)
179 os << url.getScheme().image();
180 os << ":";
181
182 // no authority section on URN
183 if (url.getScheme() != AnyP::PROTO_URN)
184 os << "//" << url.authority();
185
186 // path is written as stored, even when it is absent (empty)
187 os << url.path();
51b5dcf5
AJ
188 return os;
189}
190
c8ab5ec6
AJ
191/* Deprecated functions for Legacy code handling URLs */
192
fc54b8d2 193class HttpRequest;
fc54b8d2 194
8a648e8d 195void urlInitialize(void);
bec110e4
EB
196/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
197/// \returns a pointer to a local static buffer containing request URI
198/// that honors strip_query_terms and %-encodes unsafe URI characters
199char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
8a648e8d
FC
200const char *urlCanonicalFakeHttps(const HttpRequest * request);
201bool urlIsRelative(const char *);
202char *urlMakeAbsolute(const HttpRequest *, const char *);
203char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
204char *urlInternal(const char *dir, const char *name);
38aa10ef 205bool urlAppendDomain(char *host); ///< apply append_domain config to the given hostname
69f69080 206
abbd7825
CT
/// Bit flags selecting the matching rules used by matchDomainName();
/// mdnHonorWildcards and mdnRejectSubsubDomains may be combined.
207enum MatchDomainNameFlags {
208 mdnNone = 0, ///< no optional behavior requested
209 mdnHonorWildcards = 1 << 0, ///< also accept wildcards in the hostname (e.g., *.foo.com)
210 mdnRejectSubsubDomains = 1 << 1 ///< reject deeper sub-subdomains, e.g., y.x.foo.com against .foo.com
211};
212
69f69080 213/**
abbd7825
CT
214 * matchDomainName() matches a hostname (usually extracted from traffic)
215 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
216 * according to the following rules:
69f69080 217 *
abbd7825
CT
218 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
219 * -------------|-------------|-----------|-----------------------
220 * foo.com | foo.com | YES | YES
221 * .foo.com | foo.com | YES | YES
222 * x.foo.com | foo.com | NO | NO
223 * foo.com | .foo.com | YES | YES
224 * .foo.com | .foo.com | YES | YES
225 * x.foo.com | .foo.com | YES | YES
226 * .x.foo.com | .foo.com | YES | NO
227 * y.x.foo.com | .foo.com | YES | NO
69f69080 228 *
abbd7825 229 * If the mdnHonorWildcards flag is set, matchDomainName() also accepts
69f69080
CT
230 * optional wildcards in the hostname:
231 *
232 * HOST | DOMAIN | MATCH?
233 * -------------|--------------|-------
234 * *.foo.com | x.foo.com | YES
235 * *.foo.com | .x.foo.com | YES
236 * *.foo.com | .foo.com | YES
237 * *.foo.com | foo.com | NO
238 *
abbd7825
CT
239 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
240 * supported.
241 *
69f69080
CT
242 * \retval 0 means the host matches the domain
243 * \retval 1 means the host is greater than the domain
244 * \retval -1 means the host is less than the domain
245 */
6c1219b9 246int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone);
8a648e8d 247int urlCheckRequest(const HttpRequest *);
8a648e8d
FC
248char *urlHostname(const char *url);
249void urlExtMethodConfigure(void);
fc54b8d2 250
c8ab5ec6 251#endif /* SQUID_SRC_ANYP_URI_H */
f53969cc 252