]> git.ipfire.org Git - thirdparty/squid.git/blob - src/URL.h
SourceFormat Enforcement
[thirdparty/squid.git] / src / URL.h
1 /*
2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef SQUID_SRC_URL_H
10 #define SQUID_SRC_URL_H
11
12 #include "anyp/UriScheme.h"
13 #include "ip/Address.h"
14 #include "rfc2181.h"
15 #include "sbuf/SBuf.h"
16
17 #include <iosfwd>
18
19 /**
20 * The URL class represents a Uniform Resource Location
21 *
22 * Governed by RFC 3986
23 */
24 class URL
25 {
26 MEMPROXY_CLASS(URL);
27
28 public:
29 URL() : hostIsNumeric_(false), port_(0) {*host_=0;}
30 URL(AnyP::UriScheme const &aScheme);
31 URL(const URL &other) {
32 this->operator =(other);
33 }
34 URL &operator =(const URL &o) {
35 scheme_ = o.scheme_;
36 userInfo_ = o.userInfo_;
37 memcpy(host_, o.host_, sizeof(host_));
38 hostIsNumeric_ = o.hostIsNumeric_;
39 hostAddr_ = o.hostAddr_;
40 port_ = o.port_;
41 path_ = o.path_;
42 touch();
43 return *this;
44 }
45
46 void clear() {
47 scheme_=AnyP::PROTO_NONE;
48 hostIsNumeric_ = false;
49 *host_ = 0;
50 hostAddr_.setEmpty();
51 port_ = 0;
52 touch();
53 }
54 void touch(); ///< clear the cached URI display forms
55
56 AnyP::UriScheme const & getScheme() const {return scheme_;}
57
58 /// convert the URL scheme to that given
59 void setScheme(const AnyP::ProtocolType &p, const char *str) {
60 scheme_ = AnyP::UriScheme(p, str);
61 touch();
62 }
63
64 void userInfo(const SBuf &s) {userInfo_=s; touch();}
65 const SBuf &userInfo() const {return userInfo_;}
66
67 void host(const char *src);
68 const char *host(void) const {return host_;}
69 int hostIsNumeric(void) const {return hostIsNumeric_;}
70 Ip::Address const & hostIP(void) const {return hostAddr_;}
71
72 void port(unsigned short p) {port_=p; touch();}
73 unsigned short port() const {return port_;}
74
75 void path(const char *p) {path_=p; touch();}
76 void path(const SBuf &p) {path_=p; touch();}
77 const SBuf &path() const;
78
79 /// the static '/' default URL-path
80 static const SBuf &SlashPath();
81
82 /// the static '*' pseudo-URL
83 static const SBuf &Asterisk();
84
85 /**
86 * The authority-form URI for currently stored values.
87 *
88 * As defined by RFC 7230 section 5.3.3 this form omits the
89 * userinfo@ field from RFC 3986 defined authority segment.
90 *
91 * \param requirePort when true the port will be included, otherwise
92 * port will be elided when it is the default for
93 * the current scheme.
94 */
95 SBuf &authority(bool requirePort = false) const;
96
97 /**
98 * The absolute-form URI for currently stored values.
99 *
100 * As defined by RFC 7230 section 5.3.3 this form omits the
101 * userinfo@ field from RFC 3986 defined authority segments
102 * when the protocol scheme is http: or https:.
103 */
104 SBuf &absolute() const;
105
106 private:
107 /**
108 \par
109 * The scheme of this URL. This has the 'type code' smell about it.
110 * In future we may want to make the methods that dispatch based on
111 * the scheme virtual and have a class per protocol.
112 \par
113 * On the other hand, having Protocol as an explicit concept is useful,
114 * see for instance the ACLProtocol acl type. One way to represent this
115 * is to have one prototype URL with no host etc for each scheme,
116 * another is to have an explicit scheme class, and then each URL class
117 * could be a subclass of the scheme. Another way is one instance of
118 * a AnyP::UriScheme class instance for each URL scheme we support, and one URL
119 * class for each manner of treating the scheme : a Hierarchical URL, a
120 * non-hierarchical URL etc.
121 \par
122 * Deferring the decision, its a type code for now. RBC 20060507.
123 \par
124 * In order to make taking any of these routes easy, scheme is private
125 * and immutable, only settable at construction time,
126 */
127 AnyP::UriScheme scheme_;
128
129 SBuf userInfo_; // aka 'URL-login'
130
131 // XXX: uses char[] instead of SBUf to reduce performance regressions
132 // from c_str() since most code using this is not yet using SBuf
133 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
134 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
135 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
136
137 unsigned short port_; ///< URL port
138
139 // XXX: for now includes query-string.
140 SBuf path_; ///< URL path segment
141
142 // pre-assembled URL forms
143 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
144 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
145 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
146 };
147
148 inline std::ostream &
149 operator <<(std::ostream &os, const URL &url)
150 {
151 // none means explicit empty string for scheme.
152 if (url.getScheme() != AnyP::PROTO_NONE)
153 os << url.getScheme().image();
154 os << ":";
155
156 // no authority section on URN
157 if (url.getScheme() != AnyP::PROTO_URN)
158 os << "//" << url.authority();
159
160 // path is what it is - including absent
161 os << url.path();
162 return os;
163 }
164
// Request types are only used through pointers and references below,
// so forward declarations are sufficient here.
class HttpRequest;
class HttpRequestMethod;

// Free-standing URL helpers. Only the prototypes are visible in this
// header; see the implementation for exact semantics and for who owns
// the returned buffers.
void urlInitialize();

// The request parameter is optional and defaults to NULL.
HttpRequest *urlParse(const HttpRequestMethod&, char *, HttpRequest *request = NULL);

char *urlCanonicalClean(const HttpRequest *);
const char *urlCanonicalFakeHttps(const HttpRequest * request);

bool urlIsRelative(const char *);
char *urlMakeAbsolute(const HttpRequest *, const char *);

char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
char *urlInternal(const char *dir, const char *name);
176
/// Bit flags accepted by matchDomainName(); distinct bits may be OR-ed together.
enum MatchDomainNameFlags {
    mdnNone = 0x0,                ///< no flag set
    mdnHonorWildcards = 0x1,      ///< bit 0
    mdnRejectSubsubDomains = 0x2  ///< bit 1
};
182
183 /**
184 * matchDomainName() matches a hostname (usually extracted from traffic)
185 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
186 * according to the following rules:
187 *
188 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
189 * -------------|-------------|-----------|-----------------------
190 * foo.com | foo.com | YES | YES
191 * .foo.com | foo.com | YES | YES
192 * x.foo.com | foo.com | NO | NO
193 * foo.com | .foo.com | YES | YES
194 * .foo.com | .foo.com | YES | YES
195 * x.foo.com | .foo.com | YES | YES
196 * .x.foo.com | .foo.com | YES | NO
197 * y.x.foo.com | .foo.com | YES | NO
198 *
199 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
200 * optional wildcards on hostname:
201 *
202 * HOST | DOMAIN | MATCH?
203 * -------------|--------------|-------
204 * *.foo.com | x.foo.com | YES
205 * *.foo.com | .x.foo.com | YES
206 * *.foo.com | .foo.com | YES
207 * *.foo.com | foo.com | NO
208 *
209 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
210 * supported.
211 *
212 * \retval 0 means the host matches the domain
213 * \retval 1 means the host is greater than the domain
214 * \retval -1 means the host is less than the domain
215 */
216 int matchDomainName(const char *host, const char *domain, uint flags = mdnNone);
217 int urlCheckRequest(const HttpRequest *);
218 char *urlHostname(const char *url);
219 void urlExtMethodConfigure(void);
220
221 #endif /* SQUID_SRC_URL_H */
222