]> git.ipfire.org Git - thirdparty/squid.git/blame - src/URL.h
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / src / URL.h
CommitLineData
/*
 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
8
9#ifndef SQUID_SRC_URL_H
10#define SQUID_SRC_URL_H
11
1ca54a54 12#include "anyp/UriScheme.h"
5c51bffb
AJ
13#include "ip/Address.h"
14#include "rfc2181.h"
65e41a45 15#include "sbuf/SBuf.h"
985c86bc 16
51b5dcf5
AJ
17#include <iosfwd>
18
63be0a78 19/**
63be0a78 20 * The URL class represents a Uniform Resource Location
5c51bffb
AJ
21 *
22 * Governed by RFC 3986
63be0a78 23 */
985c86bc 24class URL
25{
985c86bc 26 MEMPROXY_CLASS(URL);
741c2986
AJ
27
28public:
d59e4742
FC
29 URL() : hostIsNumeric_(false), port_(0) {*host_=0;}
30 URL(AnyP::UriScheme const &aScheme);
d31d59d8
AJ
31 URL(const URL &other) {
32 this->operator =(other);
33 }
34 URL &operator =(const URL &o) {
35 scheme_ = o.scheme_;
36 userInfo_ = o.userInfo_;
37 memcpy(host_, o.host_, sizeof(host_));
38 hostIsNumeric_ = o.hostIsNumeric_;
39 hostAddr_ = o.hostAddr_;
40 port_ = o.port_;
41 path_ = o.path_;
42 touch();
43 return *this;
44 }
4e3f4dc7
AJ
45
46 void clear() {
47 scheme_=AnyP::PROTO_NONE;
5c51bffb
AJ
48 hostIsNumeric_ = false;
49 *host_ = 0;
50 hostAddr_.setEmpty();
51 port_ = 0;
52 touch();
4e3f4dc7 53 }
5c51bffb 54 void touch(); ///< clear the cached URI display forms
4e3f4dc7 55
8babada0 56 bool parse(const HttpRequestMethod &, const char *url);
db59367a 57
1ca54a54 58 AnyP::UriScheme const & getScheme() const {return scheme_;}
985c86bc 59
4e3f4dc7 60 /// convert the URL scheme to that given
d31d59d8
AJ
61 void setScheme(const AnyP::ProtocolType &p, const char *str) {
62 scheme_ = AnyP::UriScheme(p, str);
63 touch();
64 }
4e3f4dc7 65
5c51bffb 66 void userInfo(const SBuf &s) {userInfo_=s; touch();}
92d6986d
AJ
67 const SBuf &userInfo() const {return userInfo_;}
68
5c51bffb
AJ
69 void host(const char *src);
70 const char *host(void) const {return host_;}
71 int hostIsNumeric(void) const {return hostIsNumeric_;}
72 Ip::Address const & hostIP(void) const {return hostAddr_;}
73
74 void port(unsigned short p) {port_=p; touch();}
75 unsigned short port() const {return port_;}
76
51b5dcf5
AJ
77 void path(const char *p) {path_=p; touch();}
78 void path(const SBuf &p) {path_=p; touch();}
79 const SBuf &path() const;
80
81 /// the static '/' default URL-path
82 static const SBuf &SlashPath();
83
2e260208
AJ
84 /// the static '*' pseudo-URL
85 static const SBuf &Asterisk();
86
5c51bffb
AJ
87 /**
88 * The authority-form URI for currently stored values.
89 *
90 * As defined by RFC 7230 section 5.3.3 this form omits the
91 * userinfo@ field from RFC 3986 defined authority segment.
92 *
93 * \param requirePort when true the port will be included, otherwise
94 * port will be elided when it is the default for
95 * the current scheme.
96 */
97 SBuf &authority(bool requirePort = false) const;
98
c823e2da
AJ
99 /**
100 * The absolute-form URI for currently stored values.
101 *
102 * As defined by RFC 7230 section 5.3.3 this form omits the
103 * userinfo@ field from RFC 3986 defined authority segments
104 * when the protocol scheme is http: or https:.
105 */
106 SBuf &absolute() const;
107
985c86bc 108private:
91489e45
AJ
109 void parseFinish(const AnyP::ProtocolType, const char *const, const char *const, const char *const, const SBuf &, const int);
110
63be0a78 111 /**
112 \par
113 * The scheme of this URL. This has the 'type code' smell about it.
26ac0430
AJ
114 * In future we may want to make the methods that dispatch based on
115 * the scheme virtual and have a class per protocol.
63be0a78 116 \par
117 * On the other hand, having Protocol as an explicit concept is useful,
985c86bc 118 * see for instance the ACLProtocol acl type. One way to represent this
26ac0430 119 * is to have one prototype URL with no host etc for each scheme,
985c86bc 120 * another is to have an explicit scheme class, and then each URL class
26ac0430 121 * could be a subclass of the scheme. Another way is one instance of
1ca54a54 122 * a AnyP::UriScheme class instance for each URL scheme we support, and one URL
985c86bc 123 * class for each manner of treating the scheme : a Hierarchical URL, a
63be0a78 124 * non-hierarchical URL etc.
125 \par
985c86bc 126 * Deferring the decision, its a type code for now. RBC 20060507.
63be0a78 127 \par
26ac0430 128 * In order to make taking any of these routes easy, scheme is private
985c86bc 129 * and immutable, only settable at construction time,
130 */
4e3f4dc7 131 AnyP::UriScheme scheme_;
92d6986d
AJ
132
133 SBuf userInfo_; // aka 'URL-login'
5c51bffb
AJ
134
135 // XXX: uses char[] instead of SBUf to reduce performance regressions
136 // from c_str() since most code using this is not yet using SBuf
137 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
138 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
139 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
140
141 unsigned short port_; ///< URL port
142
51b5dcf5
AJ
143 // XXX: for now includes query-string.
144 SBuf path_; ///< URL path segment
145
5c51bffb
AJ
146 // pre-assembled URL forms
147 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
148 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
c823e2da 149 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
985c86bc 150};
151
51b5dcf5
AJ
152inline std::ostream &
153operator <<(std::ostream &os, const URL &url)
154{
d31d59d8
AJ
155 // none means explicit empty string for scheme.
156 if (url.getScheme() != AnyP::PROTO_NONE)
157 os << url.getScheme().image();
158 os << ":";
159
160 // no authority section on URN
161 if (url.getScheme() != AnyP::PROTO_URN)
162 os << "//" << url.authority();
163
164 // path is what it is - including absent
165 os << url.path();
51b5dcf5
AJ
166 return os;
167}
168
fc54b8d2
FC
// Types referenced by pointer or reference only; full definitions are not needed here.
class HttpRequest;
class HttpRequestMethod;

// URL helper functions. Only the prototypes are visible in this header;
// ownership and lifetime of the returned character buffers are decided by
// the definitions elsewhere -- TODO confirm before freeing or caching them.
void urlInitialize();
char *urlCanonicalClean(const HttpRequest *request);
const char *urlCanonicalFakeHttps(const HttpRequest *request);
bool urlIsRelative(const char *);
char *urlMakeAbsolute(const HttpRequest *request, const char *);
char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
char *urlInternal(const char *dir, const char *name);
69f69080 179
abbd7825
CT
/// Bit flags adjusting matchDomainName() behavior; flags may be OR-ed together.
enum MatchDomainNameFlags {
    mdnNone = 0,                    ///< plain matching, no extra rules
    mdnHonorWildcards = 1 << 0,     ///< accept an optional wildcard on the hostname
    mdnRejectSubsubDomains = 1 << 1 ///< reject sub-subdomains (e.g. y.x.foo.com against .foo.com)
};

/**
 * matchDomainName() matches a hostname (usually extracted from traffic)
 * with a domainname. When the mdnNone or mdnRejectSubsubDomains flags are
 * used, matching follows these rules:
 *
 *    HOST      |   DOMAIN    |   mdnNone | mdnRejectSubsubDomains
 * -------------|-------------|-----------|-----------------------
 *      foo.com |   foo.com   |     YES   |   YES
 *     .foo.com |   foo.com   |     YES   |   YES
 *    x.foo.com |   foo.com   |     NO    |   NO
 *      foo.com |  .foo.com   |     YES   |   YES
 *     .foo.com |  .foo.com   |     YES   |   YES
 *    x.foo.com |  .foo.com   |     YES   |   YES
 *   .x.foo.com |  .foo.com   |     YES   |   NO
 *  y.x.foo.com |  .foo.com   |     YES   |   NO
 *
 * If the mdnHonorWildcards flag is set then matchDomainName() also accepts
 * optional wildcards on hostname:
 *
 *    HOST      |    DOMAIN    |  MATCH?
 * -------------|--------------|-------
 *    *.foo.com |   x.foo.com  |   YES
 *    *.foo.com |  .x.foo.com  |   YES
 *    *.foo.com |    .foo.com  |   YES
 *    *.foo.com |     foo.com  |   NO
 *
 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
 * supported.
 *
 * \param host   the hostname to test
 * \param domain the domain name to test against
 * \param flags  bitwise OR of MatchDomainNameFlags values
 *
 * \retval 0 means the host matches the domain
 * \retval 1 means the host is greater than the domain
 * \retval -1 means the host is less than the domain
 */
// 'unsigned int' instead of 'uint': the latter is a non-standard typedef
// that not every platform provides; where it exists it names the same type.
int matchDomainName(const char *host, const char *domain, unsigned int flags = mdnNone);
8a648e8d 220int urlCheckRequest(const HttpRequest *);
8a648e8d
FC
221char *urlHostname(const char *url);
222void urlExtMethodConfigure(void);
fc54b8d2 223
#endif /* SQUID_SRC_URL_H */
f53969cc 225