]>
Commit | Line | Data |
---|---|---|
985c86bc | 1 | /* |
5b74111a | 2 | * Copyright (C) 1996-2018 The Squid Software Foundation and contributors |
985c86bc | 3 | * |
bbc27441 AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
985c86bc | 7 | */ |
8 | ||
9 | #ifndef SQUID_SRC_URL_H | |
10 | #define SQUID_SRC_URL_H | |
11 | ||
1ca54a54 | 12 | #include "anyp/UriScheme.h" |
5c51bffb AJ |
13 | #include "ip/Address.h" |
14 | #include "rfc2181.h" | |
65e41a45 | 15 | #include "sbuf/SBuf.h" |
985c86bc | 16 | |
51b5dcf5 AJ |
17 | #include <iosfwd> |
18 | ||
63be0a78 | 19 | /** |
63be0a78 | 20 | * The URL class represents a Uniform Resource Locator |
5c51bffb AJ |
21 | * |
22 | * Governed by RFC 3986 | |
63be0a78 | 23 | */ |
985c86bc | 24 | class URL |
25 | { | |
985c86bc | 26 | MEMPROXY_CLASS(URL); |
741c2986 AJ |
27 | |
28 | public: | |
d59e4742 FC |
29 | URL() : hostIsNumeric_(false), port_(0) {*host_=0;} |
30 | URL(AnyP::UriScheme const &aScheme); | |
d31d59d8 AJ |
31 | URL(const URL &other) { |
32 | this->operator =(other); | |
33 | } | |
34 | URL &operator =(const URL &o) { | |
35 | scheme_ = o.scheme_; | |
36 | userInfo_ = o.userInfo_; | |
37 | memcpy(host_, o.host_, sizeof(host_)); | |
38 | hostIsNumeric_ = o.hostIsNumeric_; | |
39 | hostAddr_ = o.hostAddr_; | |
40 | port_ = o.port_; | |
41 | path_ = o.path_; | |
42 | touch(); | |
43 | return *this; | |
44 | } | |
4e3f4dc7 AJ |
45 | |
46 | void clear() { | |
47 | scheme_=AnyP::PROTO_NONE; | |
5c51bffb AJ |
48 | hostIsNumeric_ = false; |
49 | *host_ = 0; | |
50 | hostAddr_.setEmpty(); | |
51 | port_ = 0; | |
52 | touch(); | |
4e3f4dc7 | 53 | } |
5c51bffb | 54 | void touch(); ///< clear the cached URI display forms |
4e3f4dc7 | 55 | |
8babada0 | 56 | bool parse(const HttpRequestMethod &, const char *url); |
db59367a | 57 | |
1ca54a54 | 58 | AnyP::UriScheme const & getScheme() const {return scheme_;} |
985c86bc | 59 | |
4e3f4dc7 | 60 | /// convert the URL scheme to that given |
d31d59d8 AJ |
61 | void setScheme(const AnyP::ProtocolType &p, const char *str) { |
62 | scheme_ = AnyP::UriScheme(p, str); | |
63 | touch(); | |
64 | } | |
4e3f4dc7 | 65 | |
5c51bffb | 66 | void userInfo(const SBuf &s) {userInfo_=s; touch();} |
92d6986d AJ |
67 | const SBuf &userInfo() const {return userInfo_;} |
68 | ||
5c51bffb AJ |
69 | void host(const char *src); |
70 | const char *host(void) const {return host_;} | |
71 | int hostIsNumeric(void) const {return hostIsNumeric_;} | |
72 | Ip::Address const & hostIP(void) const {return hostAddr_;} | |
73 | ||
74 | void port(unsigned short p) {port_=p; touch();} | |
75 | unsigned short port() const {return port_;} | |
76 | ||
51b5dcf5 AJ |
77 | void path(const char *p) {path_=p; touch();} |
78 | void path(const SBuf &p) {path_=p; touch();} | |
79 | const SBuf &path() const; | |
80 | ||
81 | /// the static '/' default URL-path | |
82 | static const SBuf &SlashPath(); | |
83 | ||
2e260208 AJ |
84 | /// the static '*' pseudo-URL |
85 | static const SBuf &Asterisk(); | |
86 | ||
5c51bffb AJ |
87 | /** |
88 | * The authority-form URI for currently stored values. | |
89 | * | |
90 | * As defined by RFC 7230 section 5.3.3 this form omits the | |
91 | * userinfo@ field from RFC 3986 defined authority segment. | |
92 | * | |
93 | * \param requirePort when true the port will be included, otherwise | |
94 | * port will be elided when it is the default for | |
95 | * the current scheme. | |
96 | */ | |
97 | SBuf &authority(bool requirePort = false) const; | |
98 | ||
c823e2da AJ |
99 | /** |
100 | * The absolute-form URI for currently stored values. | |
101 | * | |
102 | * As defined by RFC 7230 section 5.3.3 this form omits the | |
103 | * userinfo@ field from RFC 3986 defined authority segments | |
104 | * when the protocol scheme is http: or https:. | |
105 | */ | |
106 | SBuf &absolute() const; | |
107 | ||
985c86bc | 108 | private: |
91489e45 AJ |
109 | void parseFinish(const AnyP::ProtocolType, const char *const, const char *const, const char *const, const SBuf &, const int); |
110 | ||
63be0a78 | 111 | /** |
112 | \par | |
113 | * The scheme of this URL. This has the 'type code' smell about it. | |
26ac0430 AJ |
114 | * In future we may want to make the methods that dispatch based on |
115 | * the scheme virtual and have a class per protocol. | |
63be0a78 | 116 | \par |
117 | * On the other hand, having Protocol as an explicit concept is useful, | |
985c86bc | 118 | * see for instance the ACLProtocol acl type. One way to represent this |
26ac0430 | 119 | * is to have one prototype URL with no host etc for each scheme, |
985c86bc | 120 | * another is to have an explicit scheme class, and then each URL class |
26ac0430 | 121 | * could be a subclass of the scheme. Another way is one instance of |
1ca54a54 | 122 | * an AnyP::UriScheme class instance for each URL scheme we support, and one URL |
985c86bc | 123 | * class for each manner of treating the scheme : a Hierarchical URL, a |
63be0a78 | 124 | * non-hierarchical URL etc. |
125 | \par | |
985c86bc | 126 | * Deferring the decision, it's a type code for now. RBC 20060507. |
63be0a78 | 127 | \par |
26ac0430 | 128 | * In order to make taking any of these routes easy, scheme is private |
985c86bc | 129 | * and immutable, only settable at construction time, |
130 | */ | |
4e3f4dc7 | 131 | AnyP::UriScheme scheme_; |
92d6986d AJ |
132 | |
133 | SBuf userInfo_; // aka 'URL-login' | |
5c51bffb AJ |
134 | |
135 | // XXX: uses char[] instead of SBuf to reduce performance regressions | |
136 | // from c_str() since most code using this is not yet using SBuf | |
137 | char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP | |
138 | bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP | |
139 | Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP | |
140 | ||
141 | unsigned short port_; ///< URL port | |
142 | ||
51b5dcf5 AJ |
143 | // XXX: for now includes query-string. |
144 | SBuf path_; ///< URL path segment | |
145 | ||
5c51bffb AJ |
146 | // pre-assembled URL forms |
147 | mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port | |
148 | mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port | |
c823e2da | 149 | mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI |
985c86bc | 150 | }; |
151 | ||
51b5dcf5 AJ |
152 | inline std::ostream & |
153 | operator <<(std::ostream &os, const URL &url) | |
154 | { | |
d31d59d8 AJ |
155 | // none means explicit empty string for scheme. |
156 | if (url.getScheme() != AnyP::PROTO_NONE) | |
157 | os << url.getScheme().image(); | |
158 | os << ":"; | |
159 | ||
160 | // no authority section on URN | |
161 | if (url.getScheme() != AnyP::PROTO_URN) | |
162 | os << "//" << url.authority(); | |
163 | ||
164 | // path is what it is - including absent | |
165 | os << url.path(); | |
51b5dcf5 AJ |
166 | return os; |
167 | } | |
168 | ||
fc54b8d2 FC |
169 | class HttpRequest; |
170 | class HttpRequestMethod; | |
171 | ||
8a648e8d | 172 | void urlInitialize(void); |
8a648e8d FC |
173 | char *urlCanonicalClean(const HttpRequest *); |
174 | const char *urlCanonicalFakeHttps(const HttpRequest * request); | |
175 | bool urlIsRelative(const char *); | |
176 | char *urlMakeAbsolute(const HttpRequest *, const char *); | |
177 | char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name); | |
178 | char *urlInternal(const char *dir, const char *name); | |
69f69080 | 179 | |
abbd7825 CT |
180 | enum MatchDomainNameFlags { |
181 | mdnNone = 0, | |
182 | mdnHonorWildcards = 1 << 0, | |
183 | mdnRejectSubsubDomains = 1 << 1 | |
184 | }; | |
185 | ||
69f69080 | 186 | /** |
abbd7825 CT |
187 | * matchDomainName() matches a hostname (usually extracted from traffic) |
188 | * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used | |
189 | * according to the following rules: | |
69f69080 | 190 | * |
abbd7825 CT |
191 | * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains |
192 | * -------------|-------------|-----------|----------------------- | |
193 | * foo.com | foo.com | YES | YES | |
194 | * .foo.com | foo.com | YES | YES | |
195 | * x.foo.com | foo.com | NO | NO | |
196 | * foo.com | .foo.com | YES | YES | |
197 | * .foo.com | .foo.com | YES | YES | |
198 | * x.foo.com | .foo.com | YES | YES | |
199 | * .x.foo.com | .foo.com | YES | NO | |
200 | * y.x.foo.com | .foo.com | YES | NO | |
69f69080 | 201 | * |
abbd7825 | 202 | * If the mdnHonorWildcards flag is set then matchDomainName() also accepts |
69f69080 CT |
203 | * optional wildcards on hostname: |
204 | * | |
205 | * HOST | DOMAIN | MATCH? | |
206 | * -------------|--------------|------- | |
207 | * *.foo.com | x.foo.com | YES | |
208 | * *.foo.com | .x.foo.com | YES | |
209 | * *.foo.com | .foo.com | YES | |
210 | * *.foo.com | foo.com | NO | |
211 | * | |
abbd7825 CT |
212 | * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is |
213 | * supported. | |
214 | * | |
69f69080 CT |
215 | * \retval 0 means the host matches the domain |
216 | * \retval 1 means the host is greater than the domain | |
217 | * \retval -1 means the host is less than the domain | |
218 | */ | |
abbd7825 | 219 | int matchDomainName(const char *host, const char *domain, uint flags = mdnNone); |
8a648e8d | 220 | int urlCheckRequest(const HttpRequest *); |
8a648e8d FC |
221 | char *urlHostname(const char *url); |
222 | void urlExtMethodConfigure(void); | |
fc54b8d2 | 223 | |
985c86bc | 224 | #endif /* SQUID_SRC_URL_H */ |
f53969cc | 225 |