]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/URL.h
2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 #ifndef SQUID_SRC_URL_H
10 #define SQUID_SRC_URL_H
12 #include "anyp/UriScheme.h"
13 #include "ip/Address.h"
15 #include "sbuf/SBuf.h"
20 * The URL class represents a Uniform Resource Location
22 * Governed by RFC 3986
29 URL() : hostIsNumeric_(false), port_(0) {*host_
=0;}
30 URL(AnyP::UriScheme
const &aScheme
);
31 URL(const URL
&other
) {
32 this->operator =(other
);
34 URL
&operator =(const URL
&o
) {
36 userInfo_
= o
.userInfo_
;
37 memcpy(host_
, o
.host_
, sizeof(host_
));
38 hostIsNumeric_
= o
.hostIsNumeric_
;
39 hostAddr_
= o
.hostAddr_
;
47 scheme_
=AnyP::PROTO_NONE
;
48 hostIsNumeric_
= false;
/// clear the cached URI display forms
void touch();
56 bool parse(const HttpRequestMethod
&, const char *url
);
58 AnyP::UriScheme
const & getScheme() const {return scheme_
;}
60 /// convert the URL scheme to that given
61 void setScheme(const AnyP::ProtocolType
&p
, const char *str
) {
62 scheme_
= AnyP::UriScheme(p
, str
);
66 void userInfo(const SBuf
&s
) {userInfo_
=s
; touch();}
67 const SBuf
&userInfo() const {return userInfo_
;}
69 void host(const char *src
);
70 const char *host(void) const {return host_
;}
71 int hostIsNumeric(void) const {return hostIsNumeric_
;}
72 Ip::Address
const & hostIP(void) const {return hostAddr_
;}
74 void port(unsigned short p
) {port_
=p
; touch();}
75 unsigned short port() const {return port_
;}
77 void path(const char *p
) {path_
=p
; touch();}
78 void path(const SBuf
&p
) {path_
=p
; touch();}
79 const SBuf
&path() const;
81 /// the static '/' default URL-path
82 static const SBuf
&SlashPath();
84 /// the static '*' pseudo-URL
85 static const SBuf
&Asterisk();
88 * The authority-form URI for currently stored values.
90 * As defined by RFC 7230 section 5.3.3 this form omits the
91 * userinfo@ field from RFC 3986 defined authority segment.
93 * \param requirePort when true the port will be included, otherwise
94 * port will be elided when it is the default for
97 SBuf
&authority(bool requirePort
= false) const;
100 * The absolute-form URI for currently stored values.
102 * As defined by RFC 7230 section 5.3.3 this form omits the
103 * userinfo@ field from RFC 3986 defined authority segments
104 * when the protocol scheme is http: or https:.
106 SBuf
&absolute() const;
109 void parseFinish(const AnyP::ProtocolType
, const char *const, const char *const, const char *const, const SBuf
&, const int);
113 * The scheme of this URL. This has the 'type code' smell about it.
114 * In future we may want to make the methods that dispatch based on
115 * the scheme virtual and have a class per protocol.
117 * On the other hand, having Protocol as an explicit concept is useful,
118 * see for instance the ACLProtocol acl type. One way to represent this
119 * is to have one prototype URL with no host etc for each scheme,
120 * another is to have an explicit scheme class, and then each URL class
121 * could be a subclass of the scheme. Another way is one instance of
122 * a AnyP::UriScheme class instance for each URL scheme we support, and one URL
123 * class for each manner of treating the scheme : a Hierarchical URL, a
124 * non-hierarchical URL etc.
 * Deferring the decision, it's a type code for now. RBC 20060507.
128 * In order to make taking any of these routes easy, scheme is private
129 * and immutable, only settable at construction time,
131 AnyP::UriScheme scheme_
;
133 SBuf userInfo_
; // aka 'URL-login'
135 // XXX: uses char[] instead of SBUf to reduce performance regressions
136 // from c_str() since most code using this is not yet using SBuf
137 char host_
[SQUIDHOSTNAMELEN
]; ///< string representation of the URI authority name or IP
138 bool hostIsNumeric_
; ///< whether the authority 'host' is a raw-IP
139 Ip::Address hostAddr_
; ///< binary representation of the URI authority if it is a raw-IP
141 unsigned short port_
; ///< URL port
143 // XXX: for now includes query-string.
144 SBuf path_
; ///< URL path segment
146 // pre-assembled URL forms
147 mutable SBuf authorityHttp_
; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
148 mutable SBuf authorityWithPort_
; ///< RFC 7230 section 5.3.3 authority with explicit port
149 mutable SBuf absolute_
; ///< RFC 7230 section 5.3.2 absolute-URI
152 inline std::ostream
&
153 operator <<(std::ostream
&os
, const URL
&url
)
155 // none means explicit empty string for scheme.
156 if (url
.getScheme() != AnyP::PROTO_NONE
)
157 os
<< url
.getScheme().image();
160 // no authority section on URN
161 if (url
.getScheme() != AnyP::PROTO_URN
)
162 os
<< "//" << url
.authority();
164 // path is what it is - including absent
170 class HttpRequestMethod
;
172 void urlInitialize(void);
173 char *urlCanonicalClean(const HttpRequest
*);
174 const char *urlCanonicalFakeHttps(const HttpRequest
* request
);
175 bool urlIsRelative(const char *);
176 char *urlMakeAbsolute(const HttpRequest
*, const char *);
177 char *urlRInternal(const char *host
, unsigned short port
, const char *dir
, const char *name
);
178 char *urlInternal(const char *dir
, const char *name
);
180 enum MatchDomainNameFlags
{
182 mdnHonorWildcards
= 1 << 0,
183 mdnRejectSubsubDomains
= 1 << 1
187 * matchDomainName() matches a hostname (usually extracted from traffic)
188 * with a domainname when mdnNone or mdnRejectSubsubDomains flags are used
189 * according to the following rules:
191 * HOST | DOMAIN | mdnNone | mdnRejectSubsubDomains
192 * -------------|-------------|-----------|-----------------------
193 * foo.com | foo.com | YES | YES
194 * .foo.com | foo.com | YES | YES
195 * x.foo.com | foo.com | NO | NO
196 * foo.com | .foo.com | YES | YES
197 * .foo.com | .foo.com | YES | YES
198 * x.foo.com | .foo.com | YES | YES
199 * .x.foo.com | .foo.com | YES | NO
200 * y.x.foo.com | .foo.com | YES | NO
202 * if mdnHonorWildcards flag is set then the matchDomainName() also accepts
203 * optional wildcards on hostname:
205 * HOST | DOMAIN | MATCH?
206 * -------------|--------------|-------
207 * *.foo.com | x.foo.com | YES
208 * *.foo.com | .x.foo.com | YES
209 * *.foo.com | .foo.com | YES
210 * *.foo.com | foo.com | NO
212 * The combination of mdnHonorWildcards and mdnRejectSubsubDomains flags is
215 * \retval 0 means the host matches the domain
216 * \retval 1 means the host is greater than the domain
217 * \retval -1 means the host is less than the domain
219 int matchDomainName(const char *host
, const char *domain
, uint flags
= mdnNone
);
220 int urlCheckRequest(const HttpRequest
*);
221 char *urlHostname(const char *url
);
222 void urlExtMethodConfigure(void);
#endif /* SQUID_SRC_URL_H */