]> git.ipfire.org Git - thirdparty/squid.git/blame - src/URL.h
SourceFormat Enforcement
[thirdparty/squid.git] / src / URL.h
CommitLineData
985c86bc 1/*
ef57eb7b 2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
985c86bc 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
985c86bc 7 */
8
9#ifndef SQUID_SRC_URL_H
10#define SQUID_SRC_URL_H
11
1ca54a54 12#include "anyp/UriScheme.h"
5c51bffb
AJ
13#include "ip/Address.h"
14#include "rfc2181.h"
92d6986d 15#include "SBuf.h"
985c86bc 16
51b5dcf5
AJ
17#include <iosfwd>
18
63be0a78 19/**
63be0a78 20 * The URL class represents a Uniform Resource Locator
5c51bffb
AJ
21 *
22 * Governed by RFC 3986
63be0a78 23 */
985c86bc 24class URL
25{
985c86bc 26 MEMPROXY_CLASS(URL);
741c2986
AJ
27
28public:
d59e4742
FC
/// Constructs an empty URL: host is not numeric, port is 0 and the host
/// text is the empty string. Other members are default-constructed.
29 URL() : hostIsNumeric_(false), port_(0) {*host_=0;}
/// Constructs a URL from the given scheme (defined out of line).
30 URL(AnyP::UriScheme const &aScheme);
4e3f4dc7
AJ
31
/// Resets the scheme to AnyP::PROTO_NONE, empties the host (text, numeric
/// flag and binary address), zeroes the port, then calls touch().
/// NOTE(review): userInfo_ and path_ are not reset here - confirm that
/// this is intended.
32 void clear() {
33 scheme_=AnyP::PROTO_NONE;
5c51bffb
AJ
34 hostIsNumeric_ = false;
35 *host_ = 0;
36 hostAddr_.setEmpty();
37 port_ = 0;
38 touch();
4e3f4dc7 39 }
5c51bffb 40 void touch(); ///< clear the cached URI display forms
4e3f4dc7 41
/// the currently stored scheme
1ca54a54 42 AnyP::UriScheme const & getScheme() const {return scheme_;}
985c86bc 43
4e3f4dc7 44 /// convert the URL scheme to that given, then call touch()
5c51bffb 45 void setScheme(const AnyP::ProtocolType &p) {scheme_=p; touch();}
4e3f4dc7 46
/// store the userinfo component, then call touch()
5c51bffb 47 void userInfo(const SBuf &s) {userInfo_=s; touch();}
92d6986d
AJ
/// the stored userinfo component
48 const SBuf &userInfo() const {return userInfo_;}
49
5c51bffb
AJ
/// set the host from a C string (defined out of line)
50 void host(const char *src);
/// the stored host text; the pointer refers to this object's own host_ buffer
51 const char *host(void) const {return host_;}
/// the hostIsNumeric_ flag (whether the host is a raw-IP), returned as int
52 int hostIsNumeric(void) const {return hostIsNumeric_;}
/// the stored binary address (hostAddr_)
53 Ip::Address const & hostIP(void) const {return hostAddr_;}
54
/// store the port, then call touch()
55 void port(unsigned short p) {port_=p; touch();}
/// the stored port
56 unsigned short port() const {return port_;}
57
51b5dcf5
AJ
/// store the path (from a C string or an SBuf), then call touch()
58 void path(const char *p) {path_=p; touch();}
59 void path(const SBuf &p) {path_=p; touch();}
/// the URL path (defined out of line)
60 const SBuf &path() const;
61
62 /// the static '/' default URL-path
63 static const SBuf &SlashPath();
64
2e260208
AJ
65 /// the static '*' pseudo-URL
66 static const SBuf &Asterisk();
67
5c51bffb
AJ
68 /**
69 * The authority-form URI for currently stored values.
70 *
71 * As defined by RFC 7230 section 5.3.3 this form omits the
72 * userinfo@ field from RFC 3986 defined authority segment.
73 *
74 * \param requirePort when true the port will be included, otherwise
75 * port will be elided when it is the default for
76 * the current scheme.
77 */
78 SBuf &authority(bool requirePort = false) const;
79
c823e2da
AJ
80 /**
81 * The absolute-form URI for currently stored values.
82 *
83 * As defined by RFC 7230 section 5.3.3 this form omits the
84 * userinfo@ field from RFC 3986 defined authority segments
85 * when the protocol scheme is http: or https:.
 *
 * NOTE(review): the absolute_ member below cites RFC 7230 section 5.3.2
 * for the absolute-URI while this comment cites 5.3.3 - confirm which
 * section reference is intended here.
86 */
87 SBuf &absolute() const;
88
985c86bc 89private:
63be0a78 90 /**
91 \par
92 * The scheme of this URL. This has the 'type code' smell about it.
26ac0430
AJ
93 * In future we may want to make the methods that dispatch based on
94 * the scheme virtual and have a class per protocol.
63be0a78 95 \par
96 * On the other hand, having Protocol as an explicit concept is useful,
985c86bc 97 * see for instance the ACLProtocol acl type. One way to represent this
26ac0430 98 * is to have one prototype URL with no host etc for each scheme,
985c86bc 99 * another is to have an explicit scheme class, and then each URL class
26ac0430 100 * could be a subclass of the scheme. Another way is one instance of
1ca54a54 101 * a AnyP::UriScheme class instance for each URL scheme we support, and one URL
985c86bc 102 * class for each manner of treating the scheme : a Hierarchical URL, a
63be0a78 103 * non-hierarchical URL etc.
104 \par
985c86bc 105 * Deferring the decision, it's a type code for now. RBC 20060507.
63be0a78 106 \par
26ac0430 107 * In order to make taking any of these routes easy, scheme is private
985c86bc 108 * and immutable, only settable at construction time.
 \par
 * NOTE(review): setScheme() and clear() in this class both assign to
 * scheme_, so the "immutable" statement above looks out of date.
109 */
4e3f4dc7 110 AnyP::UriScheme scheme_;
92d6986d
AJ
111
112 SBuf userInfo_; // aka 'URL-login'
5c51bffb
AJ
113
114 // XXX: uses char[] instead of SBuf to reduce performance regressions
115 // from c_str() since most code using this is not yet using SBuf
116 char host_[SQUIDHOSTNAMELEN]; ///< string representation of the URI authority name or IP
117 bool hostIsNumeric_; ///< whether the authority 'host' is a raw-IP
118 Ip::Address hostAddr_; ///< binary representation of the URI authority if it is a raw-IP
119
120 unsigned short port_; ///< URL port
121
51b5dcf5
AJ
122 // XXX: for now includes query-string.
123 SBuf path_; ///< URL path segment
124
5c51bffb
AJ
125 // pre-assembled URL forms
 // (declared mutable so the const accessors authority() and absolute()
 // can return non-const references to them)
126 mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port
127 mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
c823e2da 128 mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI
985c86bc 129};
130
51b5dcf5
AJ
/// Writes url to os as: [scheme ":"] "//" authority path.
/// The scheme and its ':' are written only when the scheme's c_str() is
/// non-null. authority() is called with its default argument
/// (requirePort = false). Returns os to allow chaining.
131inline std::ostream &
132operator <<(std::ostream &os, const URL &url)
133{
134 if (const char *sc = url.getScheme().c_str())
135 os << sc << ":";
136 os << "//" << url.authority() << url.path();
137 return os;
138}
139
fc54b8d2
FC
// Forward declarations: the prototypes below use these types only through
// pointers and references.
140class HttpRequest;
141class HttpRequestMethod;
142
8a648e8d
FC
// NOTE(review): several functions below return char* / const char*; buffer
// ownership and lifetime are not visible from these declarations - confirm
// against the definitions before freeing or caching the results.
143void urlInitialize(void);
// NOTE(review): takes the URL as a mutable char*; whether the buffer is
// modified is not visible here. The request argument defaults to NULL.
144HttpRequest *urlParse(const HttpRequestMethod&, char *, HttpRequest *request = NULL);
8a648e8d
FC
145char *urlCanonicalClean(const HttpRequest *);
146const char *urlCanonicalFakeHttps(const HttpRequest * request);
147bool urlIsRelative(const char *);
148char *urlMakeAbsolute(const HttpRequest *, const char *);
149char *urlRInternal(const char *host, unsigned short port, const char *dir, const char *name);
150char *urlInternal(const char *dir, const char *name);
69f69080
CT
151
152/**
153 * matchDomainName() compares a hostname (usually extracted from traffic)
154 * with a domainname (usually from an ACL) according to the following rules:
155 *
156 * HOST | DOMAIN | MATCH?
157 * -------------|-------------|------
158 * foo.com | foo.com | YES
159 * .foo.com | foo.com | YES
160 * x.foo.com | foo.com | NO
161 * foo.com | .foo.com | YES
162 * .foo.com | .foo.com | YES
163 * x.foo.com | .foo.com | YES
164 *
165 * We strip leading dots on hosts (but not domains!) so that
166 * ".foo.com" is always the same as "foo.com".
167 *
168 * If honorWildcards is true (it defaults to false) then matchDomainName()
169 * also accepts optional wildcards in the hostname:
170 *
171 * HOST | DOMAIN | MATCH?
172 * -------------|--------------|-------
173 * *.foo.com | x.foo.com | YES
174 * *.foo.com | .x.foo.com | YES
175 * *.foo.com | .foo.com | YES
176 * *.foo.com | foo.com | NO
177 *
178 * \retval 0 means the host matches the domain
179 * \retval 1 means the host is greater than the domain
180 * \retval -1 means the host is less than the domain
181 */
182int matchDomainName(const char *host, const char *domain, bool honorWildcards = false);
// NOTE(review): the meaning of the int result is not visible from this declaration.
8a648e8d 183int urlCheckRequest(const HttpRequest *);
8a648e8d
FC
// NOTE(review): returns a mutable char*; ownership of the returned buffer is
// not visible from this declaration - confirm against the definition.
184char *urlHostname(const char *url);
185void urlExtMethodConfigure(void);
fc54b8d2 186
985c86bc 187#endif /* SQUID_SRC_URL_H */
f53969cc 188