/* Unfortunately, some glibc versions hide parts of RFC 3542 API
if _GNU_SOURCE is not defined. */
-#define _GNU_SOURCE 1
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
+#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include "nest/bird.h"
#include "lib/lists.h"
#include "lib/resource.h"
-#include "lib/timer.h"
#include "lib/socket.h"
#include "lib/event.h"
+#include "lib/timer.h"
#include "lib/string.h"
#include "nest/iface.h"
+#include "conf/conf.h"
-#include "lib/unix.h"
-#include "lib/sysio.h"
+#include "sysdep/unix/unix.h"
+#include CONFIG_INCLUDE_SYSIO_H
/* Maximum number of calls of tx handler for one socket in one
- * select iteration. Should be small enough to not monopolize CPU by
+ * poll iteration. Should be small enough to not monopolize CPU by
* one protocol instance.
*/
#define MAX_STEPS 4
-/* Maximum number of calls of rx handler for all sockets in one select
+/* Maximum number of calls of rx handler for all sockets in one poll
iteration. RX callbacks are often much more costly so we limit
this to gen small latencies */
#define MAX_RX_STEPS 4
+
/*
* Tracked Files
*/
NULL
};
-void *
-tracked_fopen(pool *p, char *name, char *mode)
+struct rfile *
+rf_open(pool *p, char *name, char *mode)
{
FILE *f = fopen(name, mode);
- if (f)
- {
- struct rfile *r = ralloc(p, &rf_class);
- r->f = f;
- }
- return f;
-}
-
-/**
- * DOC: Timers
- *
- * Timers are resources which represent a wish of a module to call
- * a function at the specified time. The platform dependent code
- * doesn't guarantee exact timing, only that a timer function
- * won't be called before the requested time.
- *
- * In BIRD, time is represented by values of the &bird_clock_t type
- * which are integral numbers interpreted as a relative number of seconds since
- * some fixed time point in past. The current time can be read
- * from variable @now with reasonable accuracy and is monotonic. There is also
- * a current 'absolute' time in variable @now_real reported by OS.
- *
- * Each timer is described by a &timer structure containing a pointer
- * to the handler function (@hook), data private to this function (@data),
- * time the function should be called at (@expires, 0 for inactive timers),
- * for the other fields see |timer.h|.
- */
-
-#define NEAR_TIMER_LIMIT 4
-
-static list near_timers, far_timers;
-static bird_clock_t first_far_timer = TIME_INFINITY;
-
-/* now must be different from 0, because 0 is a special value in timer->expires */
-bird_clock_t now = 1, now_real, boot_time;
-
-static void
-update_times_plain(void)
-{
- bird_clock_t new_time = time(NULL);
- int delta = new_time - now_real;
-
- if ((delta >= 0) && (delta < 60))
- now += delta;
- else if (now_real != 0)
- log(L_WARN "Time jump, delta %d s", delta);
-
- now_real = new_time;
-}
-
-static void
-update_times_gettime(void)
-{
- struct timespec ts;
- int rv;
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv != 0)
- die("clock_gettime: %m");
-
- if (ts.tv_sec != now) {
- if (ts.tv_sec < now)
- log(L_ERR "Monotonic timer is broken");
-
- now = ts.tv_sec;
- now_real = time(NULL);
- }
-}
-
-static int clock_monotonic_available;
-
-static inline void
-update_times(void)
-{
- if (clock_monotonic_available)
- update_times_gettime();
- else
- update_times_plain();
-}
+ if (!f)
+ return NULL;
-static inline void
-init_times(void)
-{
- struct timespec ts;
- clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
- if (!clock_monotonic_available)
- log(L_WARN "Monotonic timer is missing");
+ struct rfile *r = ralloc(p, &rf_class);
+ r->f = f;
+ return r;
}
-
-static void
-tm_free(resource *r)
+void *
+rf_file(struct rfile *f)
{
- timer *t = (timer *) r;
-
- tm_stop(t);
+ return f->f;
}
-static void
-tm_dump(resource *r)
+int
+rf_fileno(struct rfile *f)
{
- timer *t = (timer *) r;
-
- debug("(code %p, data %p, ", t->hook, t->data);
- if (t->randomize)
- debug("rand %d, ", t->randomize);
- if (t->recurrent)
- debug("recur %d, ", t->recurrent);
- if (t->expires)
- debug("expires in %d sec)\n", t->expires - now);
- else
- debug("inactive)\n");
+ return fileno(f->f);
}
-static struct resclass tm_class = {
- "Timer",
- sizeof(timer),
- tm_free,
- tm_dump,
- NULL,
- NULL
-};
-/**
- * tm_new - create a timer
- * @p: pool
- *
- * This function creates a new timer resource and returns
- * a pointer to it. To use the timer, you need to fill in
- * the structure fields and call tm_start() to start timing.
+/*
+ * Time clock
*/
-timer *
-tm_new(pool *p)
-{
- timer *t = ralloc(p, &tm_class);
- return t;
-}
-
-static inline void
-tm_insert_near(timer *t)
-{
- node *n = HEAD(near_timers);
- while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
- n = n->next;
- insert_node(&t->n, n->prev);
-}
+btime boot_time;
-/**
- * tm_start - start a timer
- * @t: timer
- * @after: number of seconds the timer should be run after
- *
- * This function schedules the hook function of the timer to
- * be called after @after seconds. If the timer has been already
- * started, it's @expire time is replaced by the new value.
- *
- * You can have set the @randomize field of @t, the timeout
- * will be increased by a random number of seconds chosen
- * uniformly from range 0 .. @randomize.
- *
- * You can call tm_start() from the handler function of the timer
- * to request another run of the timer. Also, you can set the @recurrent
- * field to have the timer re-added automatically with the same timeout.
- */
void
-tm_start(timer *t, unsigned after)
+times_init(struct timeloop *loop)
{
- bird_clock_t when;
-
- if (t->randomize)
- after += random() % (t->randomize + 1);
- when = now + after;
- if (t->expires == when)
- return;
- if (t->expires)
- rem_node(&t->n);
- t->expires = when;
- if (after <= NEAR_TIMER_LIMIT)
- tm_insert_near(t);
- else
- {
- if (!first_far_timer || first_far_timer > when)
- first_far_timer = when;
- add_tail(&far_timers, &t->n);
- }
-}
+ struct timespec ts;
+ int rv;
-/**
- * tm_stop - stop a timer
- * @t: timer
- *
- * This function stops a timer. If the timer is already stopped,
- * nothing happens.
- */
-void
-tm_stop(timer *t)
-{
- if (t->expires)
- {
- rem_node(&t->n);
- t->expires = 0;
- }
-}
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("Monotonic clock is missing");
-static void
-tm_dump_them(char *name, list *l)
-{
- node *n;
- timer *t;
+ if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
+ log(L_WARN "Monotonic clock is crazy");
- debug("%s timers:\n", name);
- WALK_LIST(n, *l)
- {
- t = SKIP_BACK(timer, n, n);
- debug("%p ", t);
- tm_dump(&t->r);
- }
- debug("\n");
+ loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
+ loop->real_time = 0;
}
void
-tm_dump_all(void)
+times_update(struct timeloop *loop)
{
- tm_dump_them("Near", &near_timers);
- tm_dump_them("Far", &far_timers);
-}
-
-static inline time_t
-tm_first_shot(void)
-{
- time_t x = first_far_timer;
-
- if (!EMPTY_LIST(near_timers))
- {
- timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
- if (t->expires < x)
- x = t->expires;
- }
- return x;
-}
-
-void io_log_event(void *hook, void *data);
-
-static void
-tm_shot(void)
-{
- timer *t;
- node *n, *m;
-
- if (first_far_timer <= now)
- {
- bird_clock_t limit = now + NEAR_TIMER_LIMIT;
- first_far_timer = TIME_INFINITY;
- n = HEAD(far_timers);
- while (m = n->next)
- {
- t = SKIP_BACK(timer, n, n);
- if (t->expires <= limit)
- {
- rem_node(n);
- tm_insert_near(t);
- }
- else if (t->expires < first_far_timer)
- first_far_timer = t->expires;
- n = m;
- }
- }
- while ((n = HEAD(near_timers)) -> next)
- {
- int delay;
- t = SKIP_BACK(timer, n, n);
- if (t->expires > now)
- break;
- rem_node(n);
- delay = t->expires - now;
- t->expires = 0;
- if (t->recurrent)
- {
- int i = t->recurrent - delay;
- if (i < 0)
- i = 0;
- tm_start(t, i);
- }
- io_log_event(t->hook, t->data);
- t->hook(t);
- }
-}
+ struct timespec ts;
+ int rv;
-/**
- * tm_parse_datetime - parse a date and time
- * @x: datetime string
- *
- * tm_parse_datetime() takes a textual representation of
- * a date and time (dd-mm-yyyy hh:mm:ss)
- * and converts it to the corresponding value of type &bird_clock_t.
- */
-bird_clock_t
-tm_parse_datetime(char *x)
-{
- struct tm tm;
- int n;
- time_t t;
-
- if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
- return tm_parse_date(x);
- tm.tm_mon--;
- tm.tm_year -= 1900;
- t = mktime(&tm);
- if (t == (time_t) -1)
- return 0;
- return t;
-}
-/**
- * tm_parse_date - parse a date
- * @x: date string
- *
- * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
- * and converts it to the corresponding value of type &bird_clock_t.
- */
-bird_clock_t
-tm_parse_date(char *x)
-{
- struct tm tm;
- int n;
- time_t t;
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
- if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
- return 0;
- tm.tm_mon--;
- tm.tm_year -= 1900;
- tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
- t = mktime(&tm);
- if (t == (time_t) -1)
- return 0;
- return t;
-}
+ btime new_time = ts.tv_sec S + ts.tv_nsec NS;
-static void
-tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
-{
- static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+ if (new_time < loop->last_time)
+ log(L_ERR "Monotonic clock is broken");
- if (delta < 20*3600)
- bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
- else if (delta < 360*86400)
- bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
- else
- bsprintf(x, "%d", tm->tm_year+1900);
+ loop->last_time = new_time;
+ loop->real_time = 0;
}
-#include "conf/conf.h"
-
-/**
- * tm_format_datetime - convert date and time to textual representation
- * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
- * @t: time
- *
- * This function formats the given relative time value @t to a textual
- * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
- */
void
-tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
+times_update_real_time(struct timeloop *loop)
{
- const char *fmt_used;
- struct tm *tm;
- bird_clock_t delta = now - t;
- t = now_real - delta;
- tm = localtime(&t);
-
- if (fmt_spec->fmt1 == NULL)
- return tm_format_reltime(x, tm, delta);
+ struct timespec ts;
+ int rv;
- if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
- fmt_used = fmt_spec->fmt1;
- else
- fmt_used = fmt_spec->fmt2;
+ rv = clock_gettime(CLOCK_REALTIME, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
- int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
- if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
- strcpy(x, "<too-long>");
+ loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
}
* Sockaddr helper functions
*/
-static inline int sockaddr_length(int af)
+static inline int UNUSED sockaddr_length(int af)
{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
static inline void
-sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
+sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
{
memset(sa, 0, sizeof(struct sockaddr_in));
-#ifdef HAVE_SIN_LEN
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
sa->sin_len = sizeof(struct sockaddr_in);
#endif
sa->sin_family = AF_INET;
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
{
if (af == AF_INET)
- sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
+ sockaddr_fill4((struct sockaddr_in *) sa, a, port);
else if (af == AF_INET6)
sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
else
}
static inline void
-sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
+sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
{
*port = ntohs(sa->sin_port);
*a = ipa_from_in4(sa->sin_addr);
goto fail;
if (af == AF_INET)
- sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
+ sockaddr_read4((struct sockaddr_in *) sa, a, port);
else if (af == AF_INET6)
sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
else
return -1;
}
-const int fam_to_af[] = { [SK_FAM_IPV4] = AF_INET, [SK_FAM_IPV6] = AF_INET6 };
/*
* IPv6 multicast syscalls
}
static inline int
-sk_set_high_port(sock *s)
+sk_set_high_port(sock *s UNUSED)
{
/* Port range setting is optional, ignore it if not supported */
/**
* sk_set_md5_auth - add / remove MD5 security association for given socket
* @s: socket
- * @a: IP address of the other side
+ * @local: IP address of local side
+ * @remote: IP address of remote side
* @ifa: Interface for link-local IP address
- * @passwd: password used for MD5 authentication
+ * @passwd: Password used for MD5 authentication
+ * @setkey: Update also system SA/SP database
*
- * In TCP MD5 handling code in kernel, there is a set of pairs (address,
- * password) used to choose password according to address of the other side.
- * This function is useful for listening socket, for active sockets it is enough
- * to set s->password field.
+ * In TCP MD5 handling code in kernel, there is a set of security associations
+ * used for choosing password and other authentication parameters according to
+ * the local and remote address. This function is useful for listening socket,
+ * for active sockets it may be enough to set s->password field.
*
* When called with passwd != NULL, the new pair is added,
* When called with passwd == NULL, the existing pair is removed.
*
+ * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
+ * stored in global SA/SP database (but the behavior also must be enabled on
+ * per-socket basis). In case of multiple sockets to the same neighbor, the
+ * socket-specific state must be configured for each socket while global state
+ * just once per src-dst pair. The @setkey argument controls whether the global
+ * state (SA/SP database) is also updated.
+ *
* Result: 0 for success, -1 for an error.
*/
int
-sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
+sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
{ DUMMY; }
#endif
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
-static int sock_recalc_fdsets_p;
static inline sock *
sk_next(sock *s)
}
}
+#ifdef HAVE_LIBSSH
+static void
+sk_ssh_free(sock *s)
+{
+ struct ssh_sock *ssh = s->ssh;
+
+ if (s->ssh == NULL)
+ return;
+
+ s->ssh = NULL;
+
+ if (ssh->channel)
+ {
+ if (ssh_channel_is_open(ssh->channel))
+ ssh_channel_close(ssh->channel);
+ ssh_channel_free(ssh->channel);
+ ssh->channel = NULL;
+ }
+
+ if (ssh->session)
+ {
+ ssh_disconnect(ssh->session);
+ ssh_free(ssh->session);
+ ssh->session = NULL;
+ }
+}
+#endif
+
static void
sk_free(resource *r)
{
sock *s = (sock *) r;
sk_free_bufs(s);
- if (s->fd >= 0)
- {
- close(s->fd);
- /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
- if (s->flags & SKF_THREAD)
- return;
+#ifdef HAVE_LIBSSH
+ if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
+ sk_ssh_free(s);
+#endif
+
+ if (s->fd < 0)
+ return;
+ /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
+ if (!(s->flags & SKF_THREAD))
+ {
if (s == current_sock)
current_sock = sk_next(s);
if (s == stored_sock)
stored_sock = sk_next(s);
rem_node(&s->n);
- sock_recalc_fdsets_p = 1;
}
+
+ if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
+ close(s->fd);
+
+ s->fd = -1;
}
void
sk_dump(resource *r)
{
sock *s = (sock *) r;
- static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
+ static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
sk_type_names[s->type],
int y = 1;
int fd = s->fd;
+ if (s->type == SK_SSH_ACTIVE)
+ return 0;
+
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
ERR("O_NONBLOCK");
- if (!s->fam)
+ if (!s->af)
return 0;
if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
}
#endif
+ if (s->vrf && !s->iface)
+ {
+ /* Bind socket to associated VRF interface.
+ This is Linux-specific, but so is SO_BINDTODEVICE. */
+#ifdef SO_BINDTODEVICE
+ struct ifreq ifr = {};
+ strcpy(ifr.ifr_name, s->vrf->name);
+ if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
+ ERR("SO_BINDTODEVICE");
+#endif
+ }
+
if (s->iface)
{
#ifdef SO_BINDTODEVICE
- struct ifreq ifr;
+ struct ifreq ifr = {};
strcpy(ifr.ifr_name, s->iface->name);
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
ERR("SO_BINDTODEVICE");
#endif
}
- if (s->priority >= 0)
- if (sk_set_priority(s, s->priority) < 0)
- return -1;
-
if (sk_is_ipv4(s))
{
if (s->flags & SKF_LADDR_RX)
if (sk_is_ipv6(s))
{
- if (s->flags & SKF_V6ONLY)
+ if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
ERR("IPV6_V6ONLY");
return -1;
}
+ /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
+ if (s->priority >= 0)
+ if (sk_set_priority(s, s->priority) < 0)
+ return -1;
+
return 0;
}
sk_insert(sock *s)
{
add_tail(&sock_list, &s->n);
- sock_recalc_fdsets_p = 1;
}
static void
int sa_len = sizeof(sa);
if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
- (sockaddr_read(&sa, fam_to_af[s->fam], &s->saddr, &s->iface, &s->sport) < 0))
+ (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
log(L_WARN "SOCK: Cannot get local IP address for TCP>");
s->type = SK_TCP;
s->tx_hook(s);
}
+#ifdef HAVE_LIBSSH
+static void
+sk_ssh_connected(sock *s)
+{
+ sk_alloc_bufs(s);
+ s->type = SK_SSH;
+ s->tx_hook(s);
+}
+#endif
+
static int
sk_passive_connected(sock *s, int type)
{
sock *t = sk_new(s->pool);
t->type = type;
- t->fam = s->fam;
+ t->data = s->data;
+ t->af = s->af;
t->fd = fd;
t->ttl = s->ttl;
t->tos = s->tos;
+ t->vrf = s->vrf;
t->rbsize = s->rbsize;
t->tbsize = s->tbsize;
if (type == SK_TCP)
{
if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
- (sockaddr_read(&loc_sa, fam_to_af[s->fam], &t->saddr, &t->iface, &t->sport) < 0))
+ (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
log(L_WARN "SOCK: Cannot get local IP address for TCP<");
- if (sockaddr_read(&rem_sa, fam_to_af[s->fam], &t->daddr, &t->iface, &t->dport) < 0)
+ if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
}
- if (fd >= FD_SETSIZE)
- {
- /* FIXME: Call err_hook instead ? */
- log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
- t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
- t->dport, "rejected due to FD_SETSIZE limit");
- close(fd);
- t->fd = -1;
- rfree(t);
- return 1;
- }
-
if (sk_setup(t) < 0)
{
/* FIXME: Call err_hook instead ? */
return 1;
}
+#ifdef HAVE_LIBSSH
+/*
+ * Return SSH_OK or SSH_AGAIN or SSH_ERROR
+ */
+static int
+sk_ssh_connect(sock *s)
+{
+ s->fd = ssh_get_fd(s->ssh->session);
+
+ /* Big fall thru automata */
+ switch (s->ssh->state)
+ {
+ case SK_SSH_CONNECT:
+ {
+ switch (ssh_connect(s->ssh->session))
+ {
+ case SSH_AGAIN:
+ /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
+ * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
+ * documented but our code relies on that.
+ */
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_SERVER_KNOWN:
+ {
+ s->ssh->state = SK_SSH_SERVER_KNOWN;
+
+ if (s->ssh->server_hostkey_path)
+ {
+ int server_identity_is_ok = 1;
+
+ /* Check server identity */
+ switch (ssh_is_server_known(s->ssh->session))
+ {
+#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
+ case SSH_SERVER_KNOWN_OK:
+ /* The server is known and has not changed. */
+ break;
+
+ case SSH_SERVER_NOT_KNOWN:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
+ break;
+
+ case SSH_SERVER_KNOWN_CHANGED:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_SERVER_FILE_NOT_FOUND:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_SERVER_ERROR:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
+ server_identity_is_ok = 0;
+ break;
+
+ case SSH_SERVER_FOUND_OTHER:
+ LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
+ "It is a possible attack.");
+ server_identity_is_ok = 0;
+ break;
+ }
+
+ if (!server_identity_is_ok)
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_USERAUTH:
+ {
+ s->ssh->state = SK_SSH_USERAUTH;
+ switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
+ {
+ case SSH_AUTH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_AUTH_SUCCESS:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_CHANNEL:
+ {
+ s->ssh->state = SK_SSH_CHANNEL;
+ s->ssh->channel = ssh_channel_new(s->ssh->session);
+ if (s->ssh->channel == NULL)
+ return SSH_ERROR;
+ } /* fallthrough */
+
+ case SK_SSH_SESSION:
+ {
+ s->ssh->state = SK_SSH_SESSION;
+ switch (ssh_channel_open_session(s->ssh->channel))
+ {
+ case SSH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ } /* fallthrough */
+
+ case SK_SSH_SUBSYSTEM:
+ {
+ s->ssh->state = SK_SSH_SUBSYSTEM;
+ if (s->ssh->subsystem)
+ {
+ switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
+ {
+ case SSH_AGAIN:
+ return SSH_AGAIN;
+
+ case SSH_OK:
+ break;
+
+ default:
+ return SSH_ERROR;
+ }
+ }
+ } /* fallthrough */
+
+ case SK_SSH_ESTABLISHED:
+ s->ssh->state = SK_SSH_ESTABLISHED;
+ }
+
+ return SSH_OK;
+}
+
+/*
+ * Return file descriptor number if success
+ * Return -1 if failed
+ */
+static int
+sk_open_ssh(sock *s)
+{
+ if (!s->ssh)
+ bug("sk_open() sock->ssh is not allocated");
+
+ ssh_session sess = ssh_new();
+ if (sess == NULL)
+ ERR2("Cannot create a ssh session");
+ s->ssh->session = sess;
+
+ const int verbosity = SSH_LOG_NOLOG;
+ ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
+ ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
+ ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
+ /* TODO: Add SSH_OPTIONS_BINDADDR */
+ ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
+
+ if (s->ssh->server_hostkey_path)
+ ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
+
+ if (s->ssh->client_privkey_path)
+ ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
+
+ ssh_set_blocking(sess, 0);
+
+ switch (sk_ssh_connect(s))
+ {
+ case SSH_AGAIN:
+ break;
+
+ case SSH_OK:
+ sk_ssh_connected(s);
+ break;
+
+ case SSH_ERROR:
+ ERR2(ssh_get_error(sess));
+ break;
+ }
+
+ return ssh_get_fd(sess);
+
+ err:
+ return -1;
+}
+#endif
+
/**
* sk_open - open a socket
* @s: socket
int
sk_open(sock *s)
{
+ int af = AF_UNSPEC;
int fd = -1;
int do_bind = 0;
int bind_port = 0;
ip_addr bind_addr = IPA_NONE;
sockaddr sa;
+ if (s->type <= SK_IP)
+ {
+ /*
+ * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
+ * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
+ * But the specifications have to be consistent.
+ */
+
+ switch (s->subtype)
+ {
+ case 0:
+ ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
+ (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
+ af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
+ break;
+
+ case SK_IPV4:
+ ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
+ ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
+ af = AF_INET;
+ break;
+
+ case SK_IPV6:
+ ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
+ ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
+ af = AF_INET6;
+ break;
+
+ default:
+ bug("Invalid subtype %d", s->subtype);
+ }
+ }
+
switch (s->type)
{
case SK_TCP_ACTIVE:
s->ttx = ""; /* Force s->ttx != s->tpos */
/* Fall thru */
case SK_TCP_PASSIVE:
- fd = socket(fam_to_af[s->fam], SOCK_STREAM, IPPROTO_TCP);
+ fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
bind_port = s->sport;
bind_addr = s->saddr;
do_bind = bind_port || ipa_nonzero(bind_addr);
break;
+#ifdef HAVE_LIBSSH
+ case SK_SSH_ACTIVE:
+ s->ttx = ""; /* Force s->ttx != s->tpos */
+ fd = sk_open_ssh(s);
+ break;
+#endif
+
case SK_UDP:
- fd = socket(fam_to_af[s->fam], SOCK_DGRAM, IPPROTO_UDP);
+ fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
bind_port = s->sport;
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
do_bind = 1;
break;
case SK_IP:
- fd = socket(fam_to_af[s->fam], SOCK_RAW, s->dport);
+ fd = socket(af, SOCK_RAW, s->dport);
bind_port = 0;
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
do_bind = ipa_nonzero(bind_addr);
break;
case SK_MAGIC:
- s->fam = SK_FAM_NONE;
+ af = 0;
fd = s->fd;
break;
if (fd < 0)
ERR("socket");
- if (fd >= FD_SETSIZE)
- ERR2("FD_SETSIZE limit reached");
-
+ s->af = af;
s->fd = fd;
if (sk_setup(s) < 0)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
- sockaddr_fill(&sa, fam_to_af[s->fam], bind_addr, s->iface, bind_port);
+ sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");
}
if (s->password)
- if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
+ if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
goto err;
switch (s->type)
{
case SK_TCP_ACTIVE:
- sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
+ sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
sk_tcp_connected(s);
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
ERR2("listen");
break;
+ case SK_SSH_ACTIVE:
case SK_MAGIC:
break;
if (!(s->flags & SKF_THREAD))
sk_insert(s);
+
return 0;
err:
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
byte cmsg_buf[CMSG_TX_SPACE];
sockaddr dst;
+ int flags = 0;
- sockaddr_fill(&dst, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
+ sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
struct msghdr msg = {
.msg_name = &dst.sa,
.msg_iovlen = 1
};
+#ifdef CONFIG_DONTROUTE_UNICAST
+ /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
+ cannot use it for other cases (e.g. when TTL security is used). */
+ if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
+ flags = MSG_DONTROUTE;
+#endif
+
#ifdef CONFIG_USE_HDRINCL
byte hdr[20];
struct iovec iov2[2] = { {hdr, 20}, iov };
if (s->flags & SKF_PKTINFO)
sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
- return sendmsg(s->fd, &msg, 0);
+ return sendmsg(s->fd, &msg, flags);
}
static inline int
// rv = ipv4_skip_header(pbuf, rv);
//endif
- sockaddr_read(&src, fam_to_af[s->fam], &s->faddr, NULL, &s->fport);
+ sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
sk_process_cmsgs(s, &msg);
if (msg.msg_flags & MSG_TRUNC)
reset_tx_buffer(s);
return 1;
+#ifdef HAVE_LIBSSH
+ case SK_SSH:
+ while (s->ttx != s->tpos)
+ {
+ e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
+
+ if (e < 0)
+ {
+ s->err = ssh_get_error(s->ssh->session);
+ s->err_hook(s, ssh_get_error_code(s->ssh->session));
+
+ reset_tx_buffer(s);
+ /* EPIPE is just a connection close notification during TX */
+ s->err_hook(s, (errno != EPIPE) ? errno : 0);
+ return -1;
+ }
+ s->ttx += e;
+ }
+ reset_tx_buffer(s);
+ return 1;
+#endif
+
case SK_UDP:
case SK_IP:
{
reset_tx_buffer(s);
return 1;
}
+
default:
bug("sk_maybe_write: unknown socket type %d", s->type);
}
int
sk_rx_ready(sock *s)
{
- fd_set rd, wr;
- struct timeval timo;
int rv;
-
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- FD_SET(s->fd, &rd);
-
- timo.tv_sec = 0;
- timo.tv_usec = 0;
+ struct pollfd pfd = { .fd = s->fd };
+ pfd.events |= POLLIN;
redo:
- rv = select(s->fd+1, &rd, &wr, NULL, &timo);
+ rv = poll(&pfd, 1, 0);
if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
goto redo;
}
*/
+static void
+call_rx_hook(sock *s, int size)
+{
+ if (s->rx_hook(s, size))
+ {
+ /* We need to be careful since the socket could have been deleted by the hook */
+ if (current_sock == s)
+ s->rpos = s->rbuf;
+ }
+}
+
+#ifdef HAVE_LIBSSH
+static int
+sk_read_ssh(sock *s)
+{
+ ssh_channel rchans[2] = { s->ssh->channel, NULL };
+ struct timeval timev = { 1, 0 };
+
+ if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
+ return 1; /* Try again */
+
+ if (ssh_channel_is_eof(s->ssh->channel) != 0)
+ {
+ /* The remote side is closing the connection */
+ s->err_hook(s, 0);
+ return 0;
+ }
+
+ if (rchans[0] == NULL)
+ return 0; /* No data is available on the socket */
+
+ const uint used_bytes = s->rpos - s->rbuf;
+ const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
+ if (read_bytes > 0)
+ {
+ /* Received data */
+ s->rpos += read_bytes;
+ call_rx_hook(s, used_bytes + read_bytes);
+ return 1;
+ }
+ else if (read_bytes == 0)
+ {
+ if (ssh_channel_is_eof(s->ssh->channel) != 0)
+ {
+ /* The remote side is closing the connection */
+ s->err_hook(s, 0);
+ }
+ }
+ else
+ {
+ s->err = ssh_get_error(s->ssh->session);
+ s->err_hook(s, ssh_get_error_code(s->ssh->session));
+ }
+
+ return 0; /* No data is available on the socket */
+}
+#endif
+
/* sk_read() and sk_write() are called from BFD's event loop */
int
-sk_read(sock *s)
+sk_read(sock *s, int revents)
{
switch (s->type)
{
{
if (errno != EINTR && errno != EAGAIN)
s->err_hook(s, errno);
+ else if (errno == EAGAIN && !(revents & POLLIN))
+ {
+ log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
+ s->err_hook(s, 0);
+ }
}
else if (!c)
s->err_hook(s, 0);
else
{
s->rpos += c;
- if (s->rx_hook(s, s->rpos - s->rbuf))
- {
- /* We need to be careful since the socket could have been deleted by the hook */
- if (current_sock == s)
- s->rpos = s->rbuf;
- }
+ call_rx_hook(s, s->rpos - s->rbuf);
return 1;
}
return 0;
}
+#ifdef HAVE_LIBSSH
+ case SK_SSH:
+ return sk_read_ssh(s);
+#endif
+
case SK_MAGIC:
return s->rx_hook(s, 0);
case SK_TCP_ACTIVE:
{
sockaddr sa;
- sockaddr_fill(&sa, fam_to_af[s->fam], s->daddr, s->iface, s->dport);
+ sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
sk_tcp_connected(s);
return 0;
}
+#ifdef HAVE_LIBSSH
+ case SK_SSH_ACTIVE:
+ {
+ switch (sk_ssh_connect(s))
+ {
+ case SSH_OK:
+ sk_ssh_connected(s);
+ break;
+
+ case SSH_AGAIN:
+ return 1;
+
+ case SSH_ERROR:
+ s->err = ssh_get_error(s->ssh->session);
+ s->err_hook(s, ssh_get_error_code(s->ssh->session));
+ break;
+ }
+ return 0;
+ }
+#endif
+
default:
if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
{
}
int sk_is_ipv4(sock *s)
-{ return s->fam == SK_FAM_IPV4; }
+{ return s->af == AF_INET; }
int sk_is_ipv6(sock *s)
-{ return s->fam == SK_FAM_IPV6; }
+{ return s->af == AF_INET6; }
+
+void
+sk_err(sock *s, int revents)
+{
+ int se = 0, sse = sizeof(se);
+ if ((s->type != SK_MAGIC) && (revents & POLLERR))
+ if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
+ {
+ log(L_ERR "IO: Socket error: SO_ERROR: %m");
+ se = 0;
+ }
+
+ s->err_hook(s, se);
+}
void
sk_dump_all(void)
struct timespec ts;
int rv;
- if (!clock_monotonic_available)
- return;
-
/*
* This is third time-tracking procedure (after update_times() above and
* times_update() in BFD), dedicated to internal event log and latency
if (rv < 0)
die("clock_gettime: %m");
- last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+ last_time = ts.tv_sec S + ts.tv_nsec NS;
if (event_open)
{
volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;
+volatile int async_shutdown_flag;
void
io_init(void)
{
- init_list(&near_timers);
- init_list(&far_timers);
init_list(&sock_list);
init_list(&global_event_list);
krt_io_init();
- init_times();
- update_times();
- boot_time = now;
- srandom((int) now_real);
+ // XXX init_times();
+ // XXX update_times();
+ boot_time = current_time();
+
+ u64 now = (u64) current_real_time();
+ srandom((uint) (now ^ (now >> 32)));
}
static int short_loops = 0;
void
io_loop(void)
{
- fd_set rd, wr;
- struct timeval timo;
- time_t tout;
- int hi, events;
+ int poll_tout, timeout;
+ int nfds, events, pout;
+ timer *t;
sock *s;
node *n;
+ int fdmax = 256;
+ struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
watchdog_start1();
- sock_recalc_fdsets_p = 1;
for(;;)
{
+ times_update(&main_timeloop);
events = ev_run_list(&global_event_list);
- update_times();
- tout = tm_first_shot();
- if (tout <= now)
- {
- tm_shot();
- continue;
- }
- timo.tv_sec = events ? 0 : MIN(tout - now, 3);
- timo.tv_usec = 0;
-
+ timers_fire(&main_timeloop);
io_close_event();
- if (sock_recalc_fdsets_p)
- {
- sock_recalc_fdsets_p = 0;
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- }
+ // FIXME
+ poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
+ if (t = timers_first(&main_timeloop))
+ {
+ times_update(&main_timeloop);
+ timeout = (tm_remains(t) TO_MS) + 1;
+ poll_tout = MIN(poll_tout, timeout);
+ }
- hi = 0;
+ nfds = 0;
WALK_LIST(n, sock_list)
{
+ pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
s = SKIP_BACK(sock, n, n);
if (s->rx_hook)
{
- FD_SET(s->fd, &rd);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLIN;
}
- else
- FD_CLR(s->fd, &rd);
if (s->tx_hook && s->ttx != s->tpos)
{
- FD_SET(s->fd, &wr);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLOUT;
+ }
+ if (pfd[nfds].fd != -1)
+ {
+ s->index = nfds;
+ nfds++;
}
else
- FD_CLR(s->fd, &wr);
+ s->index = -1;
+
+ if (nfds >= fdmax)
+ {
+ fdmax *= 2;
+ pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
+ }
}
/*
* Yes, this is racy. But even if the signal comes before this test
- * and entering select(), it gets caught on the next timer tick.
+ * and entering poll(), it gets caught on the next timer tick.
*/
if (async_config_flag)
continue;
}
- /* And finally enter select() to find active sockets */
+ /* And finally enter poll() to find active sockets */
watchdog_stop();
- hi = select(hi+1, &rd, &wr, NULL, &timo);
+ pout = poll(pfd, nfds, poll_tout);
watchdog_start();
- if (hi < 0)
+ if (pout < 0)
{
if (errno == EINTR || errno == EAGAIN)
continue;
- die("select: %m");
+ die("poll: %m");
}
- if (hi)
+ if (pout)
{
+ times_update(&main_timeloop);
+
/* guaranteed to be non-empty */
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
while (current_sock)
{
sock *s = current_sock;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next;
+ }
+
int e;
int steps;
steps = MAX_STEPS;
- if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
do
{
steps--;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ e = sk_read(s, pfd[s->index].revents);
if (s != current_sock)
goto next;
}
while (e && s->rx_hook && steps);
steps = MAX_STEPS;
- if (FD_ISSET(s->fd, &wr))
+ if (pfd[s->index].revents & POLLOUT)
do
{
steps--;
goto next;
}
while (e && steps);
+
current_sock = sk_next(s);
next: ;
}
while (current_sock && count < MAX_RX_STEPS)
{
sock *s = current_sock;
- int e UNUSED;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next2;
+ }
- if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
{
count++;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ sk_read(s, pfd[s->index].revents);
if (s != current_sock)
- goto next2;
+ goto next2;
}
+
+ if (pfd[s->index].revents & (POLLHUP | POLLERR))
+ {
+ sk_err(s, pfd[s->index].revents);
+ if (s != current_sock)
+ goto next2;
+ }
+
current_sock = sk_next(s);
next2: ;
}
+
stored_sock = current_sock;
}
}