From 38cbcf2b55d398d41638f037b885b20a400f0528 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Wed, 30 Nov 2016 21:37:36 +0100 Subject: [PATCH] linux: add support for ethtool GLINKSETTINGS With a recent enough kernel, one can use GLINKSETTINGS instead of GSET. This is needed to overcome the limitation on the number of supported/advertising modes supported by GSET (which is a fixed-length bitset). We use the same kind of compatibility structure as ethtool does. Also, update the list of supported MAU types to support higher speed interfaces. Unfortunately, while Linux reports the exact advertised/supported modes, the operational mode of the interface is only reported with its speed and its kind (copper, fiber, ...). When this is fiber, we don't know if this is MMF or SMF or the supported distance. Therefore, we use our best guess (this was already the case, nothing new here). --- NEWS | 2 + src/daemon/interfaces-linux.c | 119 ++++++++++++++++++++++++---------- src/daemon/lldpd.h | 14 +++- src/daemon/priv-linux.c | 95 ++++++++++++++++++++++----- src/lldp-const.h | 31 ++++++++- 5 files changed, 207 insertions(+), 54 deletions(-) diff --git a/NEWS b/NEWS index 000f7275..161566be 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,8 @@ lldpd (0.9.6) * Change: + Add a compile-time option to restore pre-0.9.2 JSON format (when using json-c). Use `--enable-json0` to enable this option. + + Support for newer ethtool interface on Linux + (ETHTOOL_LINKSETTINGS) and additional speed settings. * Fix: + Correctly parse LLDP-MED civic address when the length of the TLV exceeds the length of the address. 
diff --git a/src/daemon/interfaces-linux.c b/src/daemon/interfaces-linux.c index 28611de9..1ca7c0fb 100644 --- a/src/daemon/interfaces-linux.c +++ b/src/daemon/interfaces-linux.c @@ -318,64 +318,107 @@ iflinux_get_permanent_mac(struct lldpd *cfg, fclose(netbond); } +static inline int +iflinux_ethtool_link_mode_test_bit(unsigned int nr, const uint32_t *mask) +{ + if (nr >= 32 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32) + return 0; + return !!(mask[nr / 32] & (1 << (nr % 32))); +} +static inline void +iflinux_ethtool_link_mode_unset_bit(unsigned int nr, uint32_t *mask) +{ + if (nr >= 32 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32) + return; + mask[nr / 32] &= ~(1 << (nr % 32)); +} +static inline int +iflinux_ethtool_link_mode_is_empty(const uint32_t *mask) +{ + for (unsigned int i = 0; + i < ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32; + ++i) { + if (mask[i] != 0) + return 0; + } + + return 1; +} + + /* Fill up MAC/PHY for a given hardware port */ static void iflinux_macphy(struct lldpd_hardware *hardware) { #ifdef ENABLE_DOT3 - struct ethtool_cmd ethc; + struct ethtool_link_usettings uset; struct lldpd_port *port = &hardware->h_lport; int j; int advertised_ethtool_to_rfc3636[][2] = { - {ADVERTISED_10baseT_Half, LLDP_DOT3_LINK_AUTONEG_10BASE_T}, - {ADVERTISED_10baseT_Full, LLDP_DOT3_LINK_AUTONEG_10BASET_FD}, - {ADVERTISED_100baseT_Half, LLDP_DOT3_LINK_AUTONEG_100BASE_TX}, - {ADVERTISED_100baseT_Full, LLDP_DOT3_LINK_AUTONEG_100BASE_TXFD}, - {ADVERTISED_1000baseT_Half, LLDP_DOT3_LINK_AUTONEG_1000BASE_T}, - {ADVERTISED_1000baseT_Full, LLDP_DOT3_LINK_AUTONEG_1000BASE_TFD}, - {ADVERTISED_1000baseKX_Full, LLDP_DOT3_LINK_AUTONEG_1000BASE_XFD}, - {ADVERTISED_Pause, LLDP_DOT3_LINK_AUTONEG_FDX_PAUSE}, - {ADVERTISED_Asym_Pause, LLDP_DOT3_LINK_AUTONEG_FDX_APAUSE}, - {0,0}}; + {ETHTOOL_LINK_MODE_10baseT_Half_BIT, LLDP_DOT3_LINK_AUTONEG_10BASE_T}, + {ETHTOOL_LINK_MODE_10baseT_Full_BIT, LLDP_DOT3_LINK_AUTONEG_10BASET_FD}, + {ETHTOOL_LINK_MODE_100baseT_Half_BIT, 
LLDP_DOT3_LINK_AUTONEG_100BASE_TX}, + {ETHTOOL_LINK_MODE_100baseT_Full_BIT, LLDP_DOT3_LINK_AUTONEG_100BASE_TXFD}, + {ETHTOOL_LINK_MODE_1000baseT_Half_BIT, LLDP_DOT3_LINK_AUTONEG_1000BASE_T}, + {ETHTOOL_LINK_MODE_1000baseT_Full_BIT, LLDP_DOT3_LINK_AUTONEG_1000BASE_TFD}, + {ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, LLDP_DOT3_LINK_AUTONEG_1000BASE_XFD}, + {ETHTOOL_LINK_MODE_Pause_BIT, LLDP_DOT3_LINK_AUTONEG_FDX_PAUSE}, + {ETHTOOL_LINK_MODE_Asym_Pause_BIT, LLDP_DOT3_LINK_AUTONEG_FDX_APAUSE}, + {-1, 0}}; log_debug("interfaces", "ask ethtool for the appropriate MAC/PHY for %s", hardware->h_ifname); - if (priv_ethtool(hardware->h_ifname, &ethc) == 0) { - port->p_macphy.autoneg_support = (ethc.supported & SUPPORTED_Autoneg) ? 1 : 0; - port->p_macphy.autoneg_enabled = (ethc.autoneg == AUTONEG_DISABLE) ? 0 : 1; - for (j=0; advertised_ethtool_to_rfc3636[j][0]; j++) { - if (ethc.advertising & advertised_ethtool_to_rfc3636[j][0]) { + if (priv_ethtool(hardware->h_ifname, &uset) == 0) { + port->p_macphy.autoneg_support = iflinux_ethtool_link_mode_test_bit( + ETHTOOL_LINK_MODE_Autoneg_BIT, uset.link_modes.supported); + port->p_macphy.autoneg_enabled = (uset.base.autoneg == AUTONEG_DISABLE) ? 
0 : 1; + for (j=0; advertised_ethtool_to_rfc3636[j][0] >= 0; j++) { + if (iflinux_ethtool_link_mode_test_bit( + advertised_ethtool_to_rfc3636[j][0], + uset.link_modes.advertising)) { port->p_macphy.autoneg_advertised |= advertised_ethtool_to_rfc3636[j][1]; - ethc.advertising &= ~advertised_ethtool_to_rfc3636[j][0]; + iflinux_ethtool_link_mode_unset_bit( + advertised_ethtool_to_rfc3636[j][0], + uset.link_modes.advertising); } } - if (ethc.advertising) + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_TP_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_AUI_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_MII_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_BNC_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_Pause_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, uset.link_modes.advertising); + iflinux_ethtool_link_mode_unset_bit(ETHTOOL_LINK_MODE_Backplane_BIT, uset.link_modes.advertising); + if (!iflinux_ethtool_link_mode_is_empty(uset.link_modes.advertising)) { port->p_macphy.autoneg_advertised |= LLDP_DOT3_LINK_AUTONEG_OTHER; - switch (ethc.speed) { + } + switch (uset.base.speed) { case SPEED_10: - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_10BASETFD : LLDP_DOT3_MAU_10BASETHD; - if (ethc.port == PORT_BNC) port->p_macphy.mau_type = LLDP_DOT3_MAU_10BASE2; - if (ethc.port == PORT_FIBRE) - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? 
\ + if (uset.base.port == PORT_BNC) port->p_macphy.mau_type = LLDP_DOT3_MAU_10BASE2; + if (uset.base.port == PORT_FIBRE) + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_10BASEFLFD : LLDP_DOT3_MAU_10BASEFLHD; break; case SPEED_100: - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_100BASETXFD : LLDP_DOT3_MAU_100BASETXHD; - if (ethc.port == PORT_BNC) - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + if (uset.base.port == PORT_BNC) + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_100BASET2FD : LLDP_DOT3_MAU_100BASET2HD; - if (ethc.port == PORT_FIBRE) - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + if (uset.base.port == PORT_FIBRE) + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_100BASEFXFD : LLDP_DOT3_MAU_100BASEFXHD; break; case SPEED_1000: - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_1000BASETFD : LLDP_DOT3_MAU_1000BASETHD; - if (ethc.port == PORT_FIBRE) - port->p_macphy.mau_type = (ethc.duplex == DUPLEX_FULL) ? \ + if (uset.base.port == PORT_FIBRE) + port->p_macphy.mau_type = (uset.base.duplex == DUPLEX_FULL) ? \ LLDP_DOT3_MAU_1000BASEXFD : LLDP_DOT3_MAU_1000BASEXHD; break; case SPEED_10000: @@ -383,11 +426,19 @@ iflinux_macphy(struct lldpd_hardware *hardware) // 10GIGBASER. It's not unusual to have 10GIGBASER on // fiber either but we don't have 10GIGBASET for // copper. No good solution. - port->p_macphy.mau_type = (ethc.port == PORT_FIBRE) ? \ - LLDP_DOT3_MAU_10GIGBASELR : LLDP_DOT3_MAU_10GIGBASECX4; + port->p_macphy.mau_type = (uset.base.port == PORT_FIBRE) ? \ + LLDP_DOT3_MAU_10GIGBASELR : LLDP_DOT3_MAU_10GIGBASECX4; break; + case SPEED_40000: + // Same kind of approximation. + port->p_macphy.mau_type = (uset.base.port == PORT_FIBRE) ? 
\ + LLDP_DOT3_MAU_40GBASELR4 : LLDP_DOT3_MAU_40GBASECR4; + case SPEED_100000: + // Ditto + port->p_macphy.mau_type = (uset.base.port == PORT_FIBRE) ? \ + LLDP_DOT3_MAU_100GBASELR4 : LLDP_DOT3_MAU_100GBASECR10; } - if (ethc.port == PORT_AUI) port->p_macphy.mau_type = LLDP_DOT3_MAU_AUI; + if (uset.base.port == PORT_AUI) port->p_macphy.mau_type = LLDP_DOT3_MAU_AUI; } #endif } diff --git a/src/daemon/lldpd.h b/src/daemon/lldpd.h index b9e9d7b2..762e83b7 100644 --- a/src/daemon/lldpd.h +++ b/src/daemon/lldpd.h @@ -212,9 +212,21 @@ void priv_wait(void); void priv_ctl_cleanup(const char *ctlname); char *priv_gethostname(void); #ifdef HOST_OS_LINUX +#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX) +#define ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ + uint32_t name[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32] + +struct ethtool_link_usettings { + struct ethtool_link_settings base; + struct { + ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); + } link_modes; +}; int priv_open(char*); void asroot_open(void); -int priv_ethtool(char*, struct ethtool_cmd*); +int priv_ethtool(char*, struct ethtool_link_usettings*); # ifdef ENABLE_OLDIES void asroot_ethtool(void); # endif diff --git a/src/daemon/priv-linux.c b/src/daemon/priv-linux.c index a1f7e597..a24051ec 100644 --- a/src/daemon/priv-linux.c +++ b/src/daemon/priv-linux.c @@ -18,6 +18,7 @@ #include "lldpd.h" #include +#include #include #include #include @@ -54,28 +55,88 @@ priv_open(char *file) } static int -asroot_ethtool_real(const char *ifname, struct ethtool_cmd *ethc) { +asroot_ethtool_real(const char *ifname, struct ethtool_link_usettings *uset) { int rc, sock = -1; - struct ifreq ifr = { - .ifr_data = (caddr_t)ethc - }; + struct ifreq ifr = {}; + if ((rc = sock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + return rc; + } strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); - memset(ethc, 0, sizeof(struct ethtool_cmd)); - ethc->cmd = 
ETHTOOL_GSET; - if (((rc = sock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) || - (rc = ioctl(sock, SIOCETHTOOL, &ifr)) != 0) { - if (sock != -1) close(sock); - return rc; + /* Try with ETHTOOL_GLINKSETTINGS first */ + struct { + struct ethtool_link_settings req; + uint32_t link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + } ecmd; + static int8_t nwords = 0; + + if (nwords == 0) { + /* Do a handshake first. We assume that this is device-independent. */ + memset(&ecmd, 0, sizeof(ecmd)); + ecmd.req.cmd = ETHTOOL_GLINKSETTINGS; + ifr.ifr_data = (caddr_t)&ecmd; + rc = ioctl(sock, SIOCETHTOOL, &ifr); + if (rc == 0) { + nwords = -ecmd.req.link_mode_masks_nwords; + log_debug("privsep", "glinksettings nwords is %" PRId8, nwords); + } } + if (nwords > 0) { + memset(&ecmd, 0, sizeof(ecmd)); + ecmd.req.cmd = ETHTOOL_GLINKSETTINGS; + ecmd.req.link_mode_masks_nwords = nwords; + ifr.ifr_data = (caddr_t)&ecmd; + rc = ioctl(sock, SIOCETHTOOL, &ifr); + if (rc == 0) { + log_debug("privsep", "got ethtool results for %s with GLINKSETTINGS", + ifname); + memcpy(&uset->base, &ecmd.req, sizeof(uset->base)); + unsigned int u32_offs = 0; + memcpy(uset->link_modes.supported, + &ecmd.link_mode_data[u32_offs], + 4 * ecmd.req.link_mode_masks_nwords); + u32_offs += ecmd.req.link_mode_masks_nwords; + memcpy(uset->link_modes.advertising, + &ecmd.link_mode_data[u32_offs], + 4 * ecmd.req.link_mode_masks_nwords); + u32_offs += ecmd.req.link_mode_masks_nwords; + memcpy(uset->link_modes.lp_advertising, + &ecmd.link_mode_data[u32_offs], + 4 * ecmd.req.link_mode_masks_nwords); + goto end; + } + } + + /* Try with ETHTOOL_GSET */ + struct ethtool_cmd ethc; + memset(&ethc, 0, sizeof(ethc)); + ethc.cmd = ETHTOOL_GSET; + ifr.ifr_data = (caddr_t)&ethc; + rc = ioctl(sock, SIOCETHTOOL, &ifr); + if (rc == 0) { + /* Do a partial copy (only what we need) */ + log_debug("privsep", "got ethtool results for %s with GSET", + ifname); + memset(uset, 0, sizeof(*uset)); + uset->base.cmd = 
uset->base.link_mode_masks_nwords = 1; + uset->link_modes.supported[0] = ethc.supported; + uset->link_modes.advertising[0] = ethc.advertising; + uset->link_modes.lp_advertising[0] = ethc.lp_advertising; + uset->base.speed = (ethc.speed_hi << 16) | ethc.speed; + uset->base.duplex = ethc.duplex; + uset->base.port = ethc.port; + uset->base.autoneg = ethc.autoneg; + } +end: close(sock); return rc; } -/* Proxy for ethtool ioctl (GSET only). Not needed since +/* Proxy for ethtool ioctl (GSET/GLINKSETTINGS only). Not needed since * 0fdc100bdc4b7ab61ed632962c76dfe539047296 (2.6.37). */ int -priv_ethtool(char *ifname, struct ethtool_cmd *ethc) +priv_ethtool(char *ifname, struct ethtool_link_usettings *uset) { int rc; #ifdef ENABLE_OLDIES @@ -89,9 +150,9 @@ priv_ethtool(char *ifname, struct ethtool_cmd *ethc) must_read(PRIV_UNPRIVILEGED, &rc, sizeof(int)); if (rc != 0) return rc; - must_read(PRIV_UNPRIVILEGED, ethc, sizeof(struct ethtool_cmd)); + must_read(PRIV_UNPRIVILEGED, uset, sizeof(struct ethtool_link_usettings)); #else - rc = asroot_ethtool_real(ifname, ethc); + rc = asroot_ethtool_real(ifname, uset); #endif return rc; } @@ -160,7 +221,7 @@ asroot_open() void asroot_ethtool() { - struct ethtool_cmd ethc; + struct ethtool_link_usettings uset; int len, rc; char *ifname; @@ -169,11 +230,11 @@ asroot_ethtool() fatal("privsep", NULL); must_read(PRIV_PRIVILEGED, ifname, len); ifname[len] = '\0'; - rc = asroot_ethtool_real(ifname, &ethc); + rc = asroot_ethtool_real(ifname, &uset); free(ifname); must_write(PRIV_PRIVILEGED, &rc, sizeof(int)); if (rc == -1) return; - must_write(PRIV_PRIVILEGED, &ethc, sizeof(struct ethtool_cmd)); + must_write(PRIV_PRIVILEGED, &uset, sizeof(struct ethtool_link_usettings)); } #endif diff --git a/src/lldp-const.h b/src/lldp-const.h index 1e69e4bc..6ad2d071 100644 --- a/src/lldp-const.h +++ b/src/lldp-const.h @@ -47,7 +47,7 @@ #define LLDP_PORTID_SUBTYPE_LOCAL 7 #define LLDP_PORTID_SUBTYPE_MAX LLDP_PORTID_SUBTYPE_LOCAL -/* Operational MAU Type field, 
from RFC 3636 (see IANAifMauTypeListBits) */ +/* Operational MAU Type field. See: https://www.iana.org/assignments/ianamau-mib/ianamau-mib */ #define LLDP_DOT3_MAU_AUI 1 #define LLDP_DOT3_MAU_10BASE5 2 #define LLDP_DOT3_MAU_FOIRL 3 @@ -101,6 +101,31 @@ #define LLDP_DOT3_MAU_1000BASEPX10U 51 #define LLDP_DOT3_MAU_1000BASEPX20D 52 #define LLDP_DOT3_MAU_1000BASEPX20U 53 +#define LLDP_DOT3_MAU_10GBASET 54 +#define LLDP_DOT3_MAU_10GBASELRM 55 +#define LLDP_DOT3_MAU_1000BASEKX 56 +#define LLDP_DOT3_MAU_10GBASEKX4 57 +#define LLDP_DOT3_MAU_10GBASEKR 58 +#define LLDP_DOT3_MAU_10G1GBASEPRXD1 59 +#define LLDP_DOT3_MAU_10G1GBASEPRXD2 60 +#define LLDP_DOT3_MAU_10G1GBASEPRXD3 61 +#define LLDP_DOT3_MAU_10G1GBASEPRXU1 62 +#define LLDP_DOT3_MAU_10G1GBASEPRXU2 63 +#define LLDP_DOT3_MAU_10G1GBASEPRXU3 64 +#define LLDP_DOT3_MAU_10GBASEPRD1 65 +#define LLDP_DOT3_MAU_10GBASEPRD2 66 +#define LLDP_DOT3_MAU_10GBASEPRD3 67 +#define LLDP_DOT3_MAU_10GBASEPRU1 68 +#define LLDP_DOT3_MAU_10GBASEPRU3 69 +#define LLDP_DOT3_MAU_40GBASEKR4 70 +#define LLDP_DOT3_MAU_40GBASECR4 71 +#define LLDP_DOT3_MAU_40GBASESR4 72 +#define LLDP_DOT3_MAU_40GBASEFR 73 +#define LLDP_DOT3_MAU_40GBASELR4 74 +#define LLDP_DOT3_MAU_100GBASECR10 75 +#define LLDP_DOT3_MAU_100GBASESR10 76 +#define LLDP_DOT3_MAU_100GBASELR4 77 +#define LLDP_DOT3_MAU_100GBASEER4 78 /* Dot3 Power Devicetype */ #define LLDP_DOT3_POWER_PSE 1 @@ -129,7 +154,9 @@ #define LLDP_DOT3_POWER_PRIO_HIGH 2 #define LLDP_DOT3_POWER_PRIO_LOW 3 -/* PMD Auto-Negotiation Advertised Capability field, from RFC 3636 (see IANAifMauAutoNegCapBits) */ +/* PMD Auto-Negotiation Advertised Capability field, from RFC 3636 (see + * IANAifMauAutoNegCapBits). Unfortunately, we are limited to two bytes, so + * higher speed capabilities will map to "other". */ #define LLDP_DOT3_LINK_AUTONEG_OTHER 0x8000 #define LLDP_DOT3_LINK_AUTONEG_10BASE_T 0x4000 #define LLDP_DOT3_LINK_AUTONEG_10BASET_FD 0x2000 -- 2.39.5