Subject: cxgb3 - manage private iSCSI IP addresses From: Karen Xie References: FATE#304154,bnc#433500 The accelerated iSCSI traffic uses a private IP address unknown to the OS. Create a per port sysfs entry to pass an IP address to the NIC driver, and a control call for the iSCSI driver to grab it. The IP address is required in both drivers to manage ARP requests and connection set up. Signed-off-by: Karen Xie Signed-off-by: Chandra Seetharaman Signed-off-by: Hannes Reinecke --- --- drivers/net/cxgb3/adapter.h | 1 drivers/net/cxgb3/cxgb3_ctl_defs.h | 9 ++++ drivers/net/cxgb3/cxgb3_main.c | 46 +++++++++++++++++++++++++ drivers/net/cxgb3/cxgb3_offload.c | 45 +++++++++++++++++------- drivers/net/cxgb3/sge.c | 68 ++++++++++++++++++++++++++++++++++--- 5 files changed, 152 insertions(+), 17 deletions(-) --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -64,6 +64,7 @@ struct port_info { struct link_config link_config; struct net_device_stats netstats; int activity; + __be32 iscsi_ipv4addr; }; enum { /* adapter flags */ --- a/drivers/net/cxgb3/cxgb3_ctl_defs.h +++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h @@ -57,6 +57,9 @@ enum { RDMA_GET_MIB = 19, GET_RX_PAGE_INFO = 50, + + GET_ISCSI_IPV4ADDR = 51, + SET_ISCSI_IPV4ADDR = 52, }; /* @@ -86,6 +89,12 @@ struct iff_mac { u16 vlan_tag; }; +/* Structure used to request a port's iSCSI IPv4 address */ +struct iscsi_ipv4addr { + struct net_device *dev; /* the net_device */ + __be32 ipv4addr; /* the return iSCSI IPv4 address */ +}; + struct pci_dev; /* --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "common.h" @@ -688,6 +689,47 @@ static struct attribute *offload_attrs[] static struct attribute_group offload_attr_group = {.attrs = offload_attrs }; +static ssize_t iscsi_ipv4addr_attr_show(struct device *d, char *buf) +{ + struct port_info *pi = netdev_priv(to_net_dev(d)); + + __be32 a = pi->iscsi_ipv4addr; + return 
sprintf(buf, NIPQUAD_FMT "\n", NIPQUAD(a)); +} + +static ssize_t iscsi_ipv4addr_attr_store(struct device *d, + const char *buf, size_t len) +{ + struct port_info *pi = netdev_priv(to_net_dev(d)); + + pi->iscsi_ipv4addr = in_aton(buf); + return len; +} + +#define ISCSI_IPADDR_ATTR(name) \ +static ssize_t show_##name(struct device *d, struct device_attribute *attr, \ + char *buf) \ +{ \ + return iscsi_ipv4addr_attr_show(d, buf); \ +} \ +static ssize_t store_##name(struct device *d, struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return iscsi_ipv4addr_attr_store(d, buf, len); \ +} \ +static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name) + +ISCSI_IPADDR_ATTR(iscsi_ipv4addr); + +static struct attribute *iscsi_offload_attrs[] = { + &dev_attr_iscsi_ipv4addr.attr, + NULL +}; + +static struct attribute_group iscsi_offload_attr_group = { + .attrs = iscsi_offload_attrs +}; + /* * Sends an sk_buff to an offload queue driver * after dealing with any active network taps. 
@@ -1079,6 +1121,7 @@ static int cxgb_open(struct net_device * if (err) printk(KERN_WARNING "Could not initialize offload capabilities\n"); + sysfs_create_group(&dev->dev.kobj, &iscsi_offload_attr_group); } link_start(dev); @@ -1101,6 +1144,9 @@ static int cxgb_close(struct net_device netif_carrier_off(dev); t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); + if (is_offload(adapter) && !ofld_disable) + sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group); + spin_lock(&adapter->work_lock); /* sync with update task */ clear_bit(pi->port_id, &adapter->open_device_map); spin_unlock(&adapter->work_lock); --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -182,7 +182,9 @@ static struct net_device *get_iff_from_m static int cxgb_ulp_iscsi_ctl(struct adapter *adapter, unsigned int req, void *data) { + int i; int ret = 0; + unsigned int val = 0; struct ulp_iscsi_info *uiip = data; switch (req) { @@ -191,31 +193,36 @@ static int cxgb_ulp_iscsi_ctl(struct ada uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT); uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT); uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK); + val = t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ); + for (i = 0; i < 4; i++, val >>= 8) + uiip->pgsz_factor[i] = val & 0xFF; /* * On tx, the iscsi pdu has to be <= tx page size and has to * fit into the Tx PM FIFO. 
*/ uiip->max_txsz = min(adapter->params.tp.tx_pg_size, t3_read_reg(adapter, A_PM1_TX_CFG) >> 17); - /* on rx, the iscsi pdu has to be < rx page size and the - whole pdu + cpl headers has to fit into one sge buffer */ - uiip->max_rxsz = min_t(unsigned int, - adapter->params.tp.rx_pg_size, - (adapter->sge.qs[0].fl[1].buf_size - - sizeof(struct cpl_rx_data) * 2 - - sizeof(struct cpl_rx_data_ddp))); + /* + * on rx, the iscsi pdu has to be < rx page size and the + * max rx data length programmed in TP + */ + uiip->max_rxsz = min(adapter->params.tp.rx_pg_size, + ((t3_read_reg(adapter, A_TP_PARA_REG2)) + >> S_MAXRXDATA) & M_MAXRXDATA); break; case ULP_ISCSI_SET_PARAMS: t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask); /* set MaxRxData and MaxCoalesceSize to 16224 */ t3_write_reg(adapter, A_TP_PARA_REG2, 0x3f603f60); /* program the ddp page sizes */ - { - int i; - unsigned int val = 0; - for (i = 0; i < 4; i++) - val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i); - if (val) + for (val = 0, i = 0; i < 4; i++) + val |= (uiip->pgsz_factor[i] & 0xF) << (8 * i); + if (val && (val != t3_read_reg(adapter, A_ULPRX_ISCSI_PSZ))) { + printk(KERN_INFO + "%s, setting iscsi pgsz 0x%x, %u,%u,%u,%u.\n", + adapter->name, val, uiip->pgsz_factor[0], + uiip->pgsz_factor[1], uiip->pgsz_factor[2], + uiip->pgsz_factor[3]); t3_write_reg(adapter, A_ULPRX_ISCSI_PSZ, val); } break; @@ -407,6 +414,18 @@ static int cxgb_offload_ctl(struct t3cde rx_page_info->page_size = tp->rx_pg_size; rx_page_info->num = tp->rx_num_pgs; break; + case GET_ISCSI_IPV4ADDR: { + struct iscsi_ipv4addr *p = data; + struct port_info *pi = netdev_priv(p->dev); + p->ipv4addr = pi->iscsi_ipv4addr; + break; + } + case SET_ISCSI_IPV4ADDR: { + struct iscsi_ipv4addr *p = data; + struct port_info *pi = netdev_priv(p->dev); + pi->iscsi_ipv4addr = p->ipv4addr; + break; + } default: return -EOPNOTSUPP; } --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -36,6 +36,7 @@ #include #include #include +#include
#include "common.h" #include "regs.h" #include "sge_defs.h" @@ -1856,6 +1857,53 @@ static void restart_tx(struct sge_qset * } /** + * cxgb3_arp_process - process an ARP request probing a private IP address + * @adapter: the adapter + * @skb: the skbuff containing the ARP request + * + * Check if the ARP request is probing the private IP address + * dedicated to iSCSI, generate an ARP reply if so. + */ +static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct port_info *pi; + struct arphdr *arp; + unsigned char *arp_ptr; + unsigned char *sha; + __be32 sip, tip; + + if (!dev) + return; + + skb_reset_network_header(skb); + arp = arp_hdr(skb); + + if (arp->ar_op != htons(ARPOP_REQUEST)) + return; + + arp_ptr = (unsigned char *)(arp + 1); + sha = arp_ptr; + arp_ptr += dev->addr_len; + memcpy(&sip, arp_ptr, sizeof(sip)); + arp_ptr += sizeof(sip); + arp_ptr += dev->addr_len; + memcpy(&tip, arp_ptr, sizeof(tip)); + + pi = netdev_priv(dev); + if (tip != pi->iscsi_ipv4addr) + return; + + arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, + dev->dev_addr, sha); +} + +static inline int is_arp(struct sk_buff *skb) +{ + return skb->protocol == htons(ETH_P_ARP); +} + +/** * rx_eth - process an ingress ethernet packet * @adap: the adapter * @rq: the response queue that received the packet @@ -1879,7 +1927,7 @@ static void rx_eth(struct adapter *adap, pi = netdev_priv(skb->dev); if (pi->rx_csum_offload && p->csum_valid && p->csum == htons(0xffff) && !p->fragment) { - rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; + qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else skb->ip_summed = CHECKSUM_NONE; @@ -1894,16 +1942,28 @@ static void rx_eth(struct adapter *adap, grp, ntohs(p->vlan), p); - else + else { + if (unlikely(pi->iscsi_ipv4addr && + is_arp(skb))) { + unsigned short vtag = ntohs(p->vlan) & + VLAN_VID_MASK; + skb->dev = vlan_group_get_device(grp, + vtag); + 
cxgb3_arp_process(adap, skb); + } __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan), - rq->polling); + rq->polling); + } else dev_kfree_skb_any(skb); } else if (rq->polling) { if (lro) lro_receive_skb(&qs->lro_mgr, skb, p); - else + else { + if (unlikely(pi->iscsi_ipv4addr && is_arp(skb))) + cxgb3_arp_process(adap, skb); netif_receive_skb(skb); + } } else netif_rx(skb); }