[arm] Add optimised TCP/IP checksumming for 64-bit ARM

author Michael Brown <mcb30@ipxe.org>

Tue, 10 May 2016 16:13:05 +0000 (17:13 +0100)

committer Michael Brown <mcb30@ipxe.org>

Wed, 11 May 2016 07:16:36 +0000 (08:16 +0100)
author Michael Brown <mcb30@ipxe.org>
Tue, 10 May 2016 16:13:05 +0000 (17:13 +0100)
committer Michael Brown <mcb30@ipxe.org>
Wed, 11 May 2016 07:16:36 +0000 (08:16 +0100)
diff --git a/src/arch/arm/include/bits/tcpip.h b/src/arch/arm32/include/bits/tcpip.h

similarity index 100%

rename from src/arch/arm/include/bits/tcpip.h

rename to src/arch/arm32/include/bits/tcpip.h
diff --git a/src/arch/arm64/core/arm64_tcpip.c b/src/arch/arm64/core/arm64_tcpip.c

new file mode 100644 (file)

index 0000000..0ef04ea
--- /dev/null
+++ b/src/arch/arm64/core/arm64_tcpip.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ * You can also choose to distribute this program under the terms of
+ * the Unmodified Binary Distribution Licence (as given in the file
+ * COPYING.UBDL), provided that you have satisfied its requirements.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+/** @file
+ *
+ * TCP/IP checksum
+ *
+ */
+
+#include <strings.h>
+#include <ipxe/tcpip.h>
+
+/** Alignment used by main checksumming loop
+ *
+ * This is the number of bytes consumed by each step of the main
+ * loop (one load of a pair of 64-bit registers).  It must be a power
+ * of two, since ( TCPIP_CHKSUM_ALIGN - 1 ) is used as a bit mask when
+ * calculating the pre- and post-alignment lengths.
+ */
+#define TCPIP_CHKSUM_ALIGN 16
+
+/** Number of steps in each iteration of the unrolled main checksumming loop
+ *
+ * This must be a power of two, since ( TCPIP_CHKSUM_UNROLL - 1 ) is
+ * used as a bit mask, and it must match the number of steps written
+ * out by hand in the inline assembly of tcpip_continue_chksum().
+ */
+#define TCPIP_CHKSUM_UNROLL 4
+
+/**
+ * Calculate continued TCP/IP checksum
+ *
+ * @v sum              Checksum of already-summed data, in network byte order
+ * @v data             Data buffer
+ * @v len              Length of data buffer
+ * @ret sum            Updated checksum, in network byte order
+ *
+ * The buffer is split around a maximally-aligned midpoint into a
+ * pre-alignment portion (summed in byte, halfword, word and
+ * doubleword pieces), a main portion (summed 16 bytes at a time by an
+ * unrolled loop) and a post-alignment portion (summed in doubleword,
+ * word, halfword and byte pieces).  The 64-bit running total is then
+ * folded down to 16 bits.
+ *
+ * A zero-length buffer leaves the checksum unchanged.
+ */
+uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+                                size_t len ) {
+       intptr_t start;
+       intptr_t end;
+       intptr_t mid;
+       unsigned int pre;
+       unsigned int post;
+       uint64_t first;
+       uint64_t total;
+       uint64_t discard_low;
+       uint64_t discard_high;
+
+       /* Avoid potentially undefined shift operation */
+       if ( len == 0 )
+               return sum;
+
+       /* Find maximally-aligned midpoint.  For short blocks of data,
+        * this may be aligned to fewer than 16 bytes.
+        */
+       start = ( ( intptr_t ) data );
+       end = ( start + len );
+       mid = ( end &
+               ~( ( ~( 1UL << 63 ) ) >> ( 64 - flsl ( start ^ end ) ) ) );
+
+       /* Calculate pre- and post-alignment lengths */
+       pre = ( ( mid - start ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+       post = ( ( end - mid ) & ( TCPIP_CHKSUM_ALIGN - 1 ) );
+
+       /* Calculate number of steps in first iteration of unrolled loop.
+        *
+        * This is held in a 64-bit variable since the assembly uses
+        * it as a full-width register operand when calculating the
+        * jump target; a 32-bit variable would leave the upper half
+        * of that register unspecified.
+        */
+       first = ( ( ( len - pre - post ) / TCPIP_CHKSUM_ALIGN ) &
+                 ( TCPIP_CHKSUM_UNROLL - 1 ) );
+
+       /* Widen the checksum so that every bit of the 64-bit
+        * accumulator register is well defined on entry to the
+        * assembly.
+        */
+       total = sum;
+
+       /* Calculate checksum
+        *
+        * The "memory" clobber is required since the assembly reads
+        * the data buffer via a pointer held in a register, which the
+        * compiler cannot otherwise see.
+        */
+       __asm__ ( /* Invert sum */
+                 "eor %w0, %w0, #0xffff\n\t"
+                 /* Clear carry flag */
+                 "cmn xzr, xzr\n\t"
+                 /* Byteswap and sum pre-alignment byte, if applicable */
+                 "tbz %w4, #0, 1f\n\t"
+                 "ldrb %w2, [%1], #1\n\t"
+                 "rev16 %w0, %w0\n\t"
+                 "rev16 %w2, %w2\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum pre-alignment halfword, if applicable */
+                 "tbz %w4, #1, 1f\n\t"
+                 "ldrh %w2, [%1], #2\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum pre-alignment word, if applicable */
+                 "tbz %w4, #2, 1f\n\t"
+                 "ldr %w2, [%1], #4\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum pre-alignment doubleword, if applicable */
+                 "tbz %w4, #3, 1f\n\t"
+                 "ldr %2, [%1], #8\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Jump into unrolled (x4) main loop.  Each step is
+                  * three instructions (12 bytes), so the entry point
+                  * is ( first * 12 ) bytes before the end of the loop.
+                  */
+                 "adr %2, 2f\n\t"
+                 "sub %2, %2, %5, lsl #3\n\t"
+                 "sub %2, %2, %5, lsl #2\n\t"
+                 "br %2\n\t"
+                 "\n1:\n\t"
+                 "ldp %2, %3, [%1], #16\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "adcs %0, %0, %3\n\t"
+                 "ldp %2, %3, [%1], #16\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "adcs %0, %0, %3\n\t"
+                 "ldp %2, %3, [%1], #16\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "adcs %0, %0, %3\n\t"
+                 "ldp %2, %3, [%1], #16\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "adcs %0, %0, %3\n\t"
+                 "\n2:\n\t"
+                 /* Loop until the start of the post-alignment data */
+                 "sub %2, %1, %6\n\t"
+                 "cbnz %2, 1b\n\t"
+                 /* Sum post-alignment doubleword, if applicable */
+                 "tbz %w7, #3, 1f\n\t"
+                 "ldr %2, [%1], #8\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum post-alignment word, if applicable */
+                 "tbz %w7, #2, 1f\n\t"
+                 "ldr %w2, [%1], #4\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum post-alignment halfword, if applicable */
+                 "tbz %w7, #1, 1f\n\t"
+                 "ldrh %w2, [%1], #2\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Sum post-alignment byte, if applicable */
+                 "tbz %w7, #0, 1f\n\t"
+                 "ldrb %w2, [%1], #1\n\t"
+                 "adcs %0, %0, %2\n\t"
+                 "\n1:\n\t"
+                 /* Fold down to a uint32_t plus carry flag */
+                 "lsr %2, %0, #32\n\t"
+                 "adcs %w0, %w0, %w2\n\t"
+                 /* Fold down to a uint16_t plus carry in bit 16 */
+                 "ubfm %2, %0, #0, #15\n\t"
+                 "ubfm %3, %0, #16, #31\n\t"
+                 "adc %w0, %w2, %w3\n\t"
+                 /* Fold down to a uint16_t */
+                 "tbz %w0, #16, 1f\n\t"
+                 "mov %w2, #0xffff\n\t"
+                 "sub %w0, %w0, %w2\n\t"
+                 "tbz %w0, #16, 1f\n\t"
+                 "sub %w0, %w0, %w2\n\t"
+                 "\n1:\n\t"
+                 /* Byteswap back, if applicable */
+                 "tbz %w4, #0, 1f\n\t"
+                 "rev16 %w0, %w0\n\t"
+                 "\n1:\n\t"
+                 /* Invert sum */
+                 "eor %w0, %w0, #0xffff\n\t"
+                 : "+r" ( total ), "+r" ( data ), "=&r" ( discard_low ),
+                   "=&r" ( discard_high )
+                 : "r" ( pre ), "r" ( first ), "r" ( end - post ),
+                   "r" ( post )
+                 : "cc", "memory" );
+
+       /* The folded result occupies only the low 16 bits */
+       return ( ( uint16_t ) total );
+}
diff --git a/src/arch/arm64/include/bits/tcpip.h b/src/arch/arm64/include/bits/tcpip.h

new file mode 100644 (file)

index 0000000..6868653
--- /dev/null
+++ b/src/arch/arm64/include/bits/tcpip.h
@@ -0,0 +1,15 @@
+#ifndef _BITS_TCPIP_H
+#define _BITS_TCPIP_H
+
+/** @file
+ *
+ * Transport-network layer interface
+ *
+ * Declares tcpip_continue_chksum(), the 64-bit ARM implementation
+ * of the continued TCP/IP checksum calculation.  It takes the
+ * checksum of already-summed data, a data buffer and the length of
+ * that buffer, and returns the updated checksum.  Both checksum
+ * values are in network byte order.
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+extern uint16_t tcpip_continue_chksum ( uint16_t sum, const void *data,
+                                       size_t len );
+
+#endif /* _BITS_TCPIP_H */
author	Michael Brown <mcb30@ipxe.org>
	Tue, 10 May 2016 16:13:05 +0000 (17:13 +0100)
committer	Michael Brown <mcb30@ipxe.org>
	Wed, 11 May 2016 07:16:36 +0000 (08:16 +0100)
src/arch/arm32/include/bits/tcpip.h	[moved from src/arch/arm/include/bits/tcpip.h with 100% similarity]	patch \| blob \| blame \| history
src/arch/arm64/core/arm64_tcpip.c	[new file with mode: 0644]	patch \| blob
src/arch/arm64/include/bits/tcpip.h	[new file with mode: 0644]	patch \| blob