From: Martin Willi <martin@revosec.ch>
Date: Thu, 19 Mar 2015 14:36:41 +0000 (+0100)
Subject: utils: Auto-inlining memxor()
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=91e3ccd94407c167cfaec5b4e59852f4ac11a388;p=thirdparty%2Fstrongswan.git

utils: Auto-inlining memxor()

Similar to memwipe(), inlined memxor() can improve performance significantly
for some algorithms making use of it. In AES-GCM, this can be up to ~20%.
---

diff --git a/src/libstrongswan/utils/utils.c b/src/libstrongswan/utils/utils.c
index 3d5e3dfc90..b01a1b16f8 100644
--- a/src/libstrongswan/utils/utils.c
+++ b/src/libstrongswan/utils/utils.c
@@ -61,44 +61,9 @@ ENUM(status_names, SUCCESS, NEED_MORE,
 /**
  * Described in header.
  */
-void memxor(u_int8_t dst[], u_int8_t src[], size_t n)
+void memxor_noinline(u_int8_t dst[], u_int8_t src[], size_t n)
 {
-	int m, i;
-
-	/* byte wise XOR until dst aligned */
-	for (i = 0; (uintptr_t)&dst[i] % sizeof(long) && i < n; i++)
-	{
-		dst[i] ^= src[i];
-	}
-	/* try to use words if src shares an aligment with dst */
-	switch (((uintptr_t)&src[i] % sizeof(long)))
-	{
-		case 0:
-			for (m = n - sizeof(long); i <= m; i += sizeof(long))
-			{
-				*(long*)&dst[i] ^= *(long*)&src[i];
-			}
-			break;
-		case sizeof(int):
-			for (m = n - sizeof(int); i <= m; i += sizeof(int))
-			{
-				*(int*)&dst[i] ^= *(int*)&src[i];
-			}
-			break;
-		case sizeof(short):
-			for (m = n - sizeof(short); i <= m; i += sizeof(short))
-			{
-				*(short*)&dst[i] ^= *(short*)&src[i];
-			}
-			break;
-		default:
-			break;
-	}
-	/* byte wise XOR of the rest */
-	for (; i < n; i++)
-	{
-		dst[i] ^= src[i];
-	}
+	memxor_inline(dst, src, n); /* out-of-line wrapper around the inline body */
 }
 
 /**
diff --git a/src/libstrongswan/utils/utils.h b/src/libstrongswan/utils/utils.h
index 029a375182..8ee94ab091 100644
--- a/src/libstrongswan/utils/utils.h
+++ b/src/libstrongswan/utils/utils.h
@@ -551,10 +551,68 @@ typedef struct timespec timespec_t;
  */
 typedef struct sockaddr sockaddr_t;
 
+/**
+ * Like memcpy(), but XORs src into dst instead of copying; out-of-line variant.
+ */
+void memxor_noinline(u_int8_t dst[], u_int8_t src[], size_t n);
+
+/**
+ * Like memcpy(), but XORs n bytes of src into dst instead of copying; inline variant.
+ */
+static inline void memxor_inline(u_int8_t dst[], u_int8_t src[], size_t n)
+{
+	int m, i;	/* NOTE(review): int indices vs. size_t n; assumes n fits an int -- TODO confirm */
+
+	/* XOR byte by byte until &dst[i] is aligned to sizeof(long) or n is used up */
+	for (i = 0; (uintptr_t)&dst[i] % sizeof(long) && i < n; i++)
+	{
+		dst[i] ^= src[i];
+	}
+	/* XOR word-wise if src is long-aligned or off by sizeof(int)/sizeof(short) */
+	switch (((uintptr_t)&src[i] % sizeof(long)))
+	{
+		case 0:
+			for (m = n - sizeof(long); i <= m; i += sizeof(long))
+			{
+				*(long*)&dst[i] ^= *(long*)&src[i];
+			}
+			break;
+		case sizeof(int):
+			for (m = n - sizeof(int); i <= m; i += sizeof(int))
+			{
+				*(int*)&dst[i] ^= *(int*)&src[i];
+			}
+			break;
+		case sizeof(short):
+			for (m = n - sizeof(short); i <= m; i += sizeof(short))
+			{
+				*(short*)&dst[i] ^= *(short*)&src[i];
+			}
+			break;
+		default:
+			break;
+	}
+	/* XOR whatever is left one byte at a time (everything, if no word loop ran) */
+	for (; i < n; i++)
+	{
+		dst[i] ^= src[i];
+	}
+}
+
 /**
- * Same as memcpy, but XORs src into dst instead of copy
+ * Like memcpy(), but XORs src into dest; uses the inline body only for constant n.
  */
-void memxor(u_int8_t dest[], u_int8_t src[], size_t n);
+static inline void memxor(u_int8_t dest[], u_int8_t src[], size_t n)
+{
+	if (__builtin_constant_p(n))
+	{
+		memxor_inline(dest, src, n);	/* n known at compile time: inline variant */
+	}
+	else
+	{
+		memxor_noinline(dest, src, n);	/* otherwise: out-of-line variant */
+	}
+}
 
 /**
  * Safely overwrite n bytes of memory at ptr with zero, non-inlining variant.