noodle: use SSE palignr

author Matthew Barr <matthew.barr@intel.com>

Thu, 21 Apr 2016 05:39:47 +0000 (15:39 +1000)

committer Matthew Barr <matthew.barr@intel.com>

Wed, 18 May 2016 06:22:12 +0000 (16:22 +1000)
author Matthew Barr <matthew.barr@intel.com>
Thu, 21 Apr 2016 05:39:47 +0000 (15:39 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 18 May 2016 06:22:12 +0000 (16:22 +1000)
diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c

index 621f89b0c7e0720c0ee96ae538dbe0f3197aa349..e2f80a59693601687f6209549df77a849c2de2fc 100644 (file)
--- a/src/hwlm/noodle_engine.c
+++ b/src/hwlm/noodle_engine.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
  #include "util/compare.h"
  #include "util/masked_move.h"
  #include "util/simd_utils.h"
+#include "util/simd_utils_ssse3.h"
  
  #include <ctype.h>
  #include <stdbool.h>
diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c

index 956fd82e778f893e8a4d5511646107e3399a8a34..b36732462f498093f2188de1627c5aa068a32015 100644 (file)
--- a/src/hwlm/noodle_engine_sse.c
+++ b/src/hwlm/noodle_engine_sse.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -190,8 +190,8 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
          m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
          m128 z1 = eq128(mask1, v);
          m128 z2 = eq128(mask2, v);
-        u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2));
-        lastz1 = _mm_srli_si128(z1, 15);
+        u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
+        lastz1 = z1;
  
          // On large packet buffers, this prefetch appears to get us about 2%.
          __builtin_prefetch(d + 128);
author	Matthew Barr <matthew.barr@intel.com>
	Thu, 21 Apr 2016 05:39:47 +0000 (15:39 +1000)
committer	Matthew Barr <matthew.barr@intel.com>
	Wed, 18 May 2016 06:22:12 +0000 (16:22 +1000)
src/hwlm/noodle_engine.c		patch | blob | blame | history
src/hwlm/noodle_engine_sse.c		patch | blob | blame | history