From: Julian Seward
Date: Wed, 4 Feb 2015 17:52:42 +0000 (+0000)
Subject: Vectorise the memcmp template implementation. For scenarios requiring
X-Git-Tag: svn/VALGRIND_3_11_0~691
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=bc14b1eb529eb508f0d6919bd7ad1eefffda7d99;p=thirdparty%2Fvalgrind.git

Vectorise the memcmp template implementation. For scenarios requiring
a lot of memcmp-ing (firefox reftests) this is measurably faster.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14901
---

diff --git a/shared/vg_replace_strmem.c b/shared/vg_replace_strmem.c
index 5203306bd7..ab1b1a85e7 100644
--- a/shared/vg_replace_strmem.c
+++ b/shared/vg_replace_strmem.c
@@ -959,18 +959,33 @@ static inline void my_exit ( int x )
    int VG_REPLACE_FUNCTION_EZU(20190,soname,fnname) \
           ( const void *s1V, const void *s2V, SizeT n ) \
    { \
-      int res; \
-      UChar a0; \
-      UChar b0; \
-      const UChar* s1 = s1V; \
-      const UChar* s2 = s2V; \
+      const Addr WS = sizeof(UWord); /* 8 or 4 */ \
+      const Addr WM = WS - 1; /* 7 or 3 */ \
+      Addr s1A = (Addr)s1V; \
+      Addr s2A = (Addr)s2V; \
+      \
+      if (((s1A | s2A) & WM) == 0) { \
+         /* Both areas are word aligned. Skip over the */ \
+         /* equal prefix as fast as possible. */ \
+         while (n >= WS) { \
+            UWord w1 = *(UWord*)s1A; \
+            UWord w2 = *(UWord*)s2A; \
+            if (w1 != w2) break; \
+            s1A += WS; \
+            s2A += WS; \
+            n -= WS; \
+         } \
+      } \
+      \
+      const UChar* s1 = (const UChar*) s1A; \
+      const UChar* s2 = (const UChar*) s2A; \
       \
       while (n != 0) { \
-         a0 = s1[0]; \
-         b0 = s2[0]; \
+         UChar a0 = s1[0]; \
+         UChar b0 = s2[0]; \
          s1 += 1; \
          s2 += 1; \
-         res = ((int)a0) - ((int)b0); \
+         int res = ((int)a0) - ((int)b0); \
          if (res != 0) \
             return res; \
          n -= 1; \