From e347736da8caf3acc8e58973c3d3655ad6765e1f Mon Sep 17 00:00:00 2001 From: wschmidt Date: Fri, 3 Oct 2014 20:06:38 +0000 Subject: [PATCH] 2014-10-03 Bill Schmidt * lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled. Restrict the existing Altivec version to big-endian systems so that lvsr is not used on little endian, where it is deprecated. Remove LE-specific code from the now-BE-only version. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@215873 138bc75d-0d04-0410-961f-82ee72b054a4 --- libcpp/ChangeLog | 8 ++++ libcpp/lex.c | 115 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 3b23708d39ae..09304ef84963 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,11 @@ +2014-10-03 Bill Schmidt + + * lex.c (search_line_fast): Add new version to be used for Power8 + and later targets when Altivec is enabled. Restrict the existing + Altivec version to big-endian systems so that lvsr is not used on + little endian, where it is deprecated. Remove LE-specific code + from the now-BE-only version. + 2014-10-02 Bernd Edlinger Jeff Law diff --git a/libcpp/lex.c b/libcpp/lex.c index bdaa0705bc79..45eaca7ab4ff 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -513,9 +513,111 @@ init_vectorized_lexer (void) search_line_fast = impl; } -#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) +#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__) -/* A vection of the fast scanner using AltiVec vectorized byte compares. */ +/* A version of the fast scanner using AltiVec vectorized byte compares + and VSX unaligned loads (when VSX is available). This is otherwise + the same as the pre-GCC 5 version. 
*/ + +static const uchar * +search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) +{ + typedef __attribute__((altivec(vector))) unsigned char vc; + + const vc repl_nl = { + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' + }; + const vc repl_cr = { + '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', + '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' + }; + const vc repl_bs = { + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' + }; + const vc repl_qm = { + '?', '?', '?', '?', '?', '?', '?', '?', + '?', '?', '?', '?', '?', '?', '?', '?', + }; + const vc zero = { 0 }; + + vc data, t; + + /* Main loop processing 16 bytes at a time. */ + do + { + vc m_nl, m_cr, m_bs, m_qm; + + data = *((const vc *)s); /* S may be unaligned here. */ + s += 16; + + m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl); + m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr); + m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs); + m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm); + t = (m_nl | m_cr) | (m_bs | m_qm); + + /* T now contains 0xff in bytes for which we matched one of the relevant + characters. We want to exit the loop if any byte in T is non-zero. + Below is the expansion of vec_any_ne(t, zero). */ + } + while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); + + /* Restore s to point to the 16 bytes we just processed. */ + s -= 16; + + { +#define N (sizeof(vc) / sizeof(long)) + + union { + vc v; + /* Statically assert that N is 2 or 4. */ + unsigned long l[(N == 2 || N == 4) ? N : -1]; + } u; + unsigned long l, i = 0; + + u.v = t; + + /* Find the first word of T that is non-zero. 
*/ + switch (N) + { + case 4: + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); /* FALLTHRU */ + case 2: + l = u.l[i++]; + if (l != 0) + break; + s += sizeof(unsigned long); + l = u.l[i]; + } + + /* L now contains 0xff in bytes for which we matched one of the + relevant characters. We can find the byte index by finding + its bit index and dividing by 8. */ +#ifdef __BIG_ENDIAN__ + l = __builtin_clzl(l) >> 3; +#else + l = __builtin_ctzl(l) >> 3; +#endif + return s + l; + +#undef N + } +} + +#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__) + +/* A version of the fast scanner using AltiVec vectorized byte compares. + This cannot be used for little endian because vec_lvsl/lvsr are + deprecated for little endian and the code won't work properly. */ /* ??? Unfortunately, attribute(target("altivec")) is not yet supported, so we can't compile this function without -maltivec on the command line (or implied by some other switch). */ @@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) beginning with all ones and shifting in zeros according to the mis-alignment. The LVSR instruction pulls the exact shift we want from the address. */ -#ifdef __BIG_ENDIAN__ mask = __builtin_vec_lvsr(0, s); mask = __builtin_vec_perm(zero, ones, mask); -#else - mask = __builtin_vec_lvsl(0, s); - mask = __builtin_vec_perm(ones, zero, mask); -#endif data &= mask; /* While altivec loads mask addresses, we still need to align S so @@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) /* L now contains 0xff in bytes for which we matched one of the relevant characters. We can find the byte index by finding its bit index and dividing by 8. */ -#ifdef __BIG_ENDIAN__ l = __builtin_clzl(l) >> 3; -#else - l = __builtin_ctzl(l) >> 3; -#endif return s + l; #undef N -- 2.39.5