From: Niels Möller Date: Fri, 12 Apr 2013 11:19:41 +0000 (+0200) Subject: ARM umac_nh: Use vmlal, 16% speedup. X-Git-Tag: nettle_2.7_release_20130424~54 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cdde35bb2530c4a29cb72dea9d5207a9c954c80f;p=thirdparty%2Fnettle.git ARM umac_nh: Use vmlal, 16% speedup. --- diff --git a/ChangeLog b/ChangeLog index 7a163fe7..a4dd7b2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ 2013-04-12 Niels Möller - * armv7/umac-nh.asm: New file. 2.1 time speedup. + * armv7/umac-nh.asm: New file. 2.4 time speedup. + * armv7/machine.m4 (D0REG, D1REG): New macros. * configure.ac (asm_replace_list): Added umac-nh.asm and diff --git a/armv7/umac-nh.asm b/armv7/umac-nh.asm index 7c0a0290..87cb86d0 100644 --- a/armv7/umac-nh.asm +++ b/armv7/umac-nh.asm @@ -30,7 +30,7 @@ define(, ) define(, ) define(, ) define(, ) -define(, ) +define(, ) define(, ) define(, ) define(, ) @@ -59,7 +59,7 @@ PROLOGUE(_nettle_umac_nh) vmov.i32 D0REG(QLEFT)[0], SHIFT vmov.32 D1REG(QLEFT), D0REG(QLEFT) - vmov.i64 QACC, #0 + vmov.i64 QY, #0 vshl.u64 DM, DM, D0REG(QRIGHT) .Loop: @@ -78,14 +78,12 @@ PROLOGUE(_nettle_umac_nh) vld1.i32 {QK0, QK1}, [KEY]! vadd.i32 QA, QA, QK0 vadd.i32 QB, QB, QK1 - vmull.u32 QT0, D0REG(QA), D0REG(QB) - vmull.u32 QT1, D1REG(QA), D1REG(QB) subs LENGTH, LENGTH, #32 - vadd.i64 QACC, QACC, QT0 - vadd.i64 QACC, QACC, QT1 + vmlal.u32 QY, D0REG(QA), D0REG(QB) + vmlal.u32 QY, D1REG(QA), D1REG(QB) bhi .Loop - vadd.i64 D0REG(QACC), D0REG(QACC), D1REG(QACC) - vmov r0, r1, D0REG(QACC) + vadd.i64 D0REG(QY), D0REG(QY), D1REG(QY) + vmov r0, r1, D0REG(QY) bx lr EPILOGUE(_nettle_umac_nh)