]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / x86_64 / fpu / multiarch / svml_s_tanhf16_core_avx512.S
CommitLineData
c0f36fc3 1/* Function tanhf vectorized with AVX-512.
581c785b 2 Copyright (C) 2021-2022 Free Software Foundation, Inc.
c0f36fc3
SP
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 https://www.gnu.org/licenses/. */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 * NOTE: Since the hyperbolic tangent function is odd
23 * (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
24 * value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
25 *
26 * We use a table lookup method to compute tanh(|x|).
27 * The basic idea is to split the input range into a number of subintervals
28 * and to approximate tanh(.) with a polynomial on each of them.
29 *
30 * IEEE SPECIAL CONDITIONS:
31 * x = [+,-]0, r = [+,-]0
32 * x = +Inf, r = +1
33 * x = -Inf, r = -1
34 * x = QNaN, r = QNaN
35 * x = SNaN, r = QNaN
36 *
37 *
38 * ALGORITHM DETAILS
39 * We handle special values in a callout function, aside from main path
40 * computations. "Special" for this algorithm are:
41 * INF, NAN, |x| > HUGE_THRESHOLD
42 *
43 *
44 * Main path computations are organized as follows:
45 * Actually we split the interval [0, SATURATION_THRESHOLD)
46 * into a number of subintervals. On each subinterval we approximate tanh(.)
47 * with a minimax polynomial of pre-defined degree. Polynomial coefficients
48 * are computed beforehand and stored in table. We also use
49 *
50 * y := |x| + B,
51 *
52 * here B depends on subinterval (the code below uses B = -_sC[j], i.e.
53 * y = |x| - _sC[j]) and is used to make argument closer to zero.
54 * We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
55 * where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
56 * preserve main path computation logic but return 1.0 for all arguments.
57 *
58 * Hence reconstruction looks as follows:
59 * we extract proper polynomial and range reduction coefficients
60 * (Pj and B), corresponding to subinterval, to which |x| belongs,
61 * and return
62 *
63 * r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
64 *
65 * NOTE: we use multiprecision technique to multiply and sum the first
66 * K terms of the polynomial. So Pj, j = 0..K are stored in
67 * table each as a pair of target precision numbers (Pj and PLj) to
68 * achieve wider than target precision.
69 *
70 *
71 */
72
73/* Offsets for data table __svml_stanh_data_internal
74 */
/* All values are byte offsets from the label __svml_stanh_data_internal.
 * _sC and _sP0.._sP7 are 32-entry tables of 32-bit words (128 bytes each);
 * the code loads entries 0..15 from the base offset and entries 16..31
 * from base offset + 64.  The remaining entries are 16-lane broadcast
 * constants of 64 bytes each.  There is no _sP1 table.
 */
75#define _sC 0
76#define _sP0 128
77#define _sP2 256
78#define _sP3 384
79#define _sP4 512
80#define _sP5 640
81#define _sP6 768
82#define _sP7 896
/* 64-byte broadcast constants start here.  */
83#define _iExpMantMask_UISA 1024
84#define _iMinIdxOfsMask_UISA 1088
85#define _iMaxIdxMask_UISA 1152
86#define _sSignMask 1216
87#define _sAbsMask 1280
/* The three offsets _iExpMantMask, _iMinIdxOfsMask and _iMaxIdxMask are
 * not referenced by the code visible in this file.  */
88#define _iExpMantMask 1344
89#define _iExpMask 1408
90#define _iMinIdxOfsMask 1472
91#define _iMaxIdxMask 1536
92
93#include <sysdep.h>
94
/* _ZGVeN16v_tanhf_skx -- tanhf on 16 packed single-precision lanes.
 *
 * In:    zmm0 = 16 input values x.
 * Out:   zmm0 = tanh(x) for every lane.
 * Stack: rbp-based frame, rsp aligned down to 64 bytes, 192 bytes reserved:
 *          0(%rsp)..23(%rsp)    spill slots for r14, r13, r12
 *          64(%rsp)..127(%rsp)  copy of the input vector   (special path)
 *          128(%rsp)..191(%rsp) copy of the result vector  (special path)
 *
 * Main path: a 5-bit index j is derived from the exponent/top mantissa
 * bits of x, 32-entry tables are looked up with vpermt2ps, and the result
 * is sign(x) * Horner polynomial in y = |x| - _sC[j].
 * Special path: every lane whose masked bits exceed _iExpMask is
 * recomputed one at a time with the scalar tanhf.
 *
 * The section name below is `.text.evex512`, which appears to be the
 * section glibc uses for EVEX/512-bit code (not confirmable from this
 * file alone).  The former spelling `.text.exex512` was a typo.
 */
95 .text
96 .section .text.evex512,"ax",@progbits
97ENTRY(_ZGVeN16v_tanhf_skx)
98 pushq %rbp
99 cfi_def_cfa_offset(16)
100 movq %rsp, %rbp
101 cfi_def_cfa(6, 16)
102 cfi_offset(6, -16)
/* Align the frame to 64 bytes and reserve 192 bytes of scratch space.  */
103 andq $-64, %rsp
104 subq $192, %rsp
/* zmm1 keeps the untouched input; zmm0 is reused for sign and result.  */
105 vmovaps %zmm0, %zmm1
/* Preload entries 0..15 of the _sC (offset 0), _sP6, _sP5, _sP4, _sP3 and
 * _sP2 tables; entries 16..31 are merged in by vpermt2ps further down.  */
106 vmovups __svml_stanh_data_internal(%rip), %zmm9
107 vmovups _sP6+__svml_stanh_data_internal(%rip), %zmm11
108 vmovups _sP5+__svml_stanh_data_internal(%rip), %zmm12
109 vmovups _sP4+__svml_stanh_data_internal(%rip), %zmm13
110 vmovups _sP3+__svml_stanh_data_internal(%rip), %zmm14
111 vmovups _sP2+__svml_stanh_data_internal(%rip), %zmm15
/* zmm2 = all ones (ternary-logic immediate 255 sets every bit).  */
112 vpternlogd $255, %zmm2, %zmm2, %zmm2
/* zmm8 = |x|, zmm0 = sign bit of x.  */
113 vandps _sAbsMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm8
114 vandps _sSignMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm0
115
116/* Here huge arguments, INF and NaNs are filtered out to callout. */
/* zmm3 = x & _iExpMantMask_UISA, zmm4 = zmm3 - _iMinIdxOfsMask_UISA.
 * k1 is set for lanes with zmm3 <= _iExpMask (predicate 2 = signed LE),
 * i.e. the lanes that do NOT need the scalar callout.  */
117 vpandd _iExpMantMask_UISA+__svml_stanh_data_internal(%rip), %zmm1, %zmm3
118 vpsubd _iMinIdxOfsMask_UISA+__svml_stanh_data_internal(%rip), %zmm3, %zmm4
119 vpcmpd $2, _iExpMask+__svml_stanh_data_internal(%rip), %zmm3, %k1
120
121/*
122 * small table specific variables *
123 * Constant loading
124 */
125 vpxord %zmm5, %zmm5, %zmm5
126
127/* if VMIN, VMAX is defined for I type */
/* Clamp the biased index to [0, _iMaxIdxMask_UISA] with signed max/min,
 * then shift right by 21 so zmm10 holds a table index per lane.  */
128 vpmaxsd %zmm5, %zmm4, %zmm6
129 vpminsd _iMaxIdxMask_UISA+__svml_stanh_data_internal(%rip), %zmm6, %zmm7
130 vpsrld $21, %zmm7, %zmm10
131 vmovups _sP7+__svml_stanh_data_internal(%rip), %zmm4
/* 32-entry lookups: the destination register supplies entries 0..15, the
 * memory operand (table offset + 64) supplies entries 16..31, and zmm10
 * selects the entry for each lane.  */
132 vpermt2ps _sC+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm9
133 vpermt2ps _sP6+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm11
134 vpermt2ps _sP7+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm4
135 vpermt2ps _sP5+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm12
136 vpermt2ps _sP4+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm13
137 vpermt2ps _sP3+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm14
138 vpermt2ps _sP2+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm15
/* For k1 lanes zmm2 = ~zmm3 & zmm3 = 0; the other lanes keep all ones.
 * k0 then marks the lanes that are still non-zero: the special inputs.  */
139 vpandnd %zmm3, %zmm3, %zmm2{%k1}
140 vptestmd %zmm2, %zmm2, %k0
141 vmovups _sP0+__svml_stanh_data_internal(%rip), %zmm3
/* zmm2 = y = |x| - _sC[j].  */
142 vsubps {rn-sae}, %zmm9, %zmm8, %zmm2
/* edx = 16-bit mask of special lanes.  */
143 kmovw %k0, %edx
/* Horner evaluation, zmm4 = zmm4 * y + next coefficient:
 * (((((P7*y + P6)*y + P5)*y + P4)*y + P3)*y + P2)*y + P0.  */
144 vfmadd213ps {rn-sae}, %zmm11, %zmm2, %zmm4
145 vpermt2ps _sP0+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm3
146 vfmadd213ps {rn-sae}, %zmm12, %zmm2, %zmm4
147 vfmadd213ps {rn-sae}, %zmm13, %zmm2, %zmm4
148 vfmadd213ps {rn-sae}, %zmm14, %zmm2, %zmm4
149 vfmadd213ps {rn-sae}, %zmm15, %zmm2, %zmm4
150 vfmadd213ps {rn-sae}, %zmm3, %zmm2, %zmm4
/* Re-apply the sign of x: zmm0 = sign(x) | poly(y).  */
151 vorps %zmm0, %zmm4, %zmm0
152 testl %edx, %edx
153
154/* Go to special inputs processing branch */
155 jne L(SPECIAL_VALUES_BRANCH)
156 # LOE rbx r12 r13 r14 r15 edx zmm0 zmm1
157
158/* Restore registers
159 * and exit the function
160 */
161
162L(EXIT):
163 movq %rbp, %rsp
164 popq %rbp
165 cfi_def_cfa(7, 8)
166 cfi_restore(6)
167 ret
/* Re-establish the rbp-based CFA for the code placed after the ret.  */
168 cfi_def_cfa(6, 16)
169 cfi_offset(6, -16)
170
171/* Branch to process
172 * special inputs
173 */
174
175L(SPECIAL_VALUES_BRANCH):
/* Spill the input vector and the main-path result so that single lanes can
 * be read and patched through memory.  */
176 vmovups %zmm1, 64(%rsp)
177 vmovups %zmm0, 128(%rsp)
178 # LOE rbx r12 r13 r14 r15 edx zmm0
179
180 xorl %eax, %eax
181 # LOE rbx r12 r13 r14 r15 eax edx
182
183 vzeroupper
/* r12, r13 and r14 are saved because they are used as loop state below:
 * r12d = lane index (starts at 0), r13d = special-lane mask,
 * r14  = lane index used for addressing.  */
184 movq %r12, 16(%rsp)
185 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
186 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
187 movl %eax, %r12d
188 movq %r13, 8(%rsp)
189 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
190 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
191 movl %edx, %r13d
192 movq %r14, (%rsp)
193 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
194 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
195 # LOE rbx r15 r12d r13d
196
197/* Range mask
198 * bits check
199 */
200
201L(RANGEMASK_CHECK):
/* CF = bit r12d of the special-lane mask in r13d.  */
202 btl %r12d, %r13d
203
204/* Call scalar math function */
205 jc L(SCALAR_MATH_CALL)
206 # LOE rbx r15 r12d r13d
207
208/* Special inputs
209 * processing loop
210 */
211
212L(SPECIAL_VALUES_LOOP):
/* Advance to the next lane; 16 lanes in total.  */
213 incl %r12d
214 cmpl $16, %r12d
215
216/* Check bits in range mask */
217 jl L(RANGEMASK_CHECK)
218 # LOE rbx r15 r12d r13d
219
/* All lanes visited: restore the saved registers and reload the patched
 * result vector.  */
220 movq 16(%rsp), %r12
221 cfi_restore(12)
222 movq 8(%rsp), %r13
223 cfi_restore(13)
224 movq (%rsp), %r14
225 cfi_restore(14)
226 vmovups 128(%rsp), %zmm0
227
228/* Go to exit */
229 jmp L(EXIT)
/* The code below is reached only from inside the loop, where r12-r14 are
 * still spilled, so their save-slot CFI expressions are stated again.  */
230 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
231 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
232 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
233 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
234 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
235 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
236 # LOE rbx r12 r13 r14 r15 zmm0
237
238/* Scalar math function call
239 * to process special input
240 */
241
242L(SCALAR_MATH_CALL):
/* r14 = zero-extended lane index.  Load input lane r14 from the spilled
 * input, call scalar tanhf, and store its result over lane r14 of the
 * spilled result vector.  */
243 movl %r12d, %r14d
244 movss 64(%rsp,%r14,4), %xmm0
245 call tanhf@PLT
246 # LOE rbx r14 r15 r12d r13d xmm0
247
248 movss %xmm0, 128(%rsp,%r14,4)
249
250/* Process special inputs in loop */
251 jmp L(SPECIAL_VALUES_LOOP)
252 # LOE rbx r15 r12d r13d
253END(_ZGVeN16v_tanhf_skx)
254
/* Read-only constant table used by _ZGVeN16v_tanhf_skx.  The layout must
 * match the byte offsets #defined near the top of this file: eight
 * 32-entry tables of 128 bytes each, followed by nine 16-lane broadcast
 * constants of 64 bytes each.  */
255 .section .rodata, "a"
256 .align 64
257
/* C-style sketch of the table layout.  It is only seen by the preprocessor
 * if __svml_stanh_data_internal_typedef is defined; nothing visible in
 * this file defines it.  */
258#ifdef __svml_stanh_data_internal_typedef
259typedef unsigned int VUINT32;
260typedef struct
261{
262 __declspec(align(64)) VUINT32 _sC[32][1];
263 __declspec(align(64)) VUINT32 _sP0[32][1];
264 __declspec(align(64)) VUINT32 _sP2[32][1];
265 __declspec(align(64)) VUINT32 _sP3[32][1];
266 __declspec(align(64)) VUINT32 _sP4[32][1];
267 __declspec(align(64)) VUINT32 _sP5[32][1];
268 __declspec(align(64)) VUINT32 _sP6[32][1];
269 __declspec(align(64)) VUINT32 _sP7[32][1];
270 __declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1];
271 __declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1];
272 __declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1];
273 __declspec(align(64)) VUINT32 _sSignMask[16][1];
274 __declspec(align(64)) VUINT32 _sAbsMask[16][1];
275 __declspec(align(64)) VUINT32 _iExpMantMask[16][1];
276 __declspec(align(64)) VUINT32 _iExpMask[16][1];
277 __declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1];
278 __declspec(align(64)) VUINT32 _iMaxIdxMask[16][1];
279} __svml_stanh_data_internal;
280#endif
281__svml_stanh_data_internal:
/* _sC[j]: value subtracted from |x| for subinterval j (y = |x| - _sC[j]).
 * Entries 0 and 31 are zero.  For 1 <= j <= 30 the entry is the midpoint
 * of the bit range that the index computation maps to j.  */
282 /*== _sC ==*/
283 .long 0x00000000, 0x3d700000, 0x3d900000, 0x3db00000
284 .long 0x3dd00000, 0x3df00000, 0x3e100000, 0x3e300000
285 .long 0x3e500000, 0x3e700000, 0x3e900000, 0x3eb00000
286 .long 0x3ed00000, 0x3ef00000, 0x3f100000, 0x3f300000
287 .long 0x3f500000, 0x3f700000, 0x3f900000, 0x3fb00000
288 .long 0x3fd00000, 0x3ff00000, 0x40100000, 0x40300000
289 .long 0x40500000, 0x40700000, 0x40900000, 0x40b00000
290 .long 0x40d00000, 0x40f00000, 0x41100000, 0x00000000
/* _sP0[j]: constant term of the polynomial (added last in the Horner
 * chain).  Entry 31 is 0x3f800000 = 1.0f and entry 31 of every other
 * coefficient table is zero, so subinterval 31 always evaluates to 1.0.  */
291 /*== p0 ==*/
292 .align 64
293 .long 0x00000000, 0x3d6fb9c9, 0x3d8fc35f, 0x3daf9169
294 .long 0x3dcf49ab, 0x3deee849, 0x3e0f0ee8, 0x3e2e4984
295 .long 0x3e4d2f8e, 0x3e6bb32e, 0x3e8c51cd, 0x3ea96163
296 .long 0x3ec543f1, 0x3edfd735, 0x3f028438, 0x3f18abf0
297 .long 0x3f2bc480, 0x3f3bec1c, 0x3f4f2e5b, 0x3f613c53
298 .long 0x3f6ce37d, 0x3f743c4f, 0x3f7a5feb, 0x3f7dea85
299 .long 0x3f7f3b3d, 0x3f7fb78c, 0x3f7fefd4, 0x3f7ffdd0
300 .long 0x3f7fffb4, 0x3f7ffff6, 0x3f7fffff, 0x3f800000
/* _sP2[j]: despite the name this is the coefficient of y^1 as the code
 * uses it (there is no _sP1 table).  _sP3 multiplies y^2, _sP4 y^3,
 * _sP5 y^4, _sP6 y^5 and _sP7 y^6.  */
301 /*== p2 ==*/
302 .align 64
303 .long 0x3f800000, 0x3f7f1f84, 0x3f7ebd11, 0x3f7e1e5f
304 .long 0x3f7d609f, 0x3f7c842d, 0x3f7b00e5, 0x3f789580
305 .long 0x3f75b8ad, 0x3f726fd9, 0x3f6cc59b, 0x3f63fb92
306 .long 0x3f59ff97, 0x3f4f11d7, 0x3f3d7573, 0x3f24f360
307 .long 0x3f0cbfe7, 0x3eec1a69, 0x3eb0a801, 0x3e6753a2
308 .long 0x3e132f1a, 0x3db7e7d3, 0x3d320845, 0x3c84d3d4
309 .long 0x3bc477b7, 0x3b10d3da, 0x3a01601e, 0x388c1a3b
310 .long 0x3717b0da, 0x35a43bce, 0x338306c6, 0x00000000
311 /*== p3 ==*/
312 .align 64
313 .long 0xb0343c7b, 0xbd6ee69d, 0xbd8f0da7, 0xbdae477d
314 .long 0xbdcd2a1f, 0xbdeba80d, 0xbe0c443b, 0xbe293cf3
315 .long 0xbe44f282, 0xbe5f3651, 0xbe81c7c0, 0xbe96d7ca
316 .long 0xbea7fb8e, 0xbeb50e9e, 0xbec12efe, 0xbec4be92
317 .long 0xbebce070, 0xbead510e, 0xbe8ef7d6, 0xbe4b8704
318 .long 0xbe083237, 0xbdaf7449, 0xbd2e1ec4, 0xbc83bf06
319 .long 0xbbc3e0b5, 0xbb10aadc, 0xba0157db, 0xb88c18f2
320 .long 0xb717b096, 0xb5a43bae, 0xb383012c, 0x00000000
321 /*== p4 ==*/
322 .align 64
323 .long 0xbeaaaaa5, 0xbeab0612, 0xbea7f01f, 0xbea4e120
324 .long 0xbea387b7, 0xbea15962, 0xbe9d57f7, 0xbe976b5a
325 .long 0xbe90230d, 0xbe880dff, 0xbe7479b3, 0xbe4c3d88
326 .long 0xbe212482, 0xbdeb8cba, 0xbd5e78ad, 0x3c6b5e6e
327 .long 0x3d839143, 0x3dc21ee1, 0x3de347af, 0x3dcbec96
328 .long 0x3d99ef2d, 0x3d542ea1, 0x3cdde701, 0x3c2cca67
329 .long 0x3b81cb27, 0x3ac073a1, 0x39ac3032, 0x383a94d9
330 .long 0x36ca081d, 0x355abd4c, 0x332b3cb6, 0x00000000
331 /*== p5 ==*/
332 .align 64
333 .long 0xb76dd6b9, 0xbe1c276d, 0x3c1dcf2f, 0x3dc1a78d
334 .long 0x3d96f985, 0x3da2b61b, 0x3dc13397, 0x3dd2f670
335 .long 0x3df48a0a, 0x3e06c5a8, 0x3e1a3aba, 0x3e27c405
336 .long 0x3e2e78d0, 0x3e2c3e44, 0x3e1d3097, 0x3df4a8f4
337 .long 0x3da38508, 0x3d31416a, 0x3b562657, 0xbcaeeac9
338 .long 0xbcce9419, 0xbcaaeac4, 0xbc49e7d0, 0xbba71ddd
339 .long 0xbb003b0e, 0xba3f9a05, 0xb92c08a7, 0xb7ba9232
340 .long 0xb64a0b0f, 0xb4dac169, 0xb2ab78ac, 0x00000000
341 /*== p6 ==*/
342 .align 64
343 .long 0x3e0910e9, 0x43761143, 0x4165ecdc, 0xc190f756
344 .long 0xc08c097d, 0xc02ba813, 0xbf7f6bda, 0x3f2b1dc0
345 .long 0x3ece105d, 0x3f426a94, 0xbadb0dc4, 0x3da43b17
346 .long 0xbd51ab88, 0xbcaea23d, 0xbd3b6d8d, 0xbd6caaad
347 .long 0xbd795bed, 0xbd5fddda, 0xbd038f3b, 0xbc1cad63
348 .long 0x3abb4766, 0x3b95f10b, 0x3b825873, 0x3afaea66
349 .long 0x3a49f878, 0x39996bf3, 0x388f3e6c, 0x371bb0e3
350 .long 0x35a8a5e6, 0x34369b17, 0x322487b0, 0x00000000
351 /*== p7 ==*/
352 .align 64
353 .long 0xbc0e2f66, 0x460bda12, 0x43d638ef, 0xc3e11c3e
354 .long 0xc2baa4e9, 0xc249da2d, 0xc1859b82, 0x40dd5b57
355 .long 0x40494640, 0x40c730a8, 0xbf0f160e, 0x3e30e76f
356 .long 0xbea81387, 0xbdb26a1c, 0xbd351e57, 0xbb4c01a0
357 .long 0x3c1d7bfb, 0x3c722cd1, 0x3c973f1c, 0x3c33a31b
358 .long 0x3b862ef4, 0x3a27b3d0, 0xba3b5907, 0xba0efc22
359 .long 0xb97f9f0f, 0xb8c8af50, 0xb7bdddfb, 0xb64f2950
360 .long 0xb4e085b1, 0xb3731dfa, 0xb15a1f04, 0x00000000
/* AND mask applied to the raw input bits: keeps the exponent field and the
 * two most significant mantissa bits (bits 30..21) and drops the sign.  */
361 .align 64
362 .long 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000 /* _iExpMantMask_UISA */
/* Bias subtracted from the masked bits before they are clamped and shifted
 * right by 21 to form the table index.  */
363 .align 64
364 .long 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000 /* _iMinIdxOfsMask_UISA */
/* Upper clamp for the biased index: 0x03e00000 >> 21 = 31, the last
 * table entry.  */
365 .align 64
366 .long 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000 /* _iMaxIdxMask_UISA */
/* Sign-bit mask, used to extract sign(x).  */
367 .align 64
368 .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */
/* All bits except the sign, used to form |x|.  */
369 .align 64
370 .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */
/* Not referenced by the code visible in this file.  */
371 .align 64
372 .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */
/* Special-input threshold: lanes whose masked bits are greater than this
 * value (signed compare) are sent to the scalar tanhf callout.  */
373 .align 64
374 .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */
/* Not referenced by the code visible in this file.  */
375 .align 64
376 .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */
/* Not referenced by the code visible in this file.  */
377 .align 64
378 .long 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
379 .align 64
380 .type __svml_stanh_data_internal,@object
381 .size __svml_stanh_data_internal,.-__svml_stanh_data_internal