[thirdparty/gcc.git] / libgcc / config / cris / arit.c

/* Signed and unsigned multiplication and division and modulus for CRIS.
   Contributed by Axis Communications.
   Written by Hans-Peter Nilsson <hp@axis.se>, c:a 1992.

   Copyright (C) 1998-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */


/* Note that we provide prototypes for all "const" functions, to attach
   the const attribute.  This is necessary in 2.7.2, where adding the
   attribute to the function *definition* is a syntax error.
   This did not work with e.g. 2.1; back then, the return type had to
   be "const".  */

#include "config.h"

/* LZ (v) yields the number of leading zero bits in V, using the
   __builtin_clz compiler builtin.  It is only defined for CRIS
   architecture version 3 and up; the code below tests "#ifdef LZ" and
   falls back to an explicit shift/scan loop when it is absent.  The
   builtin's result is undefined for V == 0, so callers only apply LZ
   to values already known to be non-zero.  */
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3
#define LZ(v) __builtin_clz (v)
#endif

/* SIGNMULT (s, a) applies the sign S to the value A.  S is expected to
   be either 1 or -1 (that is what every user in this file passes);
   only for those values do the two variants below agree.

   Two implementations are provided because, in (at least) the 4.7
   series, GCC doesn't automatically choose the best strategy,
   possibly related to insufficient modelling of delay-slot costs.  */
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
#define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch.  */
#else
#define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better.  */
#endif

#if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
    || defined (L_modsi3)
/* Quotient/remainder pair returned by the divmod worker function.
   The member order matters: the worker also builds this type with a
   positional initializer (quotient first, remainder second).  */
struct quot_rem
{
  /* Quotient of the division.  */
  long quot;

  /* Remainder of the division.  */
  long rem;
};

/* This is the worker function for div and mod.  It is inlined into the
   respective library function.  Parameter A must have bit 31 == 0.

   Returns the quotient and remainder of A / B in a struct quot_rem.
   Two cases are answered up front:
   - B == 0: both fields are set to 0xffffffff.
   - A < B: the quotient is 0 and the remainder is A.

   Otherwise B is shifted left until it exceeds A, and one "dstep"
   instruction is issued per quotient digit.  Afterwards the low
   QUOT_DIGITS bits of A hold the quotient and the bits above them
   hold the remainder.  */

static __inline__ struct quot_rem
do_31div (unsigned long a, unsigned long b)
     __attribute__ ((__const__, __always_inline__));

static __inline__ struct quot_rem
do_31div (unsigned long a, unsigned long b)
{
  /* EXTRA is the part of the quotient that is accounted for by
     pre-subtracting a shifted B from A when A uses bit 30; it is added
     back to the quotient at the end.  */
  long extra = 0;

  /* Number of quotient bits to produce, i.e. how far B was shifted.  */
  int quot_digits = 0;

  /* Division by zero: report all-ones for both quotient and remainder.  */
  if (b == 0)
    {
      struct quot_rem ret;
      ret.quot = 0xffffffff;
      ret.rem = 0xffffffff;
      return ret;
    }

  /* Trivial case; also guarantees B <= A for everything below.  */
  if (a < b)
    return (struct quot_rem) { 0, a };

#ifdef LZ
  /* Always true here, because A < B returned above.  Estimate the
     shift from the difference in leading-zero counts, then add one
     more if the shifted B still does not exceed A.  */
  if (b <= a)
    {
      quot_digits = LZ (b) - LZ (a);
      quot_digits += (a >= (b << quot_digits));
      b <<= quot_digits;
    }
#else
  /* Shift B left one bit at a time until it is strictly greater
     than A, counting the shifts.  */
  while (b <= a)
    {
      b <<= 1;
      quot_digits++;
    }
#endif

  /* Is a 31 bits?  Note that bit 31 is handled by the caller.  */
  if (a & 0x40000000)
    {
      /* Then make b:s highest bit max 0x40000000, because it must have
	 been 0x80000000 to be 1 bit higher than a.  */
      b >>= 1;

      /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero.  */
      if (a >= b)
	{
	  a -= b;
	  extra = 1 << (quot_digits - 1);
	}
      else
	{
	  /* NOTE(review): the normalization above stops at the first
	     shifted B that exceeds A, so after the shift back A >= B
	     should always hold and this branch looks unreachable --
	     confirm before relying on it.  */
	  a -= b >> 1;

	  /* Remember that we adjusted a by subtracting b * 2 ** Something.  */
	  extra = 1 << quot_digits;
	}

      /* The number of quotient digits will be one less, because
	 we just adjusted b.  */
      quot_digits--;
    }

  /* Now do the division part.  */

  /* Subtract b and add ones to the right when a >= b
     i.e. "a - (b - 1) == (a - b) + 1".  */
  b--;

/* DS emits one CRIS "dstep" instruction with A as the in/out operand
   and B as the input operand, and marks the fall-through into the next
   case label as intentional.
   NOTE(review): dstep is presumably the CRIS divide-step instruction
   (shift A left one bit, then subtract B if it fits) -- confirm
   against the CRIS reference manual.  */
#define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \
 __attribute__ ((__fallthrough__))

  /* Jump into the unrolled sequence so that exactly QUOT_DIGITS steps
     are executed.  */
  switch (quot_digits)
    {
    case 32: DS; case 31: DS; case 30: DS; case 29: DS;
    case 28: DS; case 27: DS; case 26: DS; case 25: DS;
    case 24: DS; case 23: DS; case 22: DS; case 21: DS;
    case 20: DS; case 19: DS; case 18: DS; case 17: DS;
    case 16: DS; case 15: DS; case 14: DS; case 13: DS;
    case 12: DS; case 11: DS; case 10: DS; case 9: DS;
    case 8: DS; case 7: DS; case 6: DS; case 5: DS;
    case 4: DS; case 3: DS; case 2: DS; case 1: DS;
    case 0:;
    }

  /* Split A: the low QUOT_DIGITS bits are the quotient (plus the
     pre-subtracted part in EXTRA), the bits above are the remainder.  */
  {
    struct quot_rem ret;
    ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
    ret.rem = a >> quot_digits;
    return ret;
  }
}

#ifdef L_udivsi3
/* Unsigned division: return A / B.

   do_31div requires its dividend to have bit 31 clear, so operands
   using bit 31 are dealt with here first:
   - If B has bit 31 set, the quotient can only be 0 or 1.
   - If A has bit 31 set, the largest suitable power-of-two multiple of
     B is subtracted from A, and that power of two is remembered in
     EXTRA and added to the quotient of the reduced A.

   Division by zero yields 0xffffffff.  */
unsigned long
__Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));

unsigned long
__Udiv (unsigned long a, unsigned long b)
{
  /* Quotient contribution of the pre-subtracted multiple of B.  Kept
     unsigned (and built with 1UL) because it can be 1 << 31 when
     B == 1, which is not representable in a signed int shift.  */
  unsigned long extra = 0;

  /* Adjust operands and result, if a and/or b is 32 bits.  */
  /* Effectively: b & 0x80000000.  */
  if ((long) b < 0)
    return a >= b;

  /* Effectively: a & 0x80000000.  */
  if ((long) a < 0)
    {
      /* Number of leading zero bits in B, i.e. the shift that moves
	 B's highest set bit up to bit 31.  */
      int tmp = 0;

      if (b == 0)
	return 0xffffffff;
#ifdef LZ
      tmp = LZ (b);
#else
      /* Scan down from bit 31 for B's highest set bit; B is non-zero
	 here, so the loop terminates.  */
      for (tmp = 31; (b & (1UL << tmp)) == 0; tmp--)
	;

      tmp = 31 - tmp;
#endif

      /* B has bit 31 clear here, so TMP >= 1 and TMP - 1 is a valid
	 shift count.  Either way the reduced A has bit 31 clear.  */
      if ((b << tmp) > a)
	{
	  extra = 1UL << (tmp - 1);
	  a -= b << (tmp - 1);
	}
      else
	{
	  extra = 1UL << tmp;
	  a -= b << tmp;
	}
    }

  return do_31div (a, b).quot + extra;
}
#endif /* L_udivsi3 */

#ifdef L_divsi3
/* Signed division: return A / B.

   The work is done on magnitudes by do_31div; the sign of the result
   is tracked separately and applied at the end with SIGNMULT.  */
long
__Div (long a, long b) __attribute__ ((__const__));

long
__Div (long a, long b)
{
  /* Added to the final result; non-zero only for the 0x80000000
     dividend special case below.  */
  long correction = 0;

  /* Sign of the result: starts as the sign of B and is flipped when A
     is negative.  */
  long result_sign = 1;

  /* Quotient of the magnitudes.  */
  long magnitude;

  if (b < 0)
    result_sign = -1;

  /* We need to handle a == -2147483648 as expected and must while
     doing that avoid producing a sequence like "abs (a) < 0" as GCC
     may optimize out the test.  That sequence may not be obvious as
     we call inline functions.  Testing for a being negative first
     (presumably much rarer than positive) also keeps the cost of the
     0x80000000 special case off the common path.  */
  if (a < 0)
    {
      result_sign = -result_sign;

      if ((a & 0x7fffffff) == 0)
	{
	  /* We're at 0x80000000.  Tread carefully: move A one |B| closer
	     to zero so that it can be negated, and compensate by one
	     unit of the result sign afterwards.  */
	  a -= SIGNMULT (result_sign, b);
	  correction = result_sign;
	}

      a = -a;
    }

  magnitude = do_31div (a, __builtin_labs (b)).quot;
  return correction + SIGNMULT (result_sign, magnitude);
}
#endif /* L_divsi3 */


#ifdef L_umodsi3
/* Unsigned modulus: return A % B.

   do_31div requires its dividend to have bit 31 clear, so operands
   using bit 31 are dealt with here first:
   - If B has bit 31 set, the quotient can only be 0 or 1, so the
     remainder is A or A - B.
   - If A has bit 31 set, a power-of-two multiple of B is subtracted
     from A first; that does not change the remainder.

   When A has bit 31 set and B == 0, A itself is returned.  */
unsigned long
__Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));

unsigned long
__Umod (unsigned long a, unsigned long b)
{
  /* Adjust operands and result if a and/or b is 32 bits.  */
  if ((long) b < 0)
    return a >= b ? a - b : a;

  if ((long) a < 0)
    {
      /* Number of leading zero bits in B, i.e. the shift that moves
	 B's highest set bit up to bit 31.  */
      int tmp = 0;

      if (b == 0)
	return a;
#ifdef LZ
      tmp = LZ (b);
#else
      /* Scan down from bit 31 for B's highest set bit; B is non-zero
	 here, so the loop terminates.  The mask is built with 1UL
	 because 1 << 31 is not representable in a signed int.  */
      for (tmp = 31; (b & (1UL << tmp)) == 0; tmp--)
	;
      tmp = 31 - tmp;
#endif

      /* If the fully shifted B is too large, use half of it.  B has
	 bit 31 clear here, so TMP >= 1 and the decrement is safe.
	 Either way the reduced A has bit 31 clear.  */
      if ((b << tmp) > a)
	tmp--;

      a -= b << tmp;
    }

  return do_31div (a, b).rem;
}
#endif /* L_umodsi3 */

#ifdef L_modsi3
/* Signed modulus: return A % B.

   The work is done on magnitudes by do_31div; the result takes the
   sign of A, applied at the end with SIGNMULT.  */
long
__Mod (long a, long b) __attribute__ ((__const__));

long
__Mod (long a, long b)
{
  /* Magnitude of the divisor.  */
  long divisor = __builtin_labs (b);

  /* Sign of the result; follows the sign of A.  */
  long rem_sign = 1;

  /* Remainder of the magnitudes.  */
  long remainder;

  /* We need to handle a == -2147483648 as expected and must while
     doing that avoid producing a sequence like "abs (a) < 0" as GCC
     may optimize out the test.  That sequence may not be obvious as
     we call inline functions.  Testing for a being negative first
     (presumably much rarer than positive) also keeps the cost of the
     0x80000000 special case off the common path.  */
  if (a < 0)
    {
      rem_sign = -1;

      /* We're at 0x80000000.  Tread carefully: move A one |B| closer
	 to zero so that it can be negated; this does not change the
	 remainder.  */
      if ((a & 0x7fffffff) == 0)
	a += divisor;

      a = -a;
    }

  remainder = do_31div (a, divisor).rem;
  return SIGNMULT (rem_sign, remainder);
}
#endif /* L_modsi3 */
#endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */

/*
 * Local variables:
 * eval: (c-set-style "gnu")
 * indent-tabs-mode: t
 * End:
 */
Commit	Line	Data
0b85d816 HPN	1	/* Signed and unsigned multiplication and division and modulus for CRIS.
	2	Contributed by Axis Communications.
	3	Written by Hans-Peter Nilsson <hp@axis.se>, c:a 1992.
	4
83ffe9cd	5	Copyright (C) 1998-2023 Free Software Foundation, Inc.
0b85d816 HPN	6
	7	This file is part of GCC.
	8
	9	GCC is free software; you can redistribute it and/or modify it
	10	under the terms of the GNU General Public License as published by the
748086b7	11	Free Software Foundation; either version 3, or (at your option) any
0b85d816 HPN	12	later version.
0b85d816 HPN	13
0b85d816 HPN	14	This file is distributed in the hope that it will be useful, but
	15	WITHOUT ANY WARRANTY; without even the implied warranty of
	16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	17	General Public License for more details.
	18
748086b7 JJ	19	Under Section 7 of GPL version 3, you are granted additional
	20	permissions described in the GCC Runtime Library Exception, version
	21	3.1, as published by the Free Software Foundation.
	22
	23	You should have received a copy of the GNU General Public License and
	24	a copy of the GCC Runtime Library Exception along with this program;
	25	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	26	<http://www.gnu.org/licenses/>. */
0b85d816 HPN	27
	28
	29	/* Note that we provide prototypes for all "const" functions, to attach
	30	the const attribute. This is necessary in 2.7.2 - adding the
	31	attribute to the function definition is a syntax error.
	32	This did not work with e.g. 2.1; back then, the return type had to
	33	be "const". */
	34
	35	#include "config.h"
	36
	37	#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3
e636e508	38	#define LZ(v) __builtin_clz (v)
0b85d816 HPN	39	#endif
0b85d816 HPN	40
0e499e75 HPN	41	/* In (at least) the 4.7 series, GCC doesn't automatically choose the
	42	most optimal strategy, possibly related to insufficient modelling of
	43	delay-slot costs. */
	44	#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
	45	#define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch. */
	46	#else
	47	#define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better. */
	48	#endif
0b85d816 HPN	49
	50	#if defined (L_udivsi3) \|\| defined (L_divsi3) \|\| defined (L_umodsi3) \
	51	\|\| defined (L_modsi3)
	52	/* Result type of divmod worker function. */
	53	struct quot_rem
	54	{
	55	long quot;
	56	long rem;
	57	};
	58
	59	/* This is the worker function for div and mod. It is inlined into the
01ad6816 HPN	60	respective library function. Parameter A must have bit 31 == 0. */
01ad6816 HPN	61
0b85d816	62	static __inline__ struct quot_rem
807b20b0 HPN	63	do_31div (unsigned long a, unsigned long b)
807b20b0 HPN	64	__attribute__ ((__const__, __always_inline__));
0b85d816 HPN	65
	66	static __inline__ struct quot_rem
	67	do_31div (unsigned long a, unsigned long b)
	68	{
	69	/* Adjust operands and result if a is 31 bits. */
	70	long extra = 0;
	71	int quot_digits = 0;
	72
	73	if (b == 0)
	74	{
	75	struct quot_rem ret;
	76	ret.quot = 0xffffffff;
	77	ret.rem = 0xffffffff;
	78	return ret;
	79	}
	80
	81	if (a < b)
	82	return (struct quot_rem) { 0, a };
	83
	84	#ifdef LZ
	85	if (b <= a)
	86	{
	87	quot_digits = LZ (b) - LZ (a);
	88	quot_digits += (a >= (b << quot_digits));
	89	b <<= quot_digits;
	90	}
	91	#else
	92	while (b <= a)
	93	{
	94	b <<= 1;
	95	quot_digits++;
	96	}
	97	#endif
	98
	99	/* Is a 31 bits? Note that bit 31 is handled by the caller. */
	100	if (a & 0x40000000)
	101	{
	102	/* Then make b:s highest bit max 0x40000000, because it must have
	103	been 0x80000000 to be 1 bit higher than a. */
	104	b >>= 1;
	105
	106	/* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero. */
	107	if (a >= b)
	108	{
	109	a -= b;
	110	extra = 1 << (quot_digits - 1);
	111	}
	112	else
	113	{
	114	a -= b >> 1;
	115
	116	/* Remember that we adjusted a by subtracting b * 2 ** Something. */
	117	extra = 1 << quot_digits;
	118	}
	119
	120	/* The number of quotient digits will be one less, because
	121	we just adjusted b. */
	122	quot_digits--;
	123	}
	124
	125	/* Now do the division part. */
	126
	127	/* Subtract b and add ones to the right when a >= b
	128	i.e. "a - (b - 1) == (a - b) + 1". */
129	b--;
130
463f6499 HPN	131	#define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \
463f6499 HPN	132	__attribute__ ((__fallthrough__))
0b85d816 HPN	133
	134	switch (quot_digits)
	135	{
	136	case 32: DS; case 31: DS; case 30: DS; case 29: DS;
	137	case 28: DS; case 27: DS; case 26: DS; case 25: DS;
	138	case 24: DS; case 23: DS; case 22: DS; case 21: DS;
	139	case 20: DS; case 19: DS; case 18: DS; case 17: DS;
	140	case 16: DS; case 15: DS; case 14: DS; case 13: DS;
	141	case 12: DS; case 11: DS; case 10: DS; case 9: DS;
	142	case 8: DS; case 7: DS; case 6: DS; case 5: DS;
	143	case 4: DS; case 3: DS; case 2: DS; case 1: DS;
	144	case 0:;
	145	}
	146
	147	{
	148	struct quot_rem ret;
	149	ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
	150	ret.rem = a >> quot_digits;
	151	return ret;
	152	}
	153	}
	154
01ad6816	155	#ifdef L_udivsi3
0b85d816	156	unsigned long
01ad6816	157	__Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
0b85d816	158
0b85d816 HPN	159	unsigned long
	160	__Udiv (unsigned long a, unsigned long b)
	161	{
	162	long extra = 0;
	163
	164	/* Adjust operands and result, if a and/or b is 32 bits. */
	165	/* Effectively: b & 0x80000000. */
	166	if ((long) b < 0)
	167	return a >= b;
	168
	169	/* Effectively: a & 0x80000000. */
	170	if ((long) a < 0)
	171	{
	172	int tmp = 0;
	173
	174	if (b == 0)
	175	return 0xffffffff;
	176	#ifdef LZ
	177	tmp = LZ (b);
	178	#else
	179	for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
	180	;
	181
	182	tmp = 31 - tmp;
	183	#endif
	184
	185	if ((b << tmp) > a)
	186	{
	187	extra = 1 << (tmp-1);
	188	a -= b << (tmp - 1);
	189	}
	190	else
	191	{
	192	extra = 1 << tmp;
	193	a -= b << tmp;
	194	}
	195	}
	196
	197	return do_31div (a, b).quot+extra;
	198	}
01ad6816	199	#endif /* L_udivsi3 */
0b85d816 HPN	200
	201	#ifdef L_divsi3
	202	long
	203	__Div (long a, long b) __attribute__ ((__const__));
	204
	205	long
	206	__Div (long a, long b)
	207	{
01ad6816 HPN	208	long extra = 0;
01ad6816 HPN	209	long sign = (b < 0) ? -1 : 1;
0e499e75	210	long res;
01ad6816 HPN	211
	212	/* We need to handle a == -2147483648 as expected and must while
	213	doing that avoid producing a sequence like "abs (a) < 0" as GCC
	214	may optimize out the test. That sequence may not be obvious as
	215	we call inline functions. Testing for a being negative and
	216	handling (presumably much rarer than positive) enables us to get
	217	a bit of optimization for an (accumulated) reduction of the
	218	penalty of the 0x80000000 special-case. */
	219	if (a < 0)
	220	{
	221	sign = -sign;
0b85d816	222
01ad6816 HPN	223	if ((a & 0x7fffffff) == 0)
	224	{
	225	/* We're at 0x80000000. Tread carefully. */
0e499e75	226	a -= SIGNMULT (sign, b);
01ad6816 HPN	227	extra = sign;
	228	}
	229	a = -a;
	230	}
0b85d816	231
0e499e75 HPN	232	res = do_31div (a, __builtin_labs (b)).quot;
0e499e75 HPN	233	return SIGNMULT (sign, res) + extra;
0b85d816 HPN	234	}
0b85d816 HPN	235	#endif /* L_divsi3 */
0b85d816 HPN	236
0b85d816 HPN	237
01ad6816	238	#ifdef L_umodsi3
0b85d816	239	unsigned long
01ad6816	240	__Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
0b85d816	241
0b85d816 HPN	242	unsigned long
	243	__Umod (unsigned long a, unsigned long b)
	244	{
	245	/* Adjust operands and result if a and/or b is 32 bits. */
	246	if ((long) b < 0)
	247	return a >= b ? a - b : a;
	248
	249	if ((long) a < 0)
	250	{
	251	int tmp = 0;
	252
	253	if (b == 0)
	254	return a;
	255	#ifdef LZ
	256	tmp = LZ (b);
	257	#else
	258	for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
	259	;
	260	tmp = 31 - tmp;
	261	#endif
	262
	263	if ((b << tmp) > a)
	264	{
	265	a -= b << (tmp - 1);
	266	}
	267	else
	268	{
	269	a -= b << tmp;
	270	}
	271	}
	272
	273	return do_31div (a, b).rem;
	274	}
01ad6816	275	#endif /* L_umodsi3 */
0b85d816 HPN	276
	277	#ifdef L_modsi3
	278	long
	279	__Mod (long a, long b) __attribute__ ((__const__));
	280
	281	long
	282	__Mod (long a, long b)
	283	{
01ad6816	284	long sign = 1;
0e499e75	285	long res;
01ad6816 HPN	286
	287	/* We need to handle a == -2147483648 as expected and must while
	288	doing that avoid producing a sequence like "abs (a) < 0" as GCC
	289	may optimize out the test. That sequence may not be obvious as
	290	we call inline functions. Testing for a being negative and
	291	handling (presumably much rarer than positive) enables us to get
	292	a bit of optimization for an (accumulated) reduction of the
	293	penalty of the 0x80000000 special-case. */
	294	if (a < 0)
	295	{
	296	sign = -1;
	297	if ((a & 0x7fffffff) == 0)
	298	/* We're at 0x80000000. Tread carefully. */
	299	a += __builtin_labs (b);
	300	a = -a;
	301	}
0b85d816	302
0e499e75 HPN	303	res = do_31div (a, __builtin_labs (b)).rem;
0e499e75 HPN	304	return SIGNMULT (sign, res);
0b85d816 HPN	305	}
0b85d816 HPN	306	#endif /* L_modsi3 */
0b85d816 HPN	307	#endif /* L_udivsi3 \|\| L_divsi3 \|\| L_umodsi3 \|\| L_modsi3 */
	308
	309	/*
	310	* Local variables:
	311	* eval: (c-set-style "gnu")
	312	* indent-tabs-mode: t
	313	* End:
	314	*/