[thirdparty/gcc.git] / libgfortran / generated / matmul_r16.c

/* Implementation of the MATMUL intrinsic
   Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Paul Brook <paul@nowt.org>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public
License along with libgfortran; see the file COPYING.  If not,
write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "libgfortran.h"

#if defined (HAVE_GFC_REAL_16)

/* The order of loops is different in the case of plain matrix
   multiplication C=MATMUL(A,B), and in the frequent special case where
   the argument A is the temporary result of a TRANSPOSE intrinsic:
   C=MATMUL(TRANSPOSE(A),B).  Transposed temporaries are detected by
   looking at their strides.

   The equivalent Fortran pseudo-code is:

   DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
   IF (.NOT.IS_TRANSPOSED(A)) THEN
     C = 0
     DO J=1,N
       DO K=1,COUNT
         DO I=1,M
           C(I,J) = C(I,J)+A(I,K)*B(K,J)
   ELSE
     DO J=1,N
       DO I=1,M
         S = 0
         DO K=1,COUNT
           S = S+A(I,K)*B(K,J)
         C(I,J) = S
   ENDIF
*/

/* Prototype for the GFC_REAL_16 MATMUL entry point defined below:
   retarray receives the product of a and b.
   NOTE(review): export_proto is a macro declared outside this file
   (presumably in libgfortran.h) and presumably governs how the symbol
   is exported -- confirm against its definition.  */
extern void matmul_r16 (gfc_array_r16 * const restrict retarray, 
	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b);
export_proto(matmul_r16);

/* Compute retarray = MATMUL (a, b) for GFC_REAL_16 operands.

   retarray  Result descriptor.  If its data pointer is NULL, the bounds
	     and strides are filled in here (zero-based, unit stride in
	     the first dimension) and the storage is obtained from
	     internal_malloc_size; otherwise the bounds and strides
	     already present in the descriptor are used unchanged.
   a, b      Operand descriptors.  At least one of them must have rank 2
	     (asserted); the other may have rank 1.  The extent of the
	     last dimension of a must equal the extent of the first
	     dimension of b (asserted).

   Nothing is returned; the product is stored through retarray->data.  */
void
matmul_r16 (gfc_array_r16 * const restrict retarray, 
	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b)
{
  const GFC_REAL_16 * restrict abase;
  const GFC_REAL_16 * restrict bbase;
  GFC_REAL_16 * restrict dest;

  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
  index_type x, y, n, count, xcount, ycount;

  assert (GFC_DESCRIPTOR_RANK (a) == 2
          || GFC_DESCRIPTOR_RANK (b) == 2);

/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]

   Either A or B (but not both) can be rank 1:

   o One-dimensional argument A is implicitly treated as a row matrix
     dimensioned [1,count], so xcount=1.

   o One-dimensional argument B is implicitly treated as a column matrix
     dimensioned [count, 1], so ycount=1.
  */

  if (retarray->data == NULL)
    {
      /* No result storage was supplied: derive the result shape from
	 the operands and allocate it.  */
      if (GFC_DESCRIPTOR_RANK (a) == 1)
        {
          /* vector * matrix: one element per column of b.  */
          retarray->dim[0].lbound = 0;
          retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
          retarray->dim[0].stride = 1;
        }
      else if (GFC_DESCRIPTOR_RANK (b) == 1)
        {
          /* matrix * vector: one element per row of a.  */
          retarray->dim[0].lbound = 0;
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
          retarray->dim[0].stride = 1;
        }
      else
        {
          /* matrix * matrix: rows of a by columns of b, with the
	     second-dimension stride equal to the first extent.  */
          retarray->dim[0].lbound = 0;
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
          retarray->dim[0].stride = 1;

          retarray->dim[1].lbound = 0;
          retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
          retarray->dim[1].stride = retarray->dim[0].ubound+1;
        }

      retarray->data
	= internal_malloc_size (sizeof (GFC_REAL_16) * size0 ((array_t *) retarray));
      retarray->offset = 0;
    }


  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
    {
      /* One-dimensional result may be addressed in the code below
	 either as a row or a column matrix. We want both cases to
	 work. */
      rxstride = rystride = retarray->dim[0].stride;
    }
  else
    {
      rxstride = retarray->dim[0].stride;
      rystride = retarray->dim[1].stride;
    }


  if (GFC_DESCRIPTOR_RANK (a) == 1)
    {
      /* Treat it as a row matrix A[1,count].  Element n of the vector
	 is A[0,n], which every loop below addresses as
	 abase[x*axstride + n*aystride] with x == 0.  The descriptor's
	 stride therefore has to be the stride along n, i.e. aystride;
	 otherwise a non-contiguous vector would be read as if it were
	 contiguous.  axstride is only ever multiplied by zero, so it
	 is set to 1 to keep the unit-stride fast path selectable.  */
      axstride = 1;
      aystride = a->dim[0].stride;

      xcount = 1;
      count = a->dim[0].ubound + 1 - a->dim[0].lbound;
    }
  else
    {
      axstride = a->dim[0].stride;
      aystride = a->dim[1].stride;

      count = a->dim[1].ubound + 1 - a->dim[1].lbound;
      xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
    }

  /* The inner dimensions of the two operands must agree.  */
  assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);

  if (GFC_DESCRIPTOR_RANK (b) == 1)
    {
      /* Treat it as a column matrix B[count,1] */
      bxstride = b->dim[0].stride;

      /* bystride should never be used for 1-dimensional b.
	 in case it is we want it to cause a segfault, rather than
	 an incorrect result. */
      bystride = 0xDEADBEEF;
      ycount = 1;
    }
  else
    {
      bxstride = b->dim[0].stride;
      bystride = b->dim[1].stride;
      ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
    }

  abase = a->data;
  bbase = b->data;
  dest = retarray->data;

  if (rxstride == 1 && axstride == 1 && bxstride == 1)
    {
      /* Result, a and b all have unit stride along x / n: clear the
	 result, then accumulate in y, n, x order so that the innermost
	 loop steps through consecutive elements of dest and a.  */
      const GFC_REAL_16 * restrict bbase_y;
      GFC_REAL_16 * restrict dest_y;
      const GFC_REAL_16 * restrict abase_n;
      GFC_REAL_16 bbase_yn;

      /* When the columns of the result are adjacent in memory the
	 whole result can be cleared with a single memset.  */
      if (rystride == xcount)
	memset (dest, 0, (sizeof (GFC_REAL_16) * xcount * ycount));
      else
	{
	  for (y = 0; y < ycount; y++)
	    for (x = 0; x < xcount; x++)
	      dest[x + y*rystride] = (GFC_REAL_16)0;
	}

      for (y = 0; y < ycount; y++)
	{
	  bbase_y = bbase + y*bystride;
	  dest_y = dest + y*rystride;
	  for (n = 0; n < count; n++)
	    {
	      abase_n = abase + n*aystride;
	      bbase_yn = bbase_y[n];
	      for (x = 0; x < xcount; x++)
		{
		  dest_y[x] += abase_n[x] * bbase_yn;
		}
	    }
	}
    }
  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
    {
      /* a has unit stride along n instead of along x: form each result
	 element as a dot product so the innermost loop steps through
	 consecutive elements of a and b.  */
      const GFC_REAL_16 *restrict abase_x;
      const GFC_REAL_16 *restrict bbase_y;
      GFC_REAL_16 *restrict dest_y;
      GFC_REAL_16 s;

      for (y = 0; y < ycount; y++)
	{
	  bbase_y = &bbase[y*bystride];
	  dest_y = &dest[y*rystride];
	  for (x = 0; x < xcount; x++)
	    {
	      abase_x = &abase[x*axstride];
	      s = (GFC_REAL_16) 0;
	      for (n = 0; n < count; n++)
		s += abase_x[n] * bbase_y[n];
	      dest_y[x] = s;
	    }
	}
    }
  else if (axstride < aystride)
    {
      /* General strides, a's x stride being the smaller one: clear the
	 result, then accumulate with x as the innermost index.  */
      for (y = 0; y < ycount; y++)
	for (x = 0; x < xcount; x++)
	  dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;

      for (y = 0; y < ycount; y++)
	for (n = 0; n < count; n++)
	  for (x = 0; x < xcount; x++)
	    /* dest[x,y] += a[x,n] * b[n,y] */
	    dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
    }
  else
    {
      /* General strides otherwise: dot-product form with n as the
	 innermost index.  */
      const GFC_REAL_16 *restrict abase_x;
      const GFC_REAL_16 *restrict bbase_y;
      GFC_REAL_16 *restrict dest_y;
      GFC_REAL_16 s;

      for (y = 0; y < ycount; y++)
	{
	  bbase_y = &bbase[y*bystride];
	  dest_y = &dest[y*rystride];
	  for (x = 0; x < xcount; x++)
	    {
	      abase_x = &abase[x*axstride];
	      s = (GFC_REAL_16) 0;
	      for (n = 0; n < count; n++)
		s += abase_x[n*aystride] * bbase_y[n*bxstride];
	      dest_y[x*rxstride] = s;
	    }
	}
    }
}

#endif
Commit	Line	Data
644cb69f	1	/* Implementation of the MATMUL intrinsic
6ff24d45	2	Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
644cb69f FXC	3	Contributed by Paul Brook <paul@nowt.org>
	4
	5	This file is part of the GNU Fortran 95 runtime library (libgfortran).
	6
	7	Libgfortran is free software; you can redistribute it and/or
	8	modify it under the terms of the GNU General Public
	9	License as published by the Free Software Foundation; either
	10	version 2 of the License, or (at your option) any later version.
	11
	12	In addition to the permissions in the GNU General Public License, the
	13	Free Software Foundation gives you unlimited permission to link the
	14	compiled version of this file into combinations with other programs,
	15	and to distribute those combinations without any restriction coming
	16	from the use of this file. (The General Public License restrictions
	17	do apply in other respects; for example, they cover modification of
	18	the file, and distribution when not linked into a combine
	19	executable.)
	20
	21	Libgfortran is distributed in the hope that it will be useful,
	22	but WITHOUT ANY WARRANTY; without even the implied warranty of
	23	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	24	GNU General Public License for more details.
	25
	26	You should have received a copy of the GNU General Public
	27	License along with libgfortran; see the file COPYING. If not,
	28	write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	29	Boston, MA 02110-1301, USA. */
	30
	31	#include "config.h"
	32	#include <stdlib.h>
	33	#include <string.h>
	34	#include <assert.h>
	35	#include "libgfortran.h"
	36
	37	#if defined (HAVE_GFC_REAL_16)
	38
1524f80b RS	39	/* The order of loops is different in the case of plain matrix
	40	multiplication C=MATMUL(A,B), and in the frequent special case where
	41	the argument A is the temporary result of a TRANSPOSE intrinsic:
	42	C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by
	43	looking at their strides.
	44
	45	The equivalent Fortran pseudo-code is:
644cb69f FXC	46
644cb69f FXC	47	DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
1524f80b RS	48	IF (.NOT.IS_TRANSPOSED(A)) THEN
	49	C = 0
	50	DO J=1,N
	51	DO K=1,COUNT
	52	DO I=1,M
	53	C(I,J) = C(I,J)+A(I,K)*B(K,J)
	54	ELSE
	55	DO J=1,N
644cb69f	56	DO I=1,M
1524f80b RS	57	S = 0
	58	DO K=1,COUNT
	59	S = S+A(I,K)+B(K,J)
	60	C(I,J) = S
	61	ENDIF
644cb69f FXC	62	*/
644cb69f FXC	63
85206901 JB	64	extern void matmul_r16 (gfc_array_r16 * const restrict retarray,
85206901 JB	65	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b);
644cb69f FXC	66	export_proto(matmul_r16);
	67
	68	void
85206901 JB	69	matmul_r16 (gfc_array_r16 * const restrict retarray,
85206901 JB	70	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b)
644cb69f	71	{
85206901 JB	72	const GFC_REAL_16 * restrict abase;
	73	const GFC_REAL_16 * restrict bbase;
	74	GFC_REAL_16 * restrict dest;
644cb69f FXC	75
	76	index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
	77	index_type x, y, n, count, xcount, ycount;
	78
	79	assert (GFC_DESCRIPTOR_RANK (a) == 2
	80	|| GFC_DESCRIPTOR_RANK (b) == 2);
	81
	82	/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
	83
	84	Either A or B (but not both) can be rank 1:
	85
	86	o One-dimensional argument A is implicitly treated as a row matrix
	87	dimensioned [1,count], so xcount=1.
	88
	89	o One-dimensional argument B is implicitly treated as a column matrix
	90	dimensioned [count, 1], so ycount=1.
	91	*/
	92
	93	if (retarray->data == NULL)
	94	{
	95	if (GFC_DESCRIPTOR_RANK (a) == 1)
	96	{
	97	retarray->dim[0].lbound = 0;
	98	retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
	99	retarray->dim[0].stride = 1;
	100	}
	101	else if (GFC_DESCRIPTOR_RANK (b) == 1)
	102	{
	103	retarray->dim[0].lbound = 0;
	104	retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
	105	retarray->dim[0].stride = 1;
	106	}
	107	else
	108	{
	109	retarray->dim[0].lbound = 0;
	110	retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
	111	retarray->dim[0].stride = 1;
	112
	113	retarray->dim[1].lbound = 0;
	114	retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
	115	retarray->dim[1].stride = retarray->dim[0].ubound+1;
	116	}
	117
	118	retarray->data
	119	= internal_malloc_size (sizeof (GFC_REAL_16) * size0 ((array_t *) retarray));
	120	retarray->offset = 0;
	121	}
	122
644cb69f FXC	123
	124	if (GFC_DESCRIPTOR_RANK (retarray) == 1)
	125	{
	126	/* One-dimensional result may be addressed in the code below
	127	either as a row or a column matrix. We want both cases to
	128	work. */
	129	rxstride = rystride = retarray->dim[0].stride;
	130	}
	131	else
	132	{
	133	rxstride = retarray->dim[0].stride;
	134	rystride = retarray->dim[1].stride;
	135	}
	136
	137
	138	if (GFC_DESCRIPTOR_RANK (a) == 1)
	139	{
	140	/* Treat it as a a row matrix A[1,count]. */
	141	axstride = a->dim[0].stride;
	142	aystride = 1;
	143
	144	xcount = 1;
	145	count = a->dim[0].ubound + 1 - a->dim[0].lbound;
	146	}
	147	else
	148	{
	149	axstride = a->dim[0].stride;
	150	aystride = a->dim[1].stride;
	151
	152	count = a->dim[1].ubound + 1 - a->dim[1].lbound;
	153	xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
	154	}
	155
	156	assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
	157
	158	if (GFC_DESCRIPTOR_RANK (b) == 1)
	159	{
	160	/* Treat it as a column matrix B[count,1] */
	161	bxstride = b->dim[0].stride;
	162
	163	/* bystride should never be used for 1-dimensional b.
	164	in case it is we want it to cause a segfault, rather than
	165	an incorrect result. */
	166	bystride = 0xDEADBEEF;
	167	ycount = 1;
	168	}
	169	else
	170	{
	171	bxstride = b->dim[0].stride;
	172	bystride = b->dim[1].stride;
	173	ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
	174	}
	175
	176	abase = a->data;
	177	bbase = b->data;
	178	dest = retarray->data;
	179
	180	if (rxstride == 1 && axstride == 1 && bxstride == 1)
	181	{
85206901 JB	182	const GFC_REAL_16 * restrict bbase_y;
	183	GFC_REAL_16 * restrict dest_y;
	184	const GFC_REAL_16 * restrict abase_n;
644cb69f FXC	185	GFC_REAL_16 bbase_yn;
644cb69f FXC	186
1633cb7c FXC	187	if (rystride == xcount)
1633cb7c FXC	188	memset (dest, 0, (sizeof (GFC_REAL_16) * xcount * ycount));
644cb69f FXC	189	else
	190	{
	191	for (y = 0; y < ycount; y++)
	192	for (x = 0; x < xcount; x++)
	193	dest[x + y*rystride] = (GFC_REAL_16)0;
	194	}
	195
	196	for (y = 0; y < ycount; y++)
	197	{
	198	bbase_y = bbase + y*bystride;
	199	dest_y = dest + y*rystride;
	200	for (n = 0; n < count; n++)
	201	{
	202	abase_n = abase + n*aystride;
	203	bbase_yn = bbase_y[n];
	204	for (x = 0; x < xcount; x++)
	205	{
	206	dest_y[x] += abase_n[x] * bbase_yn;
	207	}
	208	}
	209	}
	210	}
1524f80b RS	211	else if (rxstride == 1 && aystride == 1 && bxstride == 1)
	212	{
	213	const GFC_REAL_16 *restrict abase_x;
	214	const GFC_REAL_16 *restrict bbase_y;
	215	GFC_REAL_16 *restrict dest_y;
	216	GFC_REAL_16 s;
	217
	218	for (y = 0; y < ycount; y++)
	219	{
	220	bbase_y = &bbase[y*bystride];
	221	dest_y = &dest[y*rystride];
	222	for (x = 0; x < xcount; x++)
	223	{
	224	abase_x = &abase[x*axstride];
	225	s = (GFC_REAL_16) 0;
	226	for (n = 0; n < count; n++)
	227	s += abase_x[n] * bbase_y[n];
	228	dest_y[x] = s;
	229	}
	230	}
	231	}
	232	else if (axstride < aystride)
644cb69f FXC	233	{
	234	for (y = 0; y < ycount; y++)
	235	for (x = 0; x < xcount; x++)
	236	dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
	237
	238	for (y = 0; y < ycount; y++)
	239	for (n = 0; n < count; n++)
	240	for (x = 0; x < xcount; x++)
	241	/* dest[x,y] += a[x,n] * b[n,y] */
	242	dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
	243	}
1524f80b RS	244	else
	245	{
	246	const GFC_REAL_16 *restrict abase_x;
	247	const GFC_REAL_16 *restrict bbase_y;
	248	GFC_REAL_16 *restrict dest_y;
	249	GFC_REAL_16 s;
	250
	251	for (y = 0; y < ycount; y++)
	252	{
	253	bbase_y = &bbase[y*bystride];
	254	dest_y = &dest[y*rystride];
	255	for (x = 0; x < xcount; x++)
	256	{
	257	abase_x = &abase[x*axstride];
	258	s = (GFC_REAL_16) 0;
	259	for (n = 0; n < count; n++)
	260	s += abase_x[n*aystride] * bbase_y[n*bxstride];
	261	dest_y[x*rxstride] = s;
	262	}
	263	}
	264	}
644cb69f FXC	265	}
	266
	267	#endif