From: Thomas Koenig <tkoenig@gcc.gnu.org>
Date: Sun, 30 Aug 2009 09:02:01 +0000 (+0000)
Subject: pack.m4 (pack_'rtype_code`): Use count_0 for counting true values in a logical array.
X-Git-Tag: releases/gcc-4.5.0~3775
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=01d93568e892e196fbc28454f9f9e3553cbfcd7e;p=thirdparty%2Fgcc.git

pack.m4 (pack_'rtype_code`): Use count_0 for counting true values in a logical array.

2009-08-30  Thomas Koenig  <tkoenig@gcc.gnu.org>

	* m4/pack.m4 (pack_'rtype_code`): Use count_0 for counting true
	values in a logical array.  Mark bounds checking tests as
	unlikely.
	* intrinsics/pack_generic.c (pack_internal): Likewise.
	* runtime/bounds.c (count_0): Fix off-by-one error in detecting
	empty arrays.
	* generated/pack_c4.c: Regenerated.
	* generated/pack_c8.c: Regenerated.
	* generated/pack_c10.c: Regenerated.
	* generated/pack_c16.c: Regenerated.
	* generated/pack_i1.c: Regenerated.
	* generated/pack_i16.c: Regenerated.
	* generated/pack_i2.c: Regenerated.
	* generated/pack_i4.c: Regenerated.
	* generated/pack_i8.c: Regenerated.
	* generated/pack_r4.c: Regenerated.
	* generated/pack_r8.c: Regenerated.
	* generated/pack_r10.c: Regenerated.
	* generated/pack_r16.c: Regenerated.

From-SVN: r151225
---

diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index 8ef88c0d0255..f1ac312833b6 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,25 @@
+2009-08-30  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+	* m4/pack.m4 (pack_'rtype_code`): Use count_0 for counting true
+	values in a logical array.  Mark bounds checking tests as
+	unlikely.
+	* intrinsics/pack_generic.c (pack_internal): Likewise.
+	* runtime/bounds.c (count_0):  Fix off-by-one error in detecting
+	empty arrays.
+	* generated/pack_c4.c: Regenerated.
+	* generated/pack_c8.c: Regenerated.
+	* generated/pack_c10.c: Regenerated.
+	* generated/pack_c16.c: Regenerated.
+	* generated/pack_i1.c: Regenerated.
+	* generated/pack_i16.c: Regenerated.
+	* generated/pack_i2.c: Regenerated.
+	* generated/pack_i4.c: Regenerated.
+	* generated/pack_i8.c: Regenerated.
+	* generated/pack_r4.c: Regenerated.
+	* generated/pack_r8.c: Regenerated.
+	* generated/pack_r10.c: Regenerated.
+	* generated/pack_r16.c: Regenerated.
+
 2009-08-25  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR libfortran/34670
diff --git a/libgfortran/generated/pack_c10.c b/libgfortran/generated/pack_c10.c
index 008fb5c92368..cc66c538e1c0 100644
--- a/libgfortran/generated/pack_c10.c
+++ b/libgfortran/generated/pack_c10.c
@@ -138,7 +138,7 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_c10 (gfc_array_c10 *ret, const gfc_array_c10 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_c16.c b/libgfortran/generated/pack_c16.c
index e7d039f5becf..9397262dd06d 100644
--- a/libgfortran/generated/pack_c16.c
+++ b/libgfortran/generated/pack_c16.c
@@ -138,7 +138,7 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_c16 (gfc_array_c16 *ret, const gfc_array_c16 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_c4.c b/libgfortran/generated/pack_c4.c
index fe1f68d42253..093bdcc9a1d0 100644
--- a/libgfortran/generated/pack_c4.c
+++ b/libgfortran/generated/pack_c4.c
@@ -138,7 +138,7 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_c4 (gfc_array_c4 *ret, const gfc_array_c4 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_c8.c b/libgfortran/generated/pack_c8.c
index f5a27eca6f51..7971e2ba1358 100644
--- a/libgfortran/generated/pack_c8.c
+++ b/libgfortran/generated/pack_c8.c
@@ -138,7 +138,7 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_c8 (gfc_array_c8 *ret, const gfc_array_c8 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_i1.c b/libgfortran/generated/pack_i1.c
index edc895082a46..3e4647dbd559 100644
--- a/libgfortran/generated/pack_i1.c
+++ b/libgfortran/generated/pack_i1.c
@@ -138,7 +138,7 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_i1 (gfc_array_i1 *ret, const gfc_array_i1 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_i16.c b/libgfortran/generated/pack_i16.c
index 8f38a2747ec0..99d3491c38f2 100644
--- a/libgfortran/generated/pack_i16.c
+++ b/libgfortran/generated/pack_i16.c
@@ -138,7 +138,7 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_i16 (gfc_array_i16 *ret, const gfc_array_i16 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_i2.c b/libgfortran/generated/pack_i2.c
index 149e9f6f67d5..e796d169f76f 100644
--- a/libgfortran/generated/pack_i2.c
+++ b/libgfortran/generated/pack_i2.c
@@ -138,7 +138,7 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_i2 (gfc_array_i2 *ret, const gfc_array_i2 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_i4.c b/libgfortran/generated/pack_i4.c
index dad10d62d46a..91ce99fe4fd4 100644
--- a/libgfortran/generated/pack_i4.c
+++ b/libgfortran/generated/pack_i4.c
@@ -138,7 +138,7 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_i4 (gfc_array_i4 *ret, const gfc_array_i4 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_i8.c b/libgfortran/generated/pack_i8.c
index 0a23aa5b72fb..e49d8c29e95b 100644
--- a/libgfortran/generated/pack_i8.c
+++ b/libgfortran/generated/pack_i8.c
@@ -138,7 +138,7 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_i8 (gfc_array_i8 *ret, const gfc_array_i8 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_r10.c b/libgfortran/generated/pack_r10.c
index e4bbe6fabcf3..f70c932640e1 100644
--- a/libgfortran/generated/pack_r10.c
+++ b/libgfortran/generated/pack_r10.c
@@ -138,7 +138,7 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_r10 (gfc_array_r10 *ret, const gfc_array_r10 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_r16.c b/libgfortran/generated/pack_r16.c
index 7dff30b70068..ff2ad6e7eed6 100644
--- a/libgfortran/generated/pack_r16.c
+++ b/libgfortran/generated/pack_r16.c
@@ -138,7 +138,7 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_r16 (gfc_array_r16 *ret, const gfc_array_r16 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_r4.c b/libgfortran/generated/pack_r4.c
index 51d46a272182..0c08b8c8c94d 100644
--- a/libgfortran/generated/pack_r4.c
+++ b/libgfortran/generated/pack_r4.c
@@ -138,7 +138,7 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/generated/pack_r8.c b/libgfortran/generated/pack_r8.c
index 582c2b9aeb16..2b307e29a2b7 100644
--- a/libgfortran/generated/pack_r8.c
+++ b/libgfortran/generated/pack_r8.c
@@ -138,7 +138,7 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -155,62 +155,10 @@ pack_r8 (gfc_array_r8 *ret, const gfc_array_r8 *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/intrinsics/pack_generic.c b/libgfortran/intrinsics/pack_generic.c
index b611d7771019..eb52f069d407 100644
--- a/libgfortran/intrinsics/pack_generic.c
+++ b/libgfortran/intrinsics/pack_generic.c
@@ -132,7 +132,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array,
   if (mstride[0] == 0)
     mstride[0] = mask_kind;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -147,58 +147,7 @@ pack_internal (gfc_array_char *ret, const gfc_array_char *array,
 	{
 	  /* We have to count the true elements in MASK.  */
 
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
+	  total = count_0 (mask);
 	}
 
       if (ret->data == NULL)
diff --git a/libgfortran/m4/pack.m4 b/libgfortran/m4/pack.m4
index 910ffdcaac17..c5fd2fd817d0 100644
--- a/libgfortran/m4/pack.m4
+++ b/libgfortran/m4/pack.m4
@@ -139,7 +139,7 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array,
   else
     sptr = array->data;
 
-  if (ret->data == NULL || compile_options.bounds_check)
+  if (ret->data == NULL || unlikely (compile_options.bounds_check))
     {
       /* Count the elements, either for allocating memory or
 	 for bounds checking.  */
@@ -156,62 +156,10 @@ pack_'rtype_code` ('rtype` *ret, const 'rtype` *array,
 	    }
 	}
       else
-	{
-	  /* We have to count the true elements in MASK.  */
-
-	  /* TODO: We could speed up pack easily in the case of only
-	     few .TRUE. entries in MASK, by keeping track of where we
-	     would be in the source array during the initial traversal
-	     of MASK, and caching the pointers to those elements. Then,
-	     supposed the number of elements is small enough, we would
-	     only have to traverse the list, and copy those elements
-	     into the result array. In the case of datatypes which fit
-	     in one of the integer types we could also cache the
-	     value instead of a pointer to it.
-	     This approach might be bad from the point of view of
-	     cache behavior in the case where our cache is not big
-	     enough to hold all elements that have to be copied.  */
-
-	  const GFC_LOGICAL_1 *m = mptr;
-
-	  total = 0;
-	  if (zero_sized)
-	    m = NULL;
-
-	  while (m)
-	    {
-	      /* Test this element.  */
-	      if (*m)
-		total++;
-
-	      /* Advance to the next element.  */
-	      m += mstride[0];
-	      count[0]++;
-	      n = 0;
-	      while (count[n] == extent[n])
-		{
-		  /* When we get to the end of a dimension, reset it
-		     and increment the next dimension.  */
-		  count[n] = 0;
-		  /* We could precalculate this product, but this is a
-		     less frequently used path so probably not worth
-		     it.  */
-		  m -= mstride[n] * extent[n];
-		  n++;
-		  if (n >= dim)
-		    {
-		      /* Break out of the loop.  */
-		      m = NULL;
-		      break;
-		    }
-		  else
-		    {
-		      count[n]++;
-		      m += mstride[n];
-		    }
-		}
-	    }
-	}
+        {
+      	  /* We have to count the true elements in MASK.  */
+	  total = count_0 (mask);
+        }
 
       if (ret->data == NULL)
 	{
diff --git a/libgfortran/runtime/bounds.c b/libgfortran/runtime/bounds.c
index 2d2ed76e6b83..35bfa1e2a465 100644
--- a/libgfortran/runtime/bounds.c
+++ b/libgfortran/runtime/bounds.c
@@ -237,7 +237,7 @@ index_type count_0 (const gfc_array_l1 * array)
       extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
       count[n] = 0;
 
-      if (extent[n] < 0)
+      if (extent[n] <= 0)
 	return 0;
     }