Marc-Andre Lemburg <mal@lemburg.com>:

author Marc-André Lemburg <mal@egenix.com>
Fri, 30 Jun 2000 10:29:57 +0000 (10:29 +0000)
committer Marc-André Lemburg <mal@egenix.com>
Fri, 30 Jun 2000 10:29:57 +0000 (10:29 +0000)
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 10b43e4d999ff430eb2579f934b83c093eb21df4..5fe5b6517a786aeb91f232fa8a664b6542ab8740 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -124,8 +124,13 @@ PyObject *
  PyString_FromString(str)
         const char *str;
  {
-       register unsigned int size = strlen(str);
+       register size_t size = strlen(str);
         register PyStringObject *op;
+       if (size > INT_MAX) {
+               PyErr_SetString(PyExc_OverflowError,
+                       "string is too long for a Python string");
+               return NULL;
+       }
  #ifndef DONT_SHARE_SHORT_STRINGS
         if (size == 0 && (op = nullstring) != NULL) {
  #ifdef COUNT_ALLOCS
@@ -237,9 +242,13 @@ static PyObject *
  string_repr(op)
         register PyStringObject *op;
  {
-       /* XXX overflow? */
-       int newsize = 2 + 4 * op->ob_size * sizeof(char);
-       PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
+       size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
+       PyObject *v;
+       if (newsize > INT_MAX) {
+               PyErr_SetString(PyExc_OverflowError,
+                       "string is too large to make repr");
+       }
+       v = PyString_FromStringAndSize((char *)NULL, newsize);
         if (v == NULL) {
                 return NULL;
         }
@@ -2335,36 +2344,52 @@ getnextarg(args, arglen, p_argidx)
  #define F_ZERO (1<<4)
  
  static int
-formatfloat(buf, flags, prec, type, v)
+formatfloat(buf, buflen, flags, prec, type, v)
         char *buf;
+       size_t buflen;
         int flags;
         int prec;
         int type;
         PyObject *v;
  {
+       /* fmt = '%#.' + `prec` + `type`
+          worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
         char fmt[20];
         double x;
         if (!PyArg_Parse(v, "d;float argument required", &x))
                 return -1;
         if (prec < 0)
                 prec = 6;
-       if (prec > 50)
-               prec = 50; /* Arbitrary limitation */
         if (type == 'f' && fabs(x)/1e25 >= 1e25)
                 type = 'g';
         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
+       /* worst case length calc to ensure no buffer overrun:
+            fmt = %#.<prec>g
+            buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+               for any double rep.) 
+            len = 1 + prec + 1 + 2 + 5 = 9 + prec
+          If prec=0 the effective precision is 1 (the leading digit is
+          always given), therefore increase by one to 10+prec. */
+       if (buflen <= (size_t)10 + (size_t)prec) {
+               PyErr_SetString(PyExc_OverflowError,
+                       "formatted float is too long (precision too long?)");
+               return -1;
+       }
         sprintf(buf, fmt, x);
         return strlen(buf);
  }
  
  static int
-formatint(buf, flags, prec, type, v)
+formatint(buf, buflen, flags, prec, type, v)
         char *buf;
+       size_t buflen;
         int flags;
         int prec;
         int type;
         PyObject *v;
  {
+       /* fmt = '%#.' + `prec` + 'l' + `type`
+          worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
         char fmt[20];
         long x;
         if (!PyArg_Parse(v, "l;int argument required", &x))
@@ -2372,15 +2397,24 @@ formatint(buf, flags, prec, type, v)
         if (prec < 0)
                 prec = 1;
         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
+       /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
+          worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
+       if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
+               PyErr_SetString(PyExc_OverflowError,
+                       "formatted integer is too long (precision too long?)");
+               return -1;
+       }
         sprintf(buf, fmt, x);
         return strlen(buf);
  }
  
  static int
-formatchar(buf, v)
+formatchar(buf, buflen, v)
         char *buf;
+       size_t buflen;
         PyObject *v;
  {
+       /* presume that the buffer is at least 2 characters long */
         if (PyString_Check(v)) {
                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
                         return -1;
@@ -2394,7 +2428,15 @@ formatchar(buf, v)
  }
  
  
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
+
+   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+   chars are formatted. XXX This is a magic number. Each formatting
+   routine does bounds checking to ensure no overflow, but a better
+   solution may be to malloc a buffer of appropriate size for each
+   format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
  
  PyObject *
  PyString_Format(format, args)
@@ -2451,10 +2493,10 @@ PyString_Format(format, args)
                         int fill;
                         PyObject *v = NULL;
                         PyObject *temp = NULL;
-                       char *buf;
+                       char *pbuf;
                         int sign;
                         int len;
-                       char tmpbuf[120]; /* For format{float,int,char}() */
+                       char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
                         char *fmt_start = fmt;
                         
                         fmt++;
@@ -2602,7 +2644,7 @@ PyString_Format(format, args)
                         fill = ' ';
                         switch (c) {
                         case '%':
-                               buf = "%";
+                               pbuf = "%";
                                 len = 1;
                                 break;
                         case 's':
@@ -2622,7 +2664,7 @@ PyString_Format(format, args)
                                           "%s argument has non-string str()");
                                         goto error;
                                 }
-                               buf = PyString_AsString(temp);
+                               pbuf = PyString_AsString(temp);
                                 len = PyString_Size(temp);
                                 if (prec >= 0 && len > prec)
                                         len = prec;
@@ -2635,8 +2677,8 @@ PyString_Format(format, args)
                         case 'X':
                                 if (c == 'i')
                                         c = 'd';
-                               buf = tmpbuf;
-                               len = formatint(buf, flags, prec, c, v);
+                               pbuf = formatbuf;
+                               len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
                                 if (len < 0)
                                         goto error;
                                 sign = (c == 'd');
@@ -2644,9 +2686,9 @@ PyString_Format(format, args)
                                         fill = '0';
                                         if ((flags&F_ALT) &&
                                             (c == 'x' || c == 'X') &&
-                                           buf[0] == '0' && buf[1] == c) {
-                                               *res++ = *buf++;
-                                               *res++ = *buf++;
+                                           pbuf[0] == '0' && pbuf[1] == c) {
+                                               *res++ = *pbuf++;
+                                               *res++ = *pbuf++;
                                                 rescnt -= 2;
                                                 len -= 2;
                                                 width -= 2;
@@ -2660,8 +2702,8 @@ PyString_Format(format, args)
                         case 'f':
                         case 'g':
                         case 'G':
-                               buf = tmpbuf;
-                               len = formatfloat(buf, flags, prec, c, v);
+                               pbuf = formatbuf;
+                               len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
                                 if (len < 0)
                                         goto error;
                                 sign = 1;
@@ -2669,8 +2711,8 @@ PyString_Format(format, args)
                                         fill = '0';
                                 break;
                         case 'c':
-                               buf = tmpbuf;
-                               len = formatchar(buf, v);
+                               pbuf = formatbuf;
+                               len = formatchar(pbuf, sizeof(formatbuf), v);
                                 if (len < 0)
                                         goto error;
                                 break;
@@ -2681,8 +2723,8 @@ PyString_Format(format, args)
                                 goto error;
                         }
                         if (sign) {
-                               if (*buf == '-' || *buf == '+') {
-                                       sign = *buf++;
+                               if (*pbuf == '-' || *pbuf == '+') {
+                                       sign = *pbuf++;
                                         len--;
                                 }
                                 else if (flags & F_SIGN)
@@ -2718,7 +2760,7 @@ PyString_Format(format, args)
                         }
                         if (sign && fill == ' ')
                                 *res++ = sign;
-                       memcpy(res, buf, len);
+                       memcpy(res, pbuf, len);
                         res += len;
                         rescnt -= len;
                         while (--width >= len) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index f907712d833a94c801e4eeb846c01c0538057c0d..dad004a5a1a21981b03bac2e1f4416fe75e8c862 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -66,7 +66,7 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
  
  #include "mymath.h"
  #include "unicodeobject.h"
-#include "ucnhash.h"
+#include <ucnhash.h>
  
  #if defined(HAVE_LIMITS_H)
  #include <limits.h>
@@ -1244,14 +1244,14 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                  goto onError;
  ucnFallthrough:
              /* fall through on purpose */
-        default:
+               default:
              *p++ = '\\';
              *p++ = (unsigned char)s[-1];
              break;
          }
      }
      if (_PyUnicode_Resize(v, (int)(p - buf)))
-       goto onError;
+               goto onError;
      return (PyObject *)v;
      
   onError:
@@ -4373,11 +4373,14 @@ int usprintf(va_alist) va_dcl
  
  static int
  formatfloat(Py_UNICODE *buf,
+           size_t buflen,
             int flags,
             int prec,
             int type,
             PyObject *v)
  {
+    /* fmt = '%#.' + `prec` + `type`
+       worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
      char fmt[20];
      double x;
      
@@ -4386,21 +4389,34 @@ formatfloat(Py_UNICODE *buf,
         return -1;
      if (prec < 0)
         prec = 6;
-    if (prec > 50)
-       prec = 50; /* Arbitrary limitation */
      if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
         type = 'g';
      sprintf(fmt, "%%%s.%d%c", (flags & F_ALT) ? "#" : "", prec, type);
+    /* worst case length calc to ensure no buffer overrun:
+         fmt = %#.<prec>g
+         buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+            for any double rep.)
+         len = 1 + prec + 1 + 2 + 5 = 9 + prec
+       If prec=0 the effective precision is 1 (the leading digit is
+       always given), therefore increase by one to 10+prec. */
+    if (buflen <= (size_t)10 + (size_t)prec) {
+       PyErr_SetString(PyExc_OverflowError,
+           "formatted float is too long (precision too long?)");
+       return -1;
+    }
      return usprintf(buf, fmt, x);
  }
  
  static int
  formatint(Py_UNICODE *buf,
+         size_t buflen,
           int flags,
           int prec,
           int type,
           PyObject *v)
  {
+    /* fmt = '%#.' + `prec` + 'l' + `type`
+       worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
      char fmt[20];
      long x;
  
@@ -4409,14 +4425,23 @@ formatint(Py_UNICODE *buf,
         return -1;
      if (prec < 0)
         prec = 1;
+    /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
+       worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
+    if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
+        PyErr_SetString(PyExc_OverflowError,
+            "formatted integer is too long (precision too long?)");
+        return -1;
+    }
      sprintf(fmt, "%%%s.%dl%c", (flags & F_ALT) ? "#" : "", prec, type);
      return usprintf(buf, fmt, x);
  }
  
  static int
  formatchar(Py_UNICODE *buf,
-          PyObject *v)
+           size_t buflen,
+           PyObject *v)
  {
+    /* presume that the buffer is at least 2 characters long */
      if (PyUnicode_Check(v)) {
         if (PyUnicode_GET_SIZE(v) != 1)
             goto onError;
@@ -4446,6 +4471,16 @@ formatchar(Py_UNICODE *buf,
      return -1;
  }
  
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
+
+   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+   chars are formatted. XXX This is a magic number. Each formatting
+   routine does bounds checking to ensure no overflow, but a better
+   solution may be to malloc a buffer of appropriate size for each
+   format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
+
  PyObject *PyUnicode_Format(PyObject *format,
                            PyObject *args)
  {
@@ -4505,10 +4540,10 @@ PyObject *PyUnicode_Format(PyObject *format,
             Py_UNICODE fill;
             PyObject *v = NULL;
             PyObject *temp = NULL;
-           Py_UNICODE *buf;
+           Py_UNICODE *pbuf;
             Py_UNICODE sign;
             int len;
-           Py_UNICODE tmpbuf[120]; /* For format{float,int,char}() */
+           Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
  
             fmt++;
             if (*fmt == '(') {
@@ -4658,8 +4693,9 @@ PyObject *PyUnicode_Format(PyObject *format,
             switch (c) {
  
             case '%':
-               buf = tmpbuf;
-               buf[0] = '%';
+               pbuf = formatbuf;
+               /* presume that buffer length is at least 1 */
+               pbuf[0] = '%';
                 len = 1;
                 break;
  
@@ -4695,7 +4731,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                     if (temp == NULL)
                         goto onError;
                 }
-               buf = PyUnicode_AS_UNICODE(temp);
+               pbuf = PyUnicode_AS_UNICODE(temp);
                 len = PyUnicode_GET_SIZE(temp);
                 if (prec >= 0 && len > prec)
                     len = prec;
@@ -4709,8 +4745,9 @@ PyObject *PyUnicode_Format(PyObject *format,
             case 'X':
                 if (c == 'i')
                     c = 'd';
-               buf = tmpbuf;
-               len = formatint(buf, flags, prec, c, v);
+               pbuf = formatbuf;
+               len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+                       flags, prec, c, v);
                 if (len < 0)
                     goto onError;
                 sign = (c == 'd');
@@ -4718,9 +4755,9 @@ PyObject *PyUnicode_Format(PyObject *format,
                     fill = '0';
                     if ((flags&F_ALT) &&
                         (c == 'x' || c == 'X') &&
-                       buf[0] == '0' && buf[1] == c) {
-                       *res++ = *buf++;
-                       *res++ = *buf++;
+                       pbuf[0] == '0' && pbuf[1] == c) {
+                       *res++ = *pbuf++;
+                       *res++ = *pbuf++;
                         rescnt -= 2;
                         len -= 2;
                         width -= 2;
@@ -4735,8 +4772,9 @@ PyObject *PyUnicode_Format(PyObject *format,
             case 'f':
             case 'g':
             case 'G':
-               buf = tmpbuf;
-               len = formatfloat(buf, flags, prec, c, v);
+               pbuf = formatbuf;
+               len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+                       flags, prec, c, v);
                 if (len < 0)
                     goto onError;
                 sign = 1;
@@ -4745,8 +4783,8 @@ PyObject *PyUnicode_Format(PyObject *format,
                 break;
  
             case 'c':
-               buf = tmpbuf;
-               len = formatchar(buf, v);
+               pbuf = formatbuf;
+               len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
                 if (len < 0)
                     goto onError;
                 break;
@@ -4758,8 +4796,8 @@ PyObject *PyUnicode_Format(PyObject *format,
                 goto onError;
             }
             if (sign) {
-               if (*buf == '-' || *buf == '+') {
-                   sign = *buf++;
+               if (*pbuf == '-' || *pbuf == '+') {
+                   sign = *pbuf++;
                     len--;
                 }
                 else if (flags & F_SIGN)
@@ -4795,7 +4833,7 @@ PyObject *PyUnicode_Format(PyObject *format,
             }
             if (sign && fill == ' ')
                 *res++ = sign;
-           memcpy(res, buf, len * sizeof(Py_UNICODE));
+           memcpy(res, pbuf, len * sizeof(Py_UNICODE));
             res += len;
             rescnt -= len;
             while (--width >= len) {
author	Marc-André Lemburg <mal@egenix.com>
	Fri, 30 Jun 2000 10:29:57 +0000 (10:29 +0000)
committer	Marc-André Lemburg <mal@egenix.com>
	Fri, 30 Jun 2000 10:29:57 +0000 (10:29 +0000)
Objects/stringobject.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history