encodings.pl: Added `ASCII' alias.

author Tom Tromey <tromey@cygnus.com>

Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)

committer Tom Tromey <tromey@gcc.gnu.org>

Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)
author Tom Tromey <tromey@cygnus.com>
Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)
committer Tom Tromey <tromey@gcc.gnu.org>
Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)
diff --git a/libjava/ChangeLog b/libjava/ChangeLog

index 0fe0ed3065f86ab81539208b56bb2bcd535e500f..1901cfce861d67110e19be7df0845949f6de9878 100644 (file)
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,22 @@
+2000-11-01  Tom Tromey  <tromey@cygnus.com>
+
+       * scripts/encodings.pl: Added `ASCII' alias.
+       * Makefile.in: Rebuilt.
+       * Makefile.am (convert_source_files): Added new files.
+       * gnu/gcj/convert/Input_ASCII.java: New file.
+       * gnu/gcj/convert/Output_ASCII.java: New file.
+       * gnu/gcj/convert/Output_8859_1.java (write): Use `?' to represent
+       out-of-range characters.
+       * gnu/gcj/convert/natIconv.cc (iconv_init): New method.
+       (read): Swap bytes if required.  Treat `count' as character count,
+       not byte count.
+       (write): Likewise.  Also, handle case where iconv fails on a given
+       character.
+       (init): Put encoding into exception.
+       * gnu/gcj/convert/IOConverter.java (iconv_byte_swap): New global.
+       (static): Call iconv_init.  Rebuilt alias list.
+       (iconv_init): New private method.
+
  2000-11-01  Tom Tromey  <tromey@cygnus.com>
  
         * Makefile.in: Rebuilt.
diff --git a/libjava/Makefile.am b/libjava/Makefile.am

index 8a5372d26c29d502b2b2abec7108ef5b5308eb3d..d3fcd9b014ccb1e759d5a1679685a3d55752ac55 100644 (file)
--- a/libjava/Makefile.am
+++ b/libjava/Makefile.am
@@ -506,6 +506,7 @@ convert_source_files = \
  gnu/gcj/convert/BytesToUnicode.java \
  gnu/gcj/convert/Convert.java \
  gnu/gcj/convert/Input_8859_1.java \
+gnu/gcj/convert/Input_ASCII.java \
  gnu/gcj/convert/Input_EUCJIS.java \
  gnu/gcj/convert/Input_JavaSrc.java \
  gnu/gcj/convert/Input_SJIS.java \
@@ -513,6 +514,7 @@ gnu/gcj/convert/Input_UTF8.java     \
  gnu/gcj/convert/Input_iconv.java \
  gnu/gcj/convert/IOConverter.java \
  gnu/gcj/convert/Output_8859_1.java \
+gnu/gcj/convert/Output_ASCII.java \
  gnu/gcj/convert/Output_EUCJIS.java \
  gnu/gcj/convert/Output_JavaSrc.java \
  gnu/gcj/convert/Output_SJIS.java \
diff --git a/libjava/Makefile.in b/libjava/Makefile.in

index f1d15018d29e6a09e3215482ed56e70c9d4b6731..7ebd6cebe939ff22f68d65e60502ddc16fe266a1 100644 (file)
--- a/libjava/Makefile.in
+++ b/libjava/Makefile.in
@@ -280,6 +280,7 @@ convert_source_files = \
  gnu/gcj/convert/BytesToUnicode.java \
  gnu/gcj/convert/Convert.java \
  gnu/gcj/convert/Input_8859_1.java \
+gnu/gcj/convert/Input_ASCII.java \
  gnu/gcj/convert/Input_EUCJIS.java \
  gnu/gcj/convert/Input_JavaSrc.java \
  gnu/gcj/convert/Input_SJIS.java \
@@ -287,6 +288,7 @@ gnu/gcj/convert/Input_UTF8.java     \
  gnu/gcj/convert/Input_iconv.java \
  gnu/gcj/convert/IOConverter.java \
  gnu/gcj/convert/Output_8859_1.java \
+gnu/gcj/convert/Output_ASCII.java \
  gnu/gcj/convert/Output_EUCJIS.java \
  gnu/gcj/convert/Output_JavaSrc.java \
  gnu/gcj/convert/Output_SJIS.java \
@@ -1197,6 +1199,7 @@ DEP_FILES =  .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \
  .deps/gnu/gcj/convert/BytesToUnicode.P .deps/gnu/gcj/convert/Convert.P \
  .deps/gnu/gcj/convert/IOConverter.P \
  .deps/gnu/gcj/convert/Input_8859_1.P \
+.deps/gnu/gcj/convert/Input_ASCII.P \
  .deps/gnu/gcj/convert/Input_EUCJIS.P \
  .deps/gnu/gcj/convert/Input_JavaSrc.P \
  .deps/gnu/gcj/convert/Input_SJIS.P .deps/gnu/gcj/convert/Input_UTF8.P \
@@ -1204,6 +1207,7 @@ DEP_FILES =  .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \
  .deps/gnu/gcj/convert/JIS0208_to_Unicode.P \
  .deps/gnu/gcj/convert/JIS0212_to_Unicode.P \
  .deps/gnu/gcj/convert/Output_8859_1.P \
+.deps/gnu/gcj/convert/Output_ASCII.P \
  .deps/gnu/gcj/convert/Output_EUCJIS.P \
  .deps/gnu/gcj/convert/Output_JavaSrc.P \
  .deps/gnu/gcj/convert/Output_SJIS.P .deps/gnu/gcj/convert/Output_UTF8.P \
diff --git a/libjava/gnu/gcj/convert/IOConverter.java b/libjava/gnu/gcj/convert/IOConverter.java

index c98662485b67a39576d5aaf7781ec0191520fefd..9b5fbad00f46ce823515f0124f25cf54a52d78cf 100644 (file)
--- a/libjava/gnu/gcj/convert/IOConverter.java
+++ b/libjava/gnu/gcj/convert/IOConverter.java
@@ -18,6 +18,10 @@ public abstract class IOConverter
    // Map encoding aliases to our canonical form.
    static private Hashtable hash = new Hashtable ();
  
+  // True if we have to do byte-order conversions on iconv()
+  // arguments.
+  static protected boolean iconv_byte_swap;
+
    static
    {
      // Manually maintained aliases.  Note that the value must be our
@@ -25,6 +29,17 @@ public abstract class IOConverter
      hash.put ("ISO-Latin-1", "8859_1");
      // All aliases after this point are automatically generated by the
      // `encodings.pl' script.  Run it to make any corrections.
+    hash.put ("ANSI_X3.4-1968", "ASCII");
+    hash.put ("iso-ir-6", "ASCII");
+    hash.put ("ANSI_X3.4-1986", "ASCII");
+    hash.put ("ISO_646.irv:1991", "ASCII");
+    hash.put ("ASCII", "ASCII");
+    hash.put ("ISO646-US", "ASCII");
+    hash.put ("US-ASCII", "ASCII");
+    hash.put ("us", "ASCII");
+    hash.put ("IBM367", "ASCII");
+    hash.put ("cp367", "ASCII");
+    hash.put ("csASCII", "ASCII");
      hash.put ("ISO_8859-1:1987", "8859_1");
      hash.put ("iso-ir-100", "8859_1");
      hash.put ("ISO_8859-1", "8859_1");
@@ -41,8 +56,12 @@ public abstract class IOConverter
      hash.put ("Extended_UNIX_Code_Packed_Format_for_Japanese", "EUCJIS");
      hash.put ("csEUCPkdFmtJapanese", "EUCJIS");
      hash.put ("EUC-JP", "EUCJIS");
+
+    iconv_byte_swap = iconv_init ();
    }
  
+  private static native boolean iconv_init ();
+
    // Turn an alias into the canonical form.
    protected static final String canonicalize (String name)
    {
diff --git a/libjava/gnu/gcj/convert/Input_8859_1.java b/libjava/gnu/gcj/convert/Input_8859_1.java

index 6c70034f3d9b7bc7573c714c211d330eb2f2f0e6..bd5f7798086a41f52d5030bbc7b153fd53060232 100644 (file)
--- a/libjava/gnu/gcj/convert/Input_8859_1.java
+++ b/libjava/gnu/gcj/convert/Input_8859_1.java
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999  Free Software Foundation
+/* Copyright (C) 1999, 2000  Free Software Foundation
  
     This file is part of libgcj.
  
@@ -9,7 +9,7 @@ details.  */
  package gnu.gcj.convert;
  
  /**
- * Convert ISO-Latin-1 (8851-1) text to Unicode.
+ * Convert ISO-Latin-1 (8859-1) text to Unicode.
   * @author Per Bothner <bothner@cygnus.com>
   * @date March 1999.
   */
diff --git a/libjava/gnu/gcj/convert/Input_ASCII.java b/libjava/gnu/gcj/convert/Input_ASCII.java

new file mode 100644 (file)

index 0000000..cb531e9
--- /dev/null
+++ b/libjava/gnu/gcj/convert/Input_ASCII.java
@@ -0,0 +1,37 @@
+/* Copyright (C) 2000  Free Software Foundation
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/**
+ * Convert ASCII text to Unicode.
+ * @date October 2000
+ */
+
+public class Input_ASCII extends BytesToUnicode
+{
+  public String getName() { return "ASCII"; }
+
+  public int read (char[] outbuffer, int outpos, int count)
+  {
+    int origpos = outpos;
+    // Make sure fields of this are in registers.
+    int inpos = this.inpos;
+    byte[] inbuffer = this.inbuffer;
+    int inavail = this.inlength - inpos;
+    int outavail = count;
+    if (outavail > inavail)
+      outavail = inavail;
+    while (--outavail >= 0)
+      {
+       outbuffer[outpos++] = (char) (inbuffer[inpos++] & 0x7f);
+      }
+    this.inpos = inpos;
+    return outpos - origpos;
+  }
+}
diff --git a/libjava/gnu/gcj/convert/Output_8859_1.java b/libjava/gnu/gcj/convert/Output_8859_1.java

index ac04ad67ac1ac79dc0974d78b04c95297d34eff9..7ae6a615f2306306dda2d0ad7d3e5ac7a9cf2739 100644 (file)
--- a/libjava/gnu/gcj/convert/Output_8859_1.java
+++ b/libjava/gnu/gcj/convert/Output_8859_1.java
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999  Free Software Foundation
+/* Copyright (C) 1999, 2000  Free Software Foundation
  
     This file is part of libgcj.
  
@@ -10,9 +10,9 @@ package gnu.gcj.convert;
   
  /**
   * Convert Unicode ISO-Latin-1 (8851-1) text.
- * The high-order byte of each character is truncated.
+ * Unrecognized characters are printed as `?'.
   * @author Per Bothner <bothner@cygnus.com>
- * @date Match 1999.
+ * @date March 1999.
   */
  
  public class Output_8859_1 extends UnicodeToBytes
@@ -30,7 +30,8 @@ public class Output_8859_1 extends UnicodeToBytes
        inlength = avail;
      for (int i = inlength;  --i >= 0;  )
        {
-       buf[count++] = (byte) inbuffer[inpos++];
+       char c = inbuffer[inpos++];
+       buf[count++] = (byte) ((c > 0xff) ? '?' : c);
        }
      this.count = count;
      return inlength;
@@ -45,7 +46,8 @@ public class Output_8859_1 extends UnicodeToBytes
        inlength = avail;
      for (int i = inlength;  --i >= 0;  )
        {
-       buf[count++] = (byte) str.charAt(inpos++);
+       char c = str.charAt(inpos++);
+       buf[count++] = (byte) ((c > 0xff) ? '?' : c);
        }
      this.count = count;
      return inlength;
diff --git a/libjava/gnu/gcj/convert/Output_ASCII.java b/libjava/gnu/gcj/convert/Output_ASCII.java

new file mode 100644 (file)

index 0000000..9f33645
--- /dev/null
+++ b/libjava/gnu/gcj/convert/Output_ASCII.java
@@ -0,0 +1,54 @@
+/* Copyright (C) 2000  Free Software Foundation
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert; 
+ 
+/**
+ * Convert Unicode to ASCII text.
+ * Unrecognized characters are printed as `?'.
+ * @date October 2000
+ */
+
+public class Output_ASCII extends UnicodeToBytes
+{
+  public String getName() { return "ASCII"; }
+
+  /**
+   * @return number of chars converted. */
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    int count = this.count;
+    byte[] buf = this.buf;
+    int avail = buf.length - count;
+    if (inlength > avail)
+      inlength = avail;
+    for (int i = inlength;  --i >= 0;  )
+      {
+       char c = inbuffer[inpos++];
+       buf[count++] = (byte) ((c > 0x7f) ? '?' : c);
+      }
+    this.count = count;
+    return inlength;
+  }
+
+  public int write (String str, int inpos, int inlength, char[] work)
+  {
+    int count = this.count;
+    byte[] buf = this.buf;
+    int avail = buf.length - count;
+    if (inlength > avail)
+      inlength = avail;
+    for (int i = inlength;  --i >= 0;  )
+      {
+       char c = str.charAt(inpos++);
+       buf[count++] = (byte) ((c > 0x7f) ? '?' : c);
+      }
+    this.count = count;
+    return inlength;
+  }
+}
diff --git a/libjava/gnu/gcj/convert/natIconv.cc b/libjava/gnu/gcj/convert/natIconv.cc

index 061779c02b8b7010d7a4233d60da353cd11657a3..d346b1488f25f036b6c69bcfd48d1276f08ea553 100644 (file)
--- a/libjava/gnu/gcj/convert/natIconv.cc
+++ b/libjava/gnu/gcj/convert/natIconv.cc
@@ -44,13 +44,13 @@ gnu::gcj::convert::Input_iconv::init (jstring encoding)
  
    iconv_t h = iconv_open ("UCS-2", buffer);
    if (h == (iconv_t) -1)
-    JvThrow (new java::io::UnsupportedEncodingException);
+    throw new java::io::UnsupportedEncodingException (encoding);
  
    JvAssert (h != NULL);
    handle = reinterpret_cast<gnu::gcj::RawData *> (h);
  #else /* HAVE_ICONV */
    // If no iconv, just throw an exception.
-  JvThrow (new java::io::UnsupportedEncodingException);
+  throw new java::io::UnsupportedEncodingException (encoding);
  #endif /* HAVE_ICONV */
  }
  
@@ -75,7 +75,7 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
    jchar *out = elements (outbuffer);
    size_t inavail = inlength - inpos;
    size_t old_in = inavail;
-  size_t outavail = count;
+  size_t outavail = count * sizeof (jchar);
    size_t old_out = outavail;
  
    char *inbuf = (char *) &bytes[inpos];
@@ -86,8 +86,20 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
                             &outbuf, &outavail);
    // FIXME: what if R==-1?
  
+  if (iconv_byte_swap)
+    {
+      size_t max = (old_out - outavail) / sizeof (jchar);
+      for (size_t i = 0; i < max; ++i)
+       {
+         // Byte swap.
+         jchar c = (((out[outpos + i] & 0xff) << 8)
+                    | ((out[outpos + i] >> 8) & 0xff));
+         outbuf[i] = c;
+       }
+    }
+
    inpos += old_in - inavail;
-  return old_out - outavail;
+  return (old_out - outavail) / sizeof (jchar);
  #else /* HAVE_ICONV */
    return -1;
  #endif /* HAVE_ICONV */
@@ -104,13 +116,13 @@ gnu::gcj::convert::Output_iconv::init (jstring encoding)
  
    iconv_t h = iconv_open (buffer, "UCS-2");
    if (h == (iconv_t) -1)
-    JvThrow (new java::io::UnsupportedEncodingException);
+    throw new java::io::UnsupportedEncodingException (encoding);
  
    JvAssert (h != NULL);
    handle = reinterpret_cast<gnu::gcj::RawData *> (h);
  #else /* HAVE_ICONV */
    // If no iconv, just throw an exception.
-  JvThrow (new java::io::UnsupportedEncodingException);
+  throw new java::io::UnsupportedEncodingException (encoding);
  #endif /* HAVE_ICONV */
  }
  
@@ -128,14 +140,15 @@ gnu::gcj::convert::Output_iconv::finalize (void)
  
  jint
  gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
-                                       jint inpos, jint count)
+                                       jint inpos, jint inlength)
  {
  #ifdef HAVE_ICONV
    jchar *chars = elements (inbuffer);
    jbyte *out = elements (buf);
+  jchar *temp_buffer = NULL;
  
-  size_t inavail = count;
-  size_t old_in = count;
+  size_t inavail = inlength * sizeof (jchar);
+  size_t old_in = inavail;
  
    size_t outavail = buf->length - count;
    size_t old_out = outavail;
@@ -143,14 +156,88 @@ gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
    char *inbuf = (char *) &chars[inpos];
    char *outbuf = (char *) &out[count];
  
-  size_t r = iconv_adapter (iconv, (iconv_t) handle,
-                           &inbuf, &inavail,
-                           &outbuf, &outavail);
-  // FIXME: what if R==-1?
+  if (iconv_byte_swap)
+    {
+      // Ugly performance penalty -- don't use losing systems!
+      temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar));
+      for (int i = 0; i < inlength; ++i)
+       {
+         // Byte swap.
+         jchar c = (((chars[inpos + i] & 0xff) << 8)
+                    | ((chars[inpos + i] >> 8) & 0xff));
+         temp_buffer[i] = c;
+       }
+      inbuf = (char *) temp_buffer;
+    }
+
+  // If the conversion fails on the very first character, then we
+  // assume that the character can't be represented in the output
+  // encoding.  There's nothing useful we can do here, so we simply
+  // omit that character.  Note that we can't check `errno' because
+  // glibc 2.1.3 doesn't set it correctly.  We could check it if we
+  // really needed to, but we'd have to disable support for 2.1.3.
+  size_t loop_old_in = old_in;
+  while (1)
+    {
+      size_t r = iconv_adapter (iconv, (iconv_t) handle,
+                               &inbuf, &inavail,
+                               &outbuf, &outavail);
+      if (r == -1 && inavail == loop_old_in)
+       {
+         inavail -= 2;
+         if (inavail == 0)
+           break;
+         loop_old_in -= 2;
+         inbuf += 2;
+       }
+      else
+       break;
+    }
+
+  if (temp_buffer != NULL)
+    _Jv_Free (temp_buffer);
  
    count += old_out - outavail;
-  return old_in - inavail;
+  return (old_in - inavail) / sizeof (jchar);
  #else /* HAVE_ICONV */
    return -1;
  #endif /* HAVE_ICONV */
  }
+
+jboolean
+gnu::gcj::convert::IOConverter::iconv_init (void)
+{
+  // Some versions of iconv() always return their UCS-2 results in
+  // big-endian order, and they also require UCS-2 inputs to be in
+  // big-endian order.  For instance, glibc 2.1.3 does this.  If the
+  // UTF-8=>UCS-2 iconv converter has this feature, then we assume
+  // that all UCS-2 converters do.  (This might not be the best
+  // heuristic, but it is all we've got.)
+  jboolean result = false;
+#ifdef HAVE_ICONV
+  iconv_t handle = iconv_open ("UCS-2", "UTF-8");
+  if (handle != (iconv_t) -1)
+    {
+      jchar c;
+      unsigned char in[3];
+      char *inp, *outp;
+      size_t inc, outc, r;
+
+      // This is the UTF-8 encoding of \ufeff.
+      in[0] = 0xef;
+      in[1] = 0xbb;
+      in[2] = 0xbf;
+
+      inp = (char *) in;
+      inc = 3;
+      outp = (char *) &c;
+      outc = 2;
+
+      r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc);
+      // Conversion must be complete for us to use the result.
+      if (r != (size_t) -1 && inc == 0 && outc == 0)
+       result = (c != 0xfeff);
+    }
+#endif /* HAVE_ICONV */
+  return result;
+}
diff --git a/libjava/scripts/encodings.pl b/libjava/scripts/encodings.pl

index f2f649959ce72ce7d1773836707c567011cfe905..4c7f0579534e750ff63ccd8c5125bb3e294908a1 100644 (file)
--- a/libjava/scripts/encodings.pl
+++ b/libjava/scripts/encodings.pl
@@ -4,6 +4,7 @@
  
  # Map IANA canonical names onto our canonical names.
  %map = (
+       'ANSI_X3.4-1968' => 'ASCII',
         'ISO_8859-1:1987' => '8859_1',
         'UTF-8' => 'UTF8',
         'Shift_JIS' => 'SJIS',
author	Tom Tromey <tromey@cygnus.com>
	Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)
committer	Tom Tromey <tromey@gcc.gnu.org>
	Wed, 1 Nov 2000 17:00:02 +0000 (17:00 +0000)
libjava/ChangeLog		patch \| blob \| blame \| history
libjava/Makefile.am		patch \| blob \| blame \| history
libjava/Makefile.in		patch \| blob \| blame \| history
libjava/gnu/gcj/convert/IOConverter.java		patch \| blob \| blame \| history
libjava/gnu/gcj/convert/Input_8859_1.java		patch \| blob \| blame \| history
libjava/gnu/gcj/convert/Input_ASCII.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/gcj/convert/Output_8859_1.java		patch \| blob \| blame \| history
libjava/gnu/gcj/convert/Output_ASCII.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/gcj/convert/natIconv.cc		patch \| blob \| blame \| history
libjava/scripts/encodings.pl		patch \| blob \| blame \| history