pdftops/GfxFont.cxx

   1 //========================================================================
   2 //
   3 // GfxFont.cc
   4 //
   5 // Copyright 1996-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 #include <config.h>
  10
  11 #ifdef USE_GCC_PRAGMAS
  12 #pragma implementation
  13 #endif
  14
  15 #include <stdio.h>
  16 #include <stdlib.h>
  17 #include <string.h>
  18 #include <ctype.h>
  19 #include "gmem.h"
  20 #include "Error.h"
  21 #include "Object.h"
  22 #include "Dict.h"
  23 #include "GlobalParams.h"
  24 #include "CMap.h"
  25 #include "CharCodeToUnicode.h"
  26 #include "FontEncodingTables.h"
  27 #include "BuiltinFontTables.h"
  28 #include "FoFiType1.h"
  29 #include "FoFiType1C.h"
  30 #include "FoFiTrueType.h"
  31 #include "GfxFont.h"
  32
  33 //------------------------------------------------------------------------
  34
// One row of the font-name alias table: maps an alternate font name, as
// found in a PDF file, to the Base14 name it should be replaced with.
struct StdFontMapEntry {
  char *altName;     // alias as it appears in BaseFont (spaces removed)
  char *properName;  // Base14 font name substituted for the alias
};
  39
// Acrobat 4.0 and earlier substituted Base14-compatible fonts without
// providing Widths and a FontDescriptor, so we munge the names into
// the proper Base14 names.  This table is from implementation note 44
// in the PDF 1.4 spec, with some additions based on empirical
// evidence.
//
// NOTE: the Gfx8BitFont constructor looks names up in this table with
// a binary search on altName, so the entries must stay sorted by
// altName in the order used by GString::cmp (presumably plain byte
// order -- confirm against GString before adding entries).
static StdFontMapEntry stdFontMap[] = {
  { "Arial",                        "Helvetica" },
  { "Arial,Bold",                   "Helvetica-Bold" },
  { "Arial,BoldItalic",             "Helvetica-BoldOblique" },
  { "Arial,Italic",                 "Helvetica-Oblique" },
  { "Arial-Bold",                   "Helvetica-Bold" },
  { "Arial-BoldItalic",             "Helvetica-BoldOblique" },
  { "Arial-BoldItalicMT",           "Helvetica-BoldOblique" },
  { "Arial-BoldMT",                 "Helvetica-Bold" },
  { "Arial-Italic",                 "Helvetica-Oblique" },
  { "Arial-ItalicMT",               "Helvetica-Oblique" },
  { "ArialMT",                      "Helvetica" },
  { "Courier,Bold",                 "Courier-Bold" },
  { "Courier,BoldItalic",           "Courier-BoldOblique" },
  { "Courier,Italic",               "Courier-Oblique" },
  { "CourierNew",                   "Courier" },
  { "CourierNew,Bold",              "Courier-Bold" },
  { "CourierNew,BoldItalic",        "Courier-BoldOblique" },
  { "CourierNew,Italic",            "Courier-Oblique" },
  { "CourierNew-Bold",              "Courier-Bold" },
  { "CourierNew-BoldItalic",        "Courier-BoldOblique" },
  { "CourierNew-Italic",            "Courier-Oblique" },
  { "CourierNewPS-BoldItalicMT",    "Courier-BoldOblique" },
  { "CourierNewPS-BoldMT",          "Courier-Bold" },
  { "CourierNewPS-ItalicMT",        "Courier-Oblique" },
  { "CourierNewPSMT",               "Courier" },
  { "Helvetica,Bold",               "Helvetica-Bold" },
  { "Helvetica,BoldItalic",         "Helvetica-BoldOblique" },
  { "Helvetica,Italic",             "Helvetica-Oblique" },
  { "Helvetica-BoldItalic",         "Helvetica-BoldOblique" },
  { "Helvetica-Italic",             "Helvetica-Oblique" },
  { "Symbol,Bold",                  "Symbol" },
  { "Symbol,BoldItalic",            "Symbol" },
  { "Symbol,Italic",                "Symbol" },
  { "TimesNewRoman",                "Times-Roman" },
  { "TimesNewRoman,Bold",           "Times-Bold" },
  { "TimesNewRoman,BoldItalic",     "Times-BoldItalic" },
  { "TimesNewRoman,Italic",         "Times-Italic" },
  { "TimesNewRoman-Bold",           "Times-Bold" },
  { "TimesNewRoman-BoldItalic",     "Times-BoldItalic" },
  { "TimesNewRoman-Italic",         "Times-Italic" },
  { "TimesNewRomanPS",              "Times-Roman" },
  { "TimesNewRomanPS-Bold",         "Times-Bold" },
  { "TimesNewRomanPS-BoldItalic",   "Times-BoldItalic" },
  { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
  { "TimesNewRomanPS-BoldMT",       "Times-Bold" },
  { "TimesNewRomanPS-Italic",       "Times-Italic" },
  { "TimesNewRomanPS-ItalicMT",     "Times-Italic" },
  { "TimesNewRomanPSMT",            "Times-Roman" },
  { "TimesNewRomanPSMT,Bold",       "Times-Bold" },
  { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
  { "TimesNewRomanPSMT,Italic",     "Times-Italic" }
};
  98
  99 //------------------------------------------------------------------------
 100 // GfxFont
 101 //------------------------------------------------------------------------
 102
 103 GfxFont *GfxFont::makeFont(XRef *xref, char *tagA, Ref idA, Dict *fontDict) {
 104   GString *nameA;
 105   GfxFont *font;
 106   Object obj1;
 107
 108   // get base font name
 109   nameA = NULL;
 110   fontDict->lookup("BaseFont", &obj1);
 111   if (obj1.isName()) {
 112     nameA = new GString(obj1.getName());
 113   }
 114   obj1.free();
 115
 116   // get font type
 117   font = NULL;
 118   fontDict->lookup("Subtype", &obj1);
 119   if (obj1.isName("Type1") || obj1.isName("MMType1")) {
 120     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType1, fontDict);
 121   } else if (obj1.isName("Type1C")) {
 122     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType1C, fontDict);
 123   } else if (obj1.isName("Type3")) {
 124     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType3, fontDict);
 125   } else if (obj1.isName("TrueType")) {
 126     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontTrueType, fontDict);
 127   } else if (obj1.isName("Type0")) {
 128     font = new GfxCIDFont(xref, tagA, idA, nameA, fontDict);
 129   } else {
 130     error(-1, "Unknown font type: '%s'",
 131           obj1.isName() ? obj1.getName() : "???");
 132     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontUnknownType, fontDict);
 133   }
 134   obj1.free();
 135
 136   return font;
 137 }
 138
 139 GfxFont::GfxFont(char *tagA, Ref idA, GString *nameA) {
 140   ok = gFalse;
 141   tag = new GString(tagA);
 142   id = idA;
 143   name = nameA;
 144   origName = nameA;
 145   embFontName = NULL;
 146   extFontFile = NULL;
 147 }
 148
 149 GfxFont::~GfxFont() {
 150   delete tag;
 151   if (origName && origName != name) {
 152     delete origName;
 153   }
 154   if (name) {
 155     delete name;
 156   }
 157   if (embFontName) {
 158     delete embFontName;
 159   }
 160   if (extFontFile) {
 161     delete extFontFile;
 162   }
 163 }
 164
// Read the FontDescriptor dictionary (if any) referenced by <fontDict>
// and fill in: flags, embFontName, embFontID, missingWidth, ascent,
// descent and fontBBox.  Also corrects <type> when the kind of
// embedded font file disagrees with the declared font type.
//
// flags, embFontID and missingWidth are always reset to defaults
// first; ascent, descent and fontBBox are only overwritten when the
// descriptor supplies usable values, so the caller must have set
// defaults for them beforehand.
void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
  Object obj1, obj2, obj3, obj4;
  double t;
  int i;

  // assume Times-Roman by default (for substitution purposes)
  flags = fontSerif;

  // embFontID.num == -1 means "no embedded font file found"
  embFontID.num = -1;
  embFontID.gen = -1;
  missingWidth = 0;

  if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {

    // get flags
    if (obj1.dictLookup("Flags", &obj2)->isInt()) {
      flags = obj2.getInt();
    }
    obj2.free();

    // get name
    obj1.dictLookup("FontName", &obj2);
    if (obj2.isName()) {
      embFontName = new GString(obj2.getName());
    }
    obj2.free();

    // look for embedded font file
    // (FontFile, FontFile2 and FontFile3 are tried in that order; the
    // first one that yields a reference wins)
    if (obj1.dictLookupNF("FontFile", &obj2)->isRef()) {
      embFontID = obj2.getRef();
      if (type != fontType1) {
        error(-1, "Mismatch between font type and embedded font file");
        type = fontType1;
      }
    }
    obj2.free();
    if (embFontID.num == -1 &&
        obj1.dictLookupNF("FontFile2", &obj2)->isRef()) {
      embFontID = obj2.getRef();
      if (type != fontTrueType && type != fontCIDType2) {
        error(-1, "Mismatch between font type and embedded font file");
        type = type == fontCIDType0 ? fontCIDType2 : fontTrueType;
      }
    }
    obj2.free();
    if (embFontID.num == -1 &&
        obj1.dictLookupNF("FontFile3", &obj2)->isRef()) {
      // FontFile3 covers several formats; the stream's own Subtype
      // entry says which one it is
      if (obj2.fetch(xref, &obj3)->isStream()) {
        obj3.streamGetDict()->lookup("Subtype", &obj4);
        if (obj4.isName("Type1")) {
          embFontID = obj2.getRef();
          if (type != fontType1) {
            error(-1, "Mismatch between font type and embedded font file");
            type = fontType1;
          }
        } else if (obj4.isName("Type1C")) {
          embFontID = obj2.getRef();
          // a Type1 font dict with a Type1C font file is accepted
          // silently; the type is switched to Type1C either way
          if (type != fontType1 && type != fontType1C) {
            error(-1, "Mismatch between font type and embedded font file");
          }
          type = fontType1C;
        } else if (obj4.isName("TrueType")) {
          embFontID = obj2.getRef();
          if (type != fontTrueType) {
            error(-1, "Mismatch between font type and embedded font file");
            type = fontTrueType;
          }
        } else if (obj4.isName("CIDFontType0C")) {
          embFontID = obj2.getRef();
          if (type != fontCIDType0) {
            error(-1, "Mismatch between font type and embedded font file");
          }
          type = fontCIDType0C;
        } else {
          // unknown format: embFontID is left unset, so the font is
          // treated as not embedded
          error(-1, "Unknown embedded font type '%s'",
                obj4.isName() ? obj4.getName() : "???");
        }
        obj4.free();
      }
      obj3.free();
    }
    obj2.free();

    // look for MissingWidth
    obj1.dictLookup("MissingWidth", &obj2);
    if (obj2.isNum()) {
      missingWidth = obj2.getNum();
    }
    obj2.free();

    // get Ascent and Descent
    // (descriptor values are scaled by 0.001 before being stored)
    obj1.dictLookup("Ascent", &obj2);
    if (obj2.isNum()) {
      t = 0.001 * obj2.getNum();
      // some broken font descriptors set ascent and descent to 0
      if (t != 0) {
        ascent = t;
      }
    }
    obj2.free();
    obj1.dictLookup("Descent", &obj2);
    if (obj2.isNum()) {
      t = 0.001 * obj2.getNum();
      // some broken font descriptors set ascent and descent to 0
      if (t != 0) {
        descent = t;
      }
      // some broken font descriptors specify a positive descent
      if (descent > 0) {
        descent = -descent;
      }
    }
    obj2.free();

    // get FontBBox (only numeric entries overwrite the existing values)
    if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
      for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
        if (obj2.arrayGet(i, &obj3)->isNum()) {
          fontBBox[i] = 0.001 * obj3.getNum();
        }
        obj3.free();
      }
    }
    obj2.free();

  }
  obj1.free();
}
 293
 294 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
 295                                               CharCodeToUnicode *ctu) {
 296   GString *buf;
 297   Object obj1;
 298   int c;
 299
 300   if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
 301     obj1.free();
 302     return NULL;
 303   }
 304   buf = new GString();
 305   obj1.streamReset();
 306   while ((c = obj1.streamGetChar()) != EOF) {
 307     buf->append(c);
 308   }
 309   obj1.streamClose();
 310   obj1.free();
 311   if (ctu) {
 312     ctu->mergeCMap(buf, nBits);
 313   } else {
 314     ctu = CharCodeToUnicode::parseCMap(buf, nBits);
 315   }
 316   delete buf;
 317   return ctu;
 318 }
 319
 320 void GfxFont::findExtFontFile() {
 321   static char *type1Exts[] = { ".pfa", ".pfb", ".ps", "", NULL };
 322   static char *ttExts[] = { ".ttf", NULL };
 323
 324   if (name) {
 325     if (type == fontType1) {
 326       extFontFile = globalParams->findFontFile(name, type1Exts);
 327     } else if (type == fontTrueType) {
 328       extFontFile = globalParams->findFontFile(name, ttExts);
 329     }
 330   }
 331 }
 332
 333 char *GfxFont::readExtFontFile(int *len) {
 334   FILE *f;
 335   char *buf;
 336
 337   if (!(f = fopen(extFontFile->getCString(), "rb"))) {
 338     error(-1, "External font file '%s' vanished", extFontFile->getCString());
 339     return NULL;
 340   }
 341   fseek(f, 0, SEEK_END);
 342   *len = (int)ftell(f);
 343   fseek(f, 0, SEEK_SET);
 344   buf = (char *)gmalloc(*len);
 345   if ((int)fread(buf, 1, *len, f) != *len) {
 346     error(-1, "Error reading external font file '%s'",
 347           extFontFile->getCString());
 348   }
 349   fclose(f);
 350   return buf;
 351 }
 352
 353 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
 354   char *buf;
 355   Object obj1, obj2;
 356   Stream *str;
 357   int c;
 358   int size, i;
 359
 360   obj1.initRef(embFontID.num, embFontID.gen);
 361   obj1.fetch(xref, &obj2);
 362   if (!obj2.isStream()) {
 363     error(-1, "Embedded font file is not a stream");
 364     obj2.free();
 365     obj1.free();
 366     embFontID.num = -1;
 367     return NULL;
 368   }
 369   str = obj2.getStream();
 370
 371   buf = NULL;
 372   i = size = 0;
 373   str->reset();
 374   while ((c = str->getChar()) != EOF) {
 375     if (i == size) {
 376       size += 4096;
 377       buf = (char *)grealloc(buf, size);
 378     }
 379     buf[i++] = c;
 380   }
 381   *len = i;
 382   str->close();
 383
 384   obj2.free();
 385   obj1.free();
 386
 387   return buf;
 388 }
 389
 390 //------------------------------------------------------------------------
 391 // Gfx8BitFont
 392 //------------------------------------------------------------------------
 393
 394 Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA,
 395                          GfxFontType typeA, Dict *fontDict):
 396   GfxFont(tagA, idA, nameA)
 397 {
 398   GString *name2;
 399   BuiltinFont *builtinFont;
 400   char **baseEnc;
 401   GBool baseEncFromFontFile;
 402   char *buf;
 403   int len;
 404   FoFiType1 *ffT1;
 405   FoFiType1C *ffT1C;
 406   int code, code2;
 407   char *charName;
 408   GBool missing, hex;
 409   Unicode toUnicode[256];
 410   CharCodeToUnicode *utu, *ctu2;
 411   Unicode uBuf[8];
 412   double mul;
 413   int firstChar, lastChar;
 414   Gushort w;
 415   Object obj1, obj2, obj3;
 416   int n, i, a, b, m;
 417
 418   type = typeA;
 419   ctu = NULL;
 420
 421   // do font name substitution for various aliases of the Base 14 font
 422   // names
 423   if (name) {
 424     name2 = name->copy();
 425     i = 0;
 426     while (i < name2->getLength()) {
 427       if (name2->getChar(i) == ' ') {
 428         name2->del(i);
 429       } else {
 430         ++i;
 431       }
 432     }
 433     a = 0;
 434     b = sizeof(stdFontMap) / sizeof(StdFontMapEntry);
 435     // invariant: stdFontMap[a].altName <= name2 < stdFontMap[b].altName
 436     while (b - a > 1) {
 437       m = (a + b) / 2;
 438       if (name2->cmp(stdFontMap[m].altName) >= 0) {
 439         a = m;
 440       } else {
 441         b = m;
 442       }
 443     }
 444     if (!name2->cmp(stdFontMap[a].altName)) {
 445       name = new GString(stdFontMap[a].properName);
 446     }
 447     delete name2;
 448   }
 449
 450   // is it a built-in font?
 451   builtinFont = NULL;
 452   if (name) {
 453     for (i = 0; i < nBuiltinFonts; ++i) {
 454       if (!name->cmp(builtinFonts[i].name)) {
 455         builtinFont = &builtinFonts[i];
 456         break;
 457       }
 458     }
 459   }
 460
 461   // default ascent/descent values
 462   if (builtinFont) {
 463     ascent = 0.001 * builtinFont->ascent;
 464     descent = 0.001 * builtinFont->descent;
 465     fontBBox[0] = 0.001 * builtinFont->bbox[0];
 466     fontBBox[1] = 0.001 * builtinFont->bbox[1];
 467     fontBBox[2] = 0.001 * builtinFont->bbox[2];
 468     fontBBox[3] = 0.001 * builtinFont->bbox[3];
 469   } else {
 470     ascent = 0.95;
 471     descent = -0.35;
 472     fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
 473   }
 474
 475   // get info from font descriptor
 476   readFontDescriptor(xref, fontDict);
 477
 478   // for non-embedded fonts, don't trust the ascent/descent/bbox
 479   // values from the font descriptor
 480   if (builtinFont && embFontID.num < 0) {
 481     ascent = 0.001 * builtinFont->ascent;
 482     descent = 0.001 * builtinFont->descent;
 483     fontBBox[0] = 0.001 * builtinFont->bbox[0];
 484     fontBBox[1] = 0.001 * builtinFont->bbox[1];
 485     fontBBox[2] = 0.001 * builtinFont->bbox[2];
 486     fontBBox[3] = 0.001 * builtinFont->bbox[3];
 487   }
 488
 489   // look for an external font file
 490   findExtFontFile();
 491
 492   // get font matrix
 493   fontMat[0] = fontMat[3] = 1;
 494   fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
 495   if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
 496     for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
 497       if (obj1.arrayGet(i, &obj2)->isNum()) {
 498         fontMat[i] = obj2.getNum();
 499       }
 500       obj2.free();
 501     }
 502   }
 503   obj1.free();
 504
 505   // get Type 3 bounding box, font definition, and resources
 506   if (type == fontType3) {
 507     if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
 508       for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
 509         if (obj1.arrayGet(i, &obj2)->isNum()) {
 510           fontBBox[i] = obj2.getNum();
 511         }
 512         obj2.free();
 513       }
 514     }
 515     obj1.free();
 516     if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
 517       error(-1, "Missing or invalid CharProcs dictionary in Type 3 font");
 518       charProcs.free();
 519     }
 520     if (!fontDict->lookup("Resources", &resources)->isDict()) {
 521       resources.free();
 522     }
 523   }
 524
 525   //----- build the font encoding -----
 526
 527   // Encodings start with a base encoding, which can come from
 528   // (in order of priority):
 529   //   1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
 530   //        - MacRoman / MacExpert / WinAnsi / Standard
 531   //   2. embedded or external font file
 532   //   3. default:
 533   //        - builtin --> builtin encoding
 534   //        - TrueType --> WinAnsiEncoding
 535   //        - others --> StandardEncoding
 536   // and then add a list of differences (if any) from
 537   // FontDict.Encoding.Differences.
 538
 539   // check FontDict for base encoding
 540   hasEncoding = gFalse;
 541   usesMacRomanEnc = gFalse;
 542   baseEnc = NULL;
 543   baseEncFromFontFile = gFalse;
 544   fontDict->lookup("Encoding", &obj1);
 545   if (obj1.isDict()) {
 546     obj1.dictLookup("BaseEncoding", &obj2);
 547     if (obj2.isName("MacRomanEncoding")) {
 548       hasEncoding = gTrue;
 549       usesMacRomanEnc = gTrue;
 550       baseEnc = macRomanEncoding;
 551     } else if (obj2.isName("MacExpertEncoding")) {
 552       hasEncoding = gTrue;
 553       baseEnc = macExpertEncoding;
 554     } else if (obj2.isName("WinAnsiEncoding")) {
 555       hasEncoding = gTrue;
 556       baseEnc = winAnsiEncoding;
 557     }
 558     obj2.free();
 559   } else if (obj1.isName("MacRomanEncoding")) {
 560     hasEncoding = gTrue;
 561     usesMacRomanEnc = gTrue;
 562     baseEnc = macRomanEncoding;
 563   } else if (obj1.isName("MacExpertEncoding")) {
 564     hasEncoding = gTrue;
 565     baseEnc = macExpertEncoding;
 566   } else if (obj1.isName("WinAnsiEncoding")) {
 567     hasEncoding = gTrue;
 568     baseEnc = winAnsiEncoding;
 569   }
 570
 571   // check embedded or external font file for base encoding
 572   // (only for Type 1 fonts - trying to get an encoding out of a
 573   // TrueType font is a losing proposition)
 574   ffT1 = NULL;
 575   ffT1C = NULL;
 576   buf = NULL;
 577   if (type == fontType1 && (extFontFile || embFontID.num >= 0)) {
 578     if (extFontFile) {
 579       ffT1 = FoFiType1::load(extFontFile->getCString());
 580     } else {
 581       buf = readEmbFontFile(xref, &len);
 582       ffT1 = FoFiType1::make(buf, len);
 583     }
 584     if (ffT1) {
 585       if (ffT1->getName()) {
 586         if (embFontName) {
 587           delete embFontName;
 588         }
 589         embFontName = new GString(ffT1->getName());
 590       }
 591       if (!baseEnc) {
 592         baseEnc = ffT1->getEncoding();
 593         baseEncFromFontFile = gTrue;
 594       }
 595     }
 596   } else if (type == fontType1C && (extFontFile || embFontID.num >= 0)) {
 597     if (extFontFile) {
 598       ffT1C = FoFiType1C::load(extFontFile->getCString());
 599     } else {
 600       buf = readEmbFontFile(xref, &len);
 601       ffT1C = FoFiType1C::make(buf, len);
 602     }
 603     if (ffT1C) {
 604       if (ffT1C->getName()) {
 605         if (embFontName) {
 606           delete embFontName;
 607         }
 608         embFontName = new GString(ffT1C->getName());
 609       }
 610       if (!baseEnc) {
 611         baseEnc = ffT1C->getEncoding();
 612         baseEncFromFontFile = gTrue;
 613       }
 614     }
 615   }
 616   if (buf) {
 617     gfree(buf);
 618   }
 619
 620   // get default base encoding
 621   if (!baseEnc) {
 622     if (builtinFont && embFontID.num < 0) {
 623       baseEnc = builtinFont->defaultBaseEnc;
 624       hasEncoding = gTrue;
 625     } else if (type == fontTrueType) {
 626       baseEnc = winAnsiEncoding;
 627     } else {
 628       baseEnc = standardEncoding;
 629     }
 630   }
 631
 632   // copy the base encoding
 633   for (i = 0; i < 256; ++i) {
 634     enc[i] = baseEnc[i];
 635     if ((encFree[i] = baseEncFromFontFile) && enc[i]) {
 636       enc[i] = copyString(baseEnc[i]);
 637     }
 638   }
 639
 640   // some Type 1C font files have empty encodings, which can break the
 641   // T1C->T1 conversion (since the 'seac' operator depends on having
 642   // the accents in the encoding), so we fill in any gaps from
 643   // StandardEncoding
 644   if (type == fontType1C && (extFontFile || embFontID.num >= 0) &&
 645       baseEncFromFontFile) {
 646     for (i = 0; i < 256; ++i) {
 647       if (!enc[i] && standardEncoding[i]) {
 648         enc[i] = standardEncoding[i];
 649         encFree[i] = gFalse;
 650       }
 651     }
 652   }
 653
 654   // merge differences into encoding
 655   if (obj1.isDict()) {
 656     obj1.dictLookup("Differences", &obj2);
 657     if (obj2.isArray()) {
 658       hasEncoding = gTrue;
 659       code = 0;
 660       for (i = 0; i < obj2.arrayGetLength(); ++i) {
 661         obj2.arrayGet(i, &obj3);
 662         if (obj3.isInt()) {
 663           code = obj3.getInt();
 664         } else if (obj3.isName()) {
 665           if (code >= 0 && code < 256) {
 666             if (encFree[code]) {
 667               gfree(enc[code]);
 668             }
 669             enc[code] = copyString(obj3.getName());
 670             encFree[code] = gTrue;
 671           }
 672           ++code;
 673         } else {
 674           error(-1, "Wrong type in font encoding resource differences (%s)",
 675                 obj3.getTypeName());
 676         }
 677         obj3.free();
 678       }
 679     }
 680     obj2.free();
 681   }
 682   obj1.free();
 683   if (ffT1) {
 684     delete ffT1;
 685   }
 686   if (ffT1C) {
 687     delete ffT1C;
 688   }
 689
 690   //----- build the mapping to Unicode -----
 691
 692   // pass 1: use the name-to-Unicode mapping table
 693   missing = hex = gFalse;
 694   for (code = 0; code < 256; ++code) {
 695     if ((charName = enc[code])) {
 696       if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
 697           strcmp(charName, ".notdef")) {
 698         // if it wasn't in the name-to-Unicode table, check for a
 699         // name that looks like 'Axx' or 'xx', where 'A' is any letter
 700         // and 'xx' is two hex digits
 701         if ((strlen(charName) == 3 &&
 702              isalpha(charName[0]) &&
 703              isxdigit(charName[1]) && isxdigit(charName[2]) &&
 704              ((charName[1] >= 'a' && charName[1] <= 'f') ||
 705               (charName[1] >= 'A' && charName[1] <= 'F') ||
 706               (charName[2] >= 'a' && charName[2] <= 'f') ||
 707               (charName[2] >= 'A' && charName[2] <= 'F'))) ||
 708             (strlen(charName) == 2 &&
 709              isxdigit(charName[0]) && isxdigit(charName[1]) &&
 710              ((charName[0] >= 'a' && charName[0] <= 'f') ||
 711               (charName[0] >= 'A' && charName[0] <= 'F') ||
 712               (charName[1] >= 'a' && charName[1] <= 'f') ||
 713               (charName[1] >= 'A' && charName[1] <= 'F')))) {
 714           hex = gTrue;
 715         }
 716         missing = gTrue;
 717       }
 718     } else {
 719       toUnicode[code] = 0;
 720     }
 721   }
 722
 723   // pass 2: try to fill in the missing chars, looking for names of
 724   // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
 725   // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
 726   // decimal digits
 727   if (missing && globalParams->getMapNumericCharNames()) {
 728     for (code = 0; code < 256; ++code) {
 729       if ((charName = enc[code]) && !toUnicode[code] &&
 730           strcmp(charName, ".notdef")) {
 731         n = strlen(charName);
 732         code2 = -1;
 733         if (hex && n == 3 && isalpha(charName[0]) &&
 734             isxdigit(charName[1]) && isxdigit(charName[2])) {
 735           sscanf(charName+1, "%x", &code2);
 736         } else if (hex && n == 2 &&
 737                    isxdigit(charName[0]) && isxdigit(charName[1])) {
 738           sscanf(charName, "%x", &code2);
 739         } else if (!hex && n >= 2 && n <= 4 &&
 740                    isdigit(charName[0]) && isdigit(charName[1])) {
 741           code2 = atoi(charName);
 742         } else if (n >= 3 && n <= 5 &&
 743                    isdigit(charName[1]) && isdigit(charName[2])) {
 744           code2 = atoi(charName+1);
 745         } else if (n >= 4 && n <= 6 &&
 746                    isdigit(charName[2]) && isdigit(charName[3])) {
 747           code2 = atoi(charName+2);
 748         }
 749         if (code2 >= 0 && code2 <= 0xff) {
 750           toUnicode[code] = (Unicode)code2;
 751         }
 752       }
 753     }
 754   }
 755
 756   // construct the char code -> Unicode mapping object
 757   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 758
 759   // merge in a ToUnicode CMap, if there is one -- this overwrites
 760   // existing entries in ctu, i.e., the ToUnicode CMap takes
 761   // precedence, but the other encoding info is allowed to fill in any
 762   // holes
 763   readToUnicodeCMap(fontDict, 8, ctu);
 764
 765   // look for a Unicode-to-Unicode mapping
 766   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
 767     for (i = 0; i < 256; ++i) {
 768       toUnicode[i] = 0;
 769     }
 770     ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 771     for (i = 0; i < 256; ++i) {
 772       n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
 773       if (n >= 1) {
 774         n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
 775         if (n >= 1) {
 776           ctu2->setMapping((CharCode)i, uBuf, n);
 777         }
 778       }
 779     }
 780     utu->decRefCnt();
 781     delete ctu;
 782     ctu = ctu2;
 783   }
 784
 785   //----- get the character widths -----
 786
 787   // initialize all widths
 788   for (code = 0; code < 256; ++code) {
 789     widths[code] = missingWidth * 0.001;
 790   }
 791
 792   // use widths from font dict, if present
 793   fontDict->lookup("FirstChar", &obj1);
 794   firstChar = obj1.isInt() ? obj1.getInt() : 0;
 795   obj1.free();
 796   if (firstChar < 0 || firstChar > 255) {
 797     firstChar = 0;
 798   }
 799   fontDict->lookup("LastChar", &obj1);
 800   lastChar = obj1.isInt() ? obj1.getInt() : 255;
 801   obj1.free();
 802   if (lastChar < 0 || lastChar > 255) {
 803     lastChar = 255;
 804   }
 805   mul = (type == fontType3) ? fontMat[0] : 0.001;
 806   fontDict->lookup("Widths", &obj1);
 807   if (obj1.isArray()) {
 808     flags |= fontFixedWidth;
 809     if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
 810       lastChar = firstChar + obj1.arrayGetLength() - 1;
 811     }
 812     for (code = firstChar; code <= lastChar; ++code) {
 813       obj1.arrayGet(code - firstChar, &obj2);
 814       if (obj2.isNum()) {
 815         widths[code] = obj2.getNum() * mul;
 816         if (widths[code] != widths[firstChar]) {
 817           flags &= ~fontFixedWidth;
 818         }
 819       }
 820       obj2.free();
 821     }
 822
 823   // use widths from built-in font
 824   } else if (builtinFont) {
 825     // this is a kludge for broken PDF files that encode char 32
 826     // as .notdef
 827     if (builtinFont->widths->getWidth("space", &w)) {
 828       widths[32] = 0.001 * w;
 829     }
 830     for (code = 0; code < 256; ++code) {
 831       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 832         widths[code] = 0.001 * w;
 833       }
 834     }
 835
 836   // couldn't find widths -- use defaults
 837   } else {
 838     // this is technically an error -- the Widths entry is required
 839     // for all but the Base-14 fonts -- but certain PDF generators
 840     // apparently don't include widths for Arial and TimesNewRoman
 841     if (isFixedWidth()) {
 842       i = 0;
 843     } else if (isSerif()) {
 844       i = 8;
 845     } else {
 846       i = 4;
 847     }
 848     if (isBold()) {
 849       i += 2;
 850     }
 851     if (isItalic()) {
 852       i += 1;
 853     }
 854     builtinFont = builtinFontSubst[i];
 855     // this is a kludge for broken PDF files that encode char 32
 856     // as .notdef
 857     if (builtinFont->widths->getWidth("space", &w)) {
 858       widths[32] = 0.001 * w;
 859     }
 860     for (code = 0; code < 256; ++code) {
 861       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 862         widths[code] = 0.001 * w;
 863       }
 864     }
 865   }
 866   obj1.free();
 867
 868   ok = gTrue;
 869 }
 870
 871 Gfx8BitFont::~Gfx8BitFont() {
 872   int i;
 873
 874   for (i = 0; i < 256; ++i) {
 875     if (encFree[i] && enc[i]) {
 876       gfree(enc[i]);
 877     }
 878   }
 879   ctu->decRefCnt();
 880   if (charProcs.isDict()) {
 881     charProcs.free();
 882   }
 883   if (resources.isDict()) {
 884     resources.free();
 885   }
 886 }
 887
 888 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
 889                              Unicode *u, int uSize, int *uLen,
 890                              double *dx, double *dy, double *ox, double *oy) {
 891   CharCode c;
 892
 893   *code = c = (CharCode)(*s & 0xff);
 894   *uLen = ctu->mapToUnicode(c, u, uSize);
 895   *dx = widths[c];
 896   *dy = *ox = *oy = 0;
 897   return 1;
 898 }
 899
 900 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
 901   ctu->incRefCnt();
 902   return ctu;
 903 }
 904
 905 Gushort *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
 906   Gushort *map;
 907   int cmapPlatform, cmapEncoding;
 908   int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
 909   GBool useMacRoman, useUnicode;
 910   char *charName;
 911   Unicode u;
 912   int code, i, n;
 913
 914   map = (Gushort *)gmallocn(256, sizeof(Gushort));
 915   for (i = 0; i < 256; ++i) {
 916     map[i] = 0;
 917   }
 918
 919   // To match up with the Adobe-defined behaviour, we choose a cmap
 920   // like this:
 921   // 1. If the PDF font has an encoding:
 922   //    1a. If the PDF font specified MacRomanEncoding and the
 923   //        TrueType font has a Macintosh Roman cmap, use it, and
 924   //        reverse map the char names through MacRomanEncoding to
 925   //        get char codes.
 926   //    1b. If the TrueType font has a Microsoft Unicode cmap or a
 927   //        non-Microsoft Unicode cmap, use it, and use the Unicode
 928   //        indexes, not the char codes.
 929   //    1c. If the PDF font is symbolic and the TrueType font has a
 930   //        Microsoft Symbol cmap, use it, and use char codes
 931   //        directly (possibly with an offset of 0xf000).
 932   //    1d. If the TrueType font has a Macintosh Roman cmap, use it,
 933   //        as in case 1a.
 934   // 2. If the PDF font does not have an encoding or the PDF font is
 935   //    symbolic:
 936   //    2a. If the TrueType font has a Macintosh Roman cmap, use it,
 937   //        and use char codes directly (possibly with an offset of
 938   //        0xf000).
 939   //    2b. If the TrueType font has a Microsoft Symbol cmap, use it,
 940   //        and use char codes directly (possible with an offset of
 941   //        0xf000).
 942   // 3. If none of these rules apply, use the first cmap and hope for
 943   //    the best (this shouldn't happen).
 944   unicodeCmap = macRomanCmap = msSymbolCmap = -1;
 945   for (i = 0; i < ff->getNumCmaps(); ++i) {
 946     cmapPlatform = ff->getCmapPlatform(i);
 947     cmapEncoding = ff->getCmapEncoding(i);
 948     if ((cmapPlatform == 3 && cmapEncoding == 1) ||
 949         cmapPlatform == 0) {
 950       unicodeCmap = i;
 951     } else if (cmapPlatform == 1 && cmapEncoding == 0) {
 952       macRomanCmap = i;
 953     } else if (cmapPlatform == 3 && cmapEncoding == 0) {
 954       msSymbolCmap = i;
 955     }
 956   }
 957   cmap = 0;
 958   useMacRoman = gFalse;
 959   useUnicode = gFalse;
 960   if (hasEncoding) {
 961     if (usesMacRomanEnc && macRomanCmap >= 0) {
 962       cmap = macRomanCmap;
 963       useMacRoman = gTrue;
 964     } else if (unicodeCmap >= 0) {
 965       cmap = unicodeCmap;
 966       useUnicode = gTrue;
 967     } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) {
 968       cmap = msSymbolCmap;
 969     } else if ((flags & fontSymbolic) && macRomanCmap >= 0) {
 970       cmap = macRomanCmap;
 971     } else if (macRomanCmap >= 0) {
 972       cmap = macRomanCmap;
 973       useMacRoman = gTrue;
 974     }
 975   } else {
 976     if (macRomanCmap >= 0) {
 977       cmap = macRomanCmap;
 978     } else if (msSymbolCmap >= 0) {
 979       cmap = msSymbolCmap;
 980     }
 981   }
 982
 983   // reverse map the char names through MacRomanEncoding, then map the
 984   // char codes through the cmap
 985   if (useMacRoman) {
 986     for (i = 0; i < 256; ++i) {
 987       if ((charName = enc[i])) {
 988         if ((code = globalParams->getMacRomanCharCode(charName))) {
 989           map[i] = ff->mapCodeToGID(cmap, code);
 990         }
 991       }
 992     }
 993
 994   // map Unicode through the cmap
 995   } else if (useUnicode) {
 996     for (i = 0; i < 256; ++i) {
 997       if (((charName = enc[i]) &&
 998            (u = globalParams->mapNameToUnicode(charName))) ||
 999           (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
1000         map[i] = ff->mapCodeToGID(cmap, u);
1001       }
1002     }
1003
1004   // map the char codes through the cmap, possibly with an offset of
1005   // 0xf000
1006   } else {
1007     for (i = 0; i < 256; ++i) {
1008       if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
1009         map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
1010       }
1011     }
1012   }
1013
1014   // try the TrueType 'post' table to handle any unmapped characters
1015   for (i = 0; i < 256; ++i) {
1016     if (!map[i] && (charName = enc[i])) {
1017       map[i] = (Gushort)(long)ff->mapNameToGID(charName);
1018     }
1019   }
1020
1021   return map;
1022 }
1023
1024 Dict *Gfx8BitFont::getCharProcs() {
1025   return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
1026 }
1027
1028 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
1029   if (enc[code] && charProcs.isDict()) {
1030     charProcs.dictLookup(enc[code], proc);
1031   } else {
1032     proc->initNull();
1033   }
1034   return proc;
1035 }
1036
1037 Dict *Gfx8BitFont::getResources() {
1038   return resources.isDict() ? resources.getDict() : (Dict *)NULL;
1039 }
1040
1041 //------------------------------------------------------------------------
1042 // GfxCIDFont
1043 //------------------------------------------------------------------------
1044
1045 static int CDECL cmpWidthExcep(const void *w1, const void *w2) {
1046   return ((GfxFontCIDWidthExcep *)w1)->first -
1047          ((GfxFontCIDWidthExcep *)w2)->first;
1048 }
1049
1050 static int CDECL cmpWidthExcepV(const void *w1, const void *w2) {
1051   return ((GfxFontCIDWidthExcepV *)w1)->first -
1052          ((GfxFontCIDWidthExcepV *)w2)->first;
1053 }
1054
1055 GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA,
1056                        Dict *fontDict):
1057   GfxFont(tagA, idA, nameA)
1058 {
1059   Dict *desFontDict;
1060   GString *collection, *cMapName;
1061   Object desFontDictObj;
1062   Object obj1, obj2, obj3, obj4, obj5, obj6;
1063   CharCodeToUnicode *utu;
1064   CharCode c;
1065   Unicode uBuf[8];
1066   int c1, c2;
1067   int excepsSize, i, j, k, n;
1068
1069   ascent = 0.95;
1070   descent = -0.35;
1071   fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
1072   cMap = NULL;
1073   ctu = NULL;
1074   widths.defWidth = 1.0;
1075   widths.defHeight = -1.0;
1076   widths.defVY = 0.880;
1077   widths.exceps = NULL;
1078   widths.nExceps = 0;
1079   widths.excepsV = NULL;
1080   widths.nExcepsV = 0;
1081   cidToGID = NULL;
1082   cidToGIDLen = 0;
1083
1084   // get the descendant font
1085   if (!fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
1086     error(-1, "Missing DescendantFonts entry in Type 0 font");
1087     obj1.free();
1088     goto err1;
1089   }
1090   if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
1091     error(-1, "Bad descendant font in Type 0 font");
1092     goto err3;
1093   }
1094   obj1.free();
1095   desFontDict = desFontDictObj.getDict();
1096
1097   // font type
1098   if (!desFontDict->lookup("Subtype", &obj1)) {
1099     error(-1, "Missing Subtype entry in Type 0 descendant font");
1100     goto err3;
1101   }
1102   if (obj1.isName("CIDFontType0")) {
1103     type = fontCIDType0;
1104   } else if (obj1.isName("CIDFontType2")) {
1105     type = fontCIDType2;
1106   } else {
1107     error(-1, "Unknown Type 0 descendant font type '%s'",
1108           obj1.isName() ? obj1.getName() : "???");
1109     goto err3;
1110   }
1111   obj1.free();
1112
1113   // get info from font descriptor
1114   readFontDescriptor(xref, desFontDict);
1115
1116   // look for an external font file
1117   findExtFontFile();
1118
1119   //----- encoding info -----
1120
1121   // char collection
1122   if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
1123     error(-1, "Missing CIDSystemInfo dictionary in Type 0 descendant font");
1124     goto err3;
1125   }
1126   obj1.dictLookup("Registry", &obj2);
1127   obj1.dictLookup("Ordering", &obj3);
1128   if (!obj2.isString() || !obj3.isString()) {
1129     error(-1, "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
1130     goto err4;
1131   }
1132   collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
1133   obj3.free();
1134   obj2.free();
1135   obj1.free();
1136
1137   // look for a ToUnicode CMap
1138   if (!(ctu = readToUnicodeCMap(fontDict, 16, NULL))) {
1139
1140     // the "Adobe-Identity" and "Adobe-UCS" collections don't have
1141     // cidToUnicode files
1142     if (collection->cmp("Adobe-Identity") &&
1143         collection->cmp("Adobe-UCS")) {
1144
1145       // look for a user-supplied .cidToUnicode file
1146       if (!(ctu = globalParams->getCIDToUnicode(collection))) {
1147         error(-1, "Unknown character collection '%s'",
1148               collection->getCString());
1149         // fall-through, assuming the Identity mapping -- this appears
1150         // to match Adobe's behavior
1151       }
1152     }
1153   }
1154
1155   // look for a Unicode-to-Unicode mapping
1156   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
1157     if (ctu) {
1158       for (c = 0; c < ctu->getLength(); ++c) {
1159         n = ctu->mapToUnicode(c, uBuf, 8);
1160         if (n >= 1) {
1161           n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
1162           if (n >= 1) {
1163             ctu->setMapping(c, uBuf, n);
1164           }
1165         }
1166       }
1167       utu->decRefCnt();
1168     } else {
1169       ctu = utu;
1170     }
1171   }
1172
1173   // encoding (i.e., CMap)
1174   //~ need to handle a CMap stream here
1175   //~ also need to deal with the UseCMap entry in the stream dict
1176   if (!fontDict->lookup("Encoding", &obj1)->isName()) {
1177     error(-1, "Missing or invalid Encoding entry in Type 0 font");
1178     delete collection;
1179     goto err3;
1180   }
1181   cMapName = new GString(obj1.getName());
1182   obj1.free();
1183   if (!(cMap = globalParams->getCMap(collection, cMapName))) {
1184     error(-1, "Unknown CMap '%s' for character collection '%s'",
1185           cMapName->getCString(), collection->getCString());
1186     delete collection;
1187     delete cMapName;
1188     goto err2;
1189   }
1190   delete collection;
1191   delete cMapName;
1192
1193   // CIDToGIDMap (for embedded TrueType fonts)
1194   if (type == fontCIDType2) {
1195     desFontDict->lookup("CIDToGIDMap", &obj1);
1196     if (obj1.isStream()) {
1197       cidToGIDLen = 0;
1198       i = 64;
1199       cidToGID = (Gushort *)gmallocn(i, sizeof(Gushort));
1200       obj1.streamReset();
1201       while ((c1 = obj1.streamGetChar()) != EOF &&
1202              (c2 = obj1.streamGetChar()) != EOF) {
1203         if (cidToGIDLen == i) {
1204           i *= 2;
1205           cidToGID = (Gushort *)greallocn(cidToGID, i, sizeof(Gushort));
1206         }
1207         cidToGID[cidToGIDLen++] = (Gushort)((c1 << 8) + c2);
1208       }
1209     } else if (!obj1.isName("Identity") && !obj1.isNull()) {
1210       error(-1, "Invalid CIDToGIDMap entry in CID font");
1211     }
1212     obj1.free();
1213   }
1214
1215   //----- character metrics -----
1216
1217   // default char width
1218   if (desFontDict->lookup("DW", &obj1)->isInt()) {
1219     widths.defWidth = obj1.getInt() * 0.001;
1220   }
1221   obj1.free();
1222
1223   // char width exceptions
1224   if (desFontDict->lookup("W", &obj1)->isArray()) {
1225     excepsSize = 0;
1226     i = 0;
1227     while (i + 1 < obj1.arrayGetLength()) {
1228       obj1.arrayGet(i, &obj2);
1229       obj1.arrayGet(i + 1, &obj3);
1230       if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
1231         if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
1232           if (widths.nExceps == excepsSize) {
1233             excepsSize += 16;
1234             widths.exceps = (GfxFontCIDWidthExcep *)
1235               greallocn(widths.exceps,
1236                         excepsSize, sizeof(GfxFontCIDWidthExcep));
1237           }
1238           widths.exceps[widths.nExceps].first = obj2.getInt();
1239           widths.exceps[widths.nExceps].last = obj3.getInt();
1240           widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1241           ++widths.nExceps;
1242         } else {
1243           error(-1, "Bad widths array in Type 0 font");
1244         }
1245         obj4.free();
1246         i += 3;
1247       } else if (obj2.isInt() && obj3.isArray()) {
1248         if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
1249           excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
1250           widths.exceps = (GfxFontCIDWidthExcep *)
1251             greallocn(widths.exceps,
1252                       excepsSize, sizeof(GfxFontCIDWidthExcep));
1253         }
1254         j = obj2.getInt();
1255         for (k = 0; k < obj3.arrayGetLength(); ++k) {
1256           if (obj3.arrayGet(k, &obj4)->isNum()) {
1257             widths.exceps[widths.nExceps].first = j;
1258             widths.exceps[widths.nExceps].last = j;
1259             widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1260             ++j;
1261             ++widths.nExceps;
1262           } else {
1263             error(-1, "Bad widths array in Type 0 font");
1264           }
1265           obj4.free();
1266         }
1267         i += 2;
1268       } else {
1269         error(-1, "Bad widths array in Type 0 font");
1270         ++i;
1271       }
1272       obj3.free();
1273       obj2.free();
1274     }
1275     qsort(widths.exceps, widths.nExceps, sizeof(GfxFontCIDWidthExcep),
1276           &cmpWidthExcep);
1277   }
1278   obj1.free();
1279
1280   // default metrics for vertical font
1281   if (desFontDict->lookup("DW2", &obj1)->isArray() &&
1282       obj1.arrayGetLength() == 2) {
1283     if (obj1.arrayGet(0, &obj2)->isNum()) {
1284       widths.defVY = obj2.getNum() * 0.001;
1285     }
1286     obj2.free();
1287     if (obj1.arrayGet(1, &obj2)->isNum()) {
1288       widths.defHeight = obj2.getNum() * 0.001;
1289     }
1290     obj2.free();
1291   }
1292   obj1.free();
1293
1294   // char metric exceptions for vertical font
1295   if (desFontDict->lookup("W2", &obj1)->isArray()) {
1296     excepsSize = 0;
1297     i = 0;
1298     while (i + 1 < obj1.arrayGetLength()) {
1299       obj1.arrayGet(i, &obj2);
1300       obj1.arrayGet(i+ 1, &obj3);
1301       if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
1302         if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
1303             obj1.arrayGet(i + 3, &obj5)->isNum() &&
1304             obj1.arrayGet(i + 4, &obj6)->isNum()) {
1305           if (widths.nExcepsV == excepsSize) {
1306             excepsSize += 16;
1307             widths.excepsV = (GfxFontCIDWidthExcepV *)
1308               greallocn(widths.excepsV,
1309                         excepsSize, sizeof(GfxFontCIDWidthExcepV));
1310           }
1311           widths.excepsV[widths.nExcepsV].first = obj2.getInt();
1312           widths.excepsV[widths.nExcepsV].last = obj3.getInt();
1313           widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1314           widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1315           widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1316           ++widths.nExcepsV;
1317         } else {
1318           error(-1, "Bad widths (W2) array in Type 0 font");
1319         }
1320         obj6.free();
1321         obj5.free();
1322         obj4.free();
1323         i += 5;
1324       } else if (obj2.isInt() && obj3.isArray()) {
1325         if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
1326           excepsSize =
1327             (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
1328           widths.excepsV = (GfxFontCIDWidthExcepV *)
1329             greallocn(widths.excepsV,
1330                       excepsSize, sizeof(GfxFontCIDWidthExcepV));
1331         }
1332         j = obj2.getInt();
1333         for (k = 0; k < obj3.arrayGetLength(); k += 3) {
1334           if (obj3.arrayGet(k, &obj4)->isNum() &&
1335               obj3.arrayGet(k+1, &obj5)->isNum() &&
1336               obj3.arrayGet(k+2, &obj6)->isNum()) {
1337             widths.excepsV[widths.nExceps].first = j;
1338             widths.excepsV[widths.nExceps].last = j;
1339             widths.excepsV[widths.nExceps].height = obj4.getNum() * 0.001;
1340             widths.excepsV[widths.nExceps].vx = obj5.getNum() * 0.001;
1341             widths.excepsV[widths.nExceps].vy = obj6.getNum() * 0.001;
1342             ++j;
1343             ++widths.nExcepsV;
1344           } else {
1345             error(-1, "Bad widths (W2) array in Type 0 font");
1346           }
1347           obj6.free();
1348           obj5.free();
1349           obj4.free();
1350         }
1351         i += 2;
1352       } else {
1353         error(-1, "Bad widths (W2) array in Type 0 font");
1354         ++i;
1355       }
1356       obj3.free();
1357       obj2.free();
1358     }
1359     qsort(widths.excepsV, widths.nExcepsV, sizeof(GfxFontCIDWidthExcepV),
1360           &cmpWidthExcepV);
1361   }
1362   obj1.free();
1363
1364   desFontDictObj.free();
1365   ok = gTrue;
1366   return;
1367
1368  err4:
1369   obj3.free();
1370   obj2.free();
1371  err3:
1372   obj1.free();
1373  err2:
1374   desFontDictObj.free();
1375  err1:;
1376 }
1377
1378 GfxCIDFont::~GfxCIDFont() {
1379   if (cMap) {
1380     cMap->decRefCnt();
1381   }
1382   if (ctu) {
1383     ctu->decRefCnt();
1384   }
1385   gfree(widths.exceps);
1386   gfree(widths.excepsV);
1387   if (cidToGID) {
1388     gfree(cidToGID);
1389   }
1390 }
1391
1392 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
1393                             Unicode *u, int uSize, int *uLen,
1394                             double *dx, double *dy, double *ox, double *oy) {
1395   CID cid;
1396   double w, h, vx, vy;
1397   int n, a, b, m;
1398
1399   if (!cMap) {
1400     *code = 0;
1401     *uLen = 0;
1402     *dx = *dy = 0;
1403     return 1;
1404   }
1405
1406   *code = (CharCode)(cid = cMap->getCID(s, len, &n));
1407   if (ctu) {
1408     *uLen = ctu->mapToUnicode(cid, u, uSize);
1409   } else {
1410     *uLen = 0;
1411   }
1412
1413   // horizontal
1414   if (cMap->getWMode() == 0) {
1415     w = widths.defWidth;
1416     h = vx = vy = 0;
1417     if (widths.nExceps > 0 && cid >= widths.exceps[0].first) {
1418       a = 0;
1419       b = widths.nExceps;
1420       // invariant: widths.exceps[a].first <= cid < widths.exceps[b].first
1421       while (b - a > 1) {
1422         m = (a + b) / 2;
1423         if (widths.exceps[m].first <= cid) {
1424           a = m;
1425         } else {
1426           b = m;
1427         }
1428       }
1429       if (cid <= widths.exceps[a].last) {
1430         w = widths.exceps[a].width;
1431       }
1432     }
1433
1434   // vertical
1435   } else {
1436     w = 0;
1437     h = widths.defHeight;
1438     vx = widths.defWidth / 2;
1439     vy = widths.defVY;
1440     if (widths.nExcepsV > 0 && cid >= widths.excepsV[0].first) {
1441       a = 0;
1442       b = widths.nExcepsV;
1443       // invariant: widths.excepsV[a].first <= cid < widths.excepsV[b].first
1444       while (b - a > 1) {
1445         m = (a + b) / 2;
1446         if (widths.excepsV[m].last <= cid) {
1447           a = m;
1448         } else {
1449           b = m;
1450         }
1451       }
1452       if (cid <= widths.excepsV[a].last) {
1453         h = widths.excepsV[a].height;
1454         vx = widths.excepsV[a].vx;
1455         vy = widths.excepsV[a].vy;
1456       }
1457     }
1458   }
1459
1460   *dx = w;
1461   *dy = h;
1462   *ox = vx;
1463   *oy = vy;
1464
1465   return n;
1466 }
1467
1468 int GfxCIDFont::getWMode() {
1469   return cMap ? cMap->getWMode() : 0;
1470 }
1471
1472 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
1473   if (ctu) {
1474     ctu->incRefCnt();
1475   }
1476   return ctu;
1477 }
1478
1479 GString *GfxCIDFont::getCollection() {
1480   return cMap ? cMap->getCollection() : (GString *)NULL;
1481 }
1482
1483 //------------------------------------------------------------------------
1484 // GfxFontDict
1485 //------------------------------------------------------------------------
1486
1487 GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
1488   int i;
1489   Object obj1, obj2;
1490   Ref r;
1491
1492   numFonts = fontDict->getLength();
1493   fonts = (GfxFont **)gmallocn(numFonts, sizeof(GfxFont *));
1494   for (i = 0; i < numFonts; ++i) {
1495     fontDict->getValNF(i, &obj1);
1496     obj1.fetch(xref, &obj2);
1497     if (obj2.isDict()) {
1498       if (obj1.isRef()) {
1499         r = obj1.getRef();
1500       } else {
1501         // no indirect reference for this font, so invent a unique one
1502         // (legal generation numbers are five digits, so any 6-digit
1503         // number would be safe)
1504         r.num = i;
1505         if (fontDictRef) {
1506           r.gen = 100000 + fontDictRef->num;
1507         } else {
1508           r.gen = 999999;
1509         }
1510       }
1511       fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i),
1512                                    r, obj2.getDict());
1513       if (fonts[i] && !fonts[i]->isOk()) {
1514         delete fonts[i];
1515         fonts[i] = NULL;
1516       }
1517     } else {
1518       error(-1, "font resource is not a dictionary");
1519       fonts[i] = NULL;
1520     }
1521     obj1.free();
1522     obj2.free();
1523   }
1524 }
1525
1526 GfxFontDict::~GfxFontDict() {
1527   int i;
1528
1529   for (i = 0; i < numFonts; ++i) {
1530     if (fonts[i]) {
1531       delete fonts[i];
1532     }
1533   }
1534   gfree(fonts);
1535 }
1536
1537 GfxFont *GfxFontDict::lookup(char *tag) {
1538   int i;
1539
1540   for (i = 0; i < numFonts; ++i) {
1541     if (fonts[i] && fonts[i]->matches(tag)) {
1542       return fonts[i];
1543     }
1544   }
1545   return NULL;
1546 }