File: t1disasm.c

package info (click to toggle)
t1utils 1.2-3
links: PTS
area: main
in suites: potato
size: 148 kB
ctags: 173
sloc: ansic: 1,558; makefile: 78; sh: 10
file content (761 lines) | stat: -rw-r--r-- 21,367 bytes
/* t1disasm
 *
 * This program `disassembles' Adobe Type-1 font programs in either PFB
 * or PFA format.  It produces a human readable/editable
 * pseudo-PostScript file by performing eexec and charstring decryption
 * as specified in the `Adobe Type 1 Font Format' version 1.1 (the
 * `black book').  There is a companion program, t1asm, which
 * `assembles' such a pseudo-PostScript file into either PFB or PFA
 * format.
 *
 * Copyright (c) 1992 by I. Lee Hetherington, all rights reserved.
 *
 * Permission is hereby granted to use, modify, and distribute this
 * program for any purpose provided this copyright notice and the one
 * below remain intact.
 *
 * I. Lee Hetherington (ilh@lcs.mit.edu)
 *
 *======================================================================
 *
 * This program has been modified by CurveSoft, Inc. and all such
 * modifications are covered by the following copyright notice, license,
 * and disclaimer
 *
 * Copyright (C) 1997  CurveSoft, Inc. All rights reserved.
 *
 * License is hereby granted without fee, to use, copy, modify,
 * translate and distribute this software and its documentation for any
 * purpose provided that the above copyright notice is present in all
 * copies and that the name of CurveSoft not be used in advertising or
 * publicity pertaining to this software without written prior
 * permission.
 *
 * CURVESOFT PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES OF
 * ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO
 * ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  THE ENTIRE RISK
 * AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY
 * TO SUPPORT OR MAINTAIN, BELONGS TO THE LICENSEE.  SHOULD ANY PORTION
 * OF THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT CURVESOFT) ASSUMES
 * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION.  IN NO EVENT
 * SHALL CURVESOFT BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 *
 *======================================================================
 *
 * Log:
 *
 * Revision 1.4  92/07/10  10:55:08  ilh
 * Added support for additional PostScript after the closefile command
 * (i.e., some fonts have `{restore}if' after the cleartomark).  Also,
 * removed hardwired charstring start command (-| or RD) in favor of
 * automatically determining it.
 *
 * Revision 1.3  92/06/23  10:57:53  ilh
 * MSDOS porting by Kai-Uwe Herbing (herbing@netmbx.netmbx.de)
 * incorporated.
 *
 * Revision 1.2  92/05/22  12:05:33  ilh
 * Fixed bug where we were counting on sprintf to return its first
 * argument---not true in ANSI C.  This bug was detected by Piet
 * Tutelaers (rcpt@urc.tue.nl).  Also, fixed (signed) integer overflow
 * error when testing high-order bit of integer for possible
 * sign-extension by making comparison between unsigned integers.
 *
 * Revision 1.1  92/05/22  12:04:07  ilh
 * initial version
 *
 * Ported to Microsoft C/C++ Compiler and MS-DOS operating system by
 * Kai-Uwe Herbing (herbing@netmbx.netmbx.de) on June 12, 1992. Code
 * specific to the MS-DOS version is encapsulated with #ifdef _MSDOS
 * ... #endif, where _MSDOS is an identifier, which is automatically
 * defined, if you compile with the Microsoft C/C++ Compiler.
 * */

/* Version, copyright and (under _MSDOS) port-notice strings.  They are
 * compiled in only when `lint' is not defined and are not referenced by
 * any code visible in this file.  The `@(#)' prefix is presumably the
 * what(1) identification marker -- TODO confirm. */
#ifndef lint
static char rcsid[] =
 "@(#) $Id: t1disasm.c,v 1.4 92/07/10 10:55:08 ilh Exp $";
static char copyright[] =
 "@(#) Copyright (c) 1992 by I. Lee Hetherington, all rights reserved.";
#ifdef _MSDOS
static char portnotice[] =
 "@(#) Ported to MS-DOS by Kai-Uwe Herbing (herbing@netmbx.netmbx.de).";
#endif
#endif  /* lint */

/* Note: this is ANSI C. */

#ifdef _MSDOS
#include <fcntl.h>
#include <io.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

/* int32 must be at least 32-bit and uint16 must be at least 16-bit */
/* Pick plain int when it already covers 31 value bits, else long. */
#if INT_MAX >= 0x7FFFFFFFUL
typedef int int32;
#else
typedef long int32;
#endif

/* Pick unsigned short when it covers 16 bits, else unsigned int. */
#if USHRT_MAX >= 0xFFFFUL
typedef unsigned short uint16;
#else
typedef unsigned int uint16;
#endif

/* Needed for 'shortint' Type 2 operator */
#if SHRT_MAX >= 0x7FFFUL
typedef short int16;
#else
typedef int int16;
#endif

/* Size in bytes of the shared line[] buffer, terminating NUL included. */
#define LINESIZE 256

/* Fetch one byte through egetc() and run it through charstring
 * decryption.  NOTE: the `& 0xff' mask means an EOF returned by egetc()
 * is turned into the byte 0xff, so callers cannot detect end of input. */
#define cgetc()  cdecrypt((byte)(egetc() & 0xff))

typedef unsigned char byte;

static FILE *ifp;                 /* input stream; main() sets it to stdin or argv[1] */
static FILE *ofp;                 /* output stream; main() sets it to stdout or argv[2] */
static char line[LINESIZE];       /* current line, filled by bgetline()/egetline() */
static int start_charstring = 0;  /* set to 1 by egetline() when line ended at cs_start */
static int final_ascii = 0;       /* set to 1 by main() for the trailing text; bgetc()
                                     then returns PFA bytes without hex decoding */
static int lenIV = 4;             /* leading bytes skipped per charstring; see set_lenIV() */
static char cs_start[10];         /* charstring start command name; see set_cs_start() */
int lnum = 1; /* line number */   /* main() resets this to 0 and counts lines read */

/* decryption stuff */
/* er: running key for eexec decryption (seeded with 55665 in egetc());
 * cr: running key for charstring decryption (seeded with 4330 in
 * do_charstring()). */
static uint16 er, cr;
/* Multiplier and addend used to advance both running keys. */
static uint16 c1 = 52845, c2 = 22719;

/* ------------------------------------------------------  eexec_scanner
 *
 * Incremental matcher for the string `currentfile eexec\n'.  Feed it
 * the input one character at a time; it returns 1 once the whole string
 * has been seen (and keeps returning 1 from then on), 0 otherwise.
 * If c == 0, no character is consumed and only the status is returned.
 *
 * On a mismatch the matcher falls back to the longest prefix of the key
 * that is still consistent with the characters seen so far, and retries
 * the current character against it.  Simply restarting from scratch
 * would drop the current character and miss inputs such as
 * `ccurrentfile eexec\n'.
 */
static int
eexec_scanner(int c)
{
  static const char key[] = "currentfile eexec\n";
  static size_t matched = 0;      /* key[0..matched) has been matched */
  const size_t key_len = sizeof key - 1;

  if (c && matched < key_len) {
    const char ch = (char) (c & 0xff);

    for (;;) {
      size_t border;

      if (ch == key[matched]) {
        ++matched;
        break;
      }
      if (matched == 0)
        break;                    /* ch cannot even start the key */

      /* longest proper prefix of key that is also a suffix of
         key[0..matched); the key is tiny, so brute force is fine */
      for (border = matched - 1; border > 0; --border)
        if (memcmp(key, key + (matched - border), border) == 0)
          break;
      matched = border;
    }
  }
  return matched == key_len;
}                                                    /* eexec_scanner */

/* -------------------------------------------------------------  hexval
 *
 * Map one hexadecimal digit character (`0'-`9', `a'-`f', `A'-`F') to
 * its numeric value 0..15.  Any other character yields 0.
 */
static int
hexval(char c)
{
  int digit = 0;

  if ('0' <= c && c <= '9')
    digit = c - '0';
  else if ('a' <= c && c <= 'f')
    digit = 10 + (c - 'a');
  else if ('A' <= c && c <= 'F')
    digit = 10 + (c - 'A');

  return digit;
}                                                           /* hexval */


/* --------------------------------------------------------------  bgetc
 *
 * This function returns a single character at a time from a PFA or PFB
 * file.  This stream is mixed ASCII and binary bytes.  For PFB files,
 * the section headers are removed, and \r is replaced by \n in ASCII
 * sections.  For PFA files, the hexdecimal data is turned into binary
 * bytes.
 */
static int
bgetc()
{
  static int first_byte = 1;
  static int is_pfa = 0;
  static int is_pfb = 0;
  static int32 pfb_remaining = 0;
  int c, val;

  /*
   * is_pfa == 1 means PFA initial ASCII section
   * is_pfa == 2 means PFA hexadecimal section
   * is_pfb == 1 means PFB ASCII section
   * is_pfB == 2 means PFB binary section
   */

  c = fgetc(ifp);

  if (c == EOF)
    return EOF;

  if (first_byte) {
    /*
     * Determine if this is a PFA or PFB file by looking at first byte.
     */
    if (c == 0x80) {
      is_pfb = 1;
      is_pfa = 0;

#ifdef _MSDOS
      /*
       * If we are processing a PFB (binary) input file, we must set its
       * file mode to binary.
       */
      _setmode(_fileno(ifp), _O_BINARY);
#endif  /* _MSDOS */

    } else {
      is_pfb = 0;
      is_pfa = 1;
    }
    first_byte = 0;
  }

  if (is_pfb) {
    /* PFB */
    if (pfb_remaining == 0) {
      /* beginning of block---we know c == 0x80 at this point */
      switch (fgetc(ifp)) {
      case 1:
        is_pfb = 1;
        break;
      case 2:
        is_pfb = 2;
        break;
      case 3:
        return EOF;
      default:
        fprintf(stderr, "error: is this really a PFB file?\n");
        exit(1);
      }      /* switch */

      /* get block length */
      pfb_remaining = (int32)(fgetc(ifp) & 0xff);
      pfb_remaining |= (int32)(fgetc(ifp) & 0xff) << 8;
      pfb_remaining |= (int32)(fgetc(ifp) & 0xff) << 16;
      pfb_remaining |= (int32)(fgetc(ifp) & 0xff) << 24;
      /* get character */
      c = fgetc(ifp);
      if (c == EOF)
        return EOF;
    }

    --pfb_remaining;
    /* in ASCII section change return to newline */
    if (is_pfb == 1 && c == '\r')
      c = '\n';
    (void) eexec_scanner(c);
    return c;
  }

  /* PFA */
  if (final_ascii)
    return c;
  if (is_pfa == 1) {
    /* in initial ASCII */
    if (eexec_scanner(c))
      is_pfa = 2;
    return c;
  }
  /* in hexadecimal */
  while (isspace(c))
    c = fgetc(ifp);
  val = hexval((char)c) << 4;
  val |= hexval((char)(c = fgetc(ifp)));
  return c == EOF ? EOF : val;
}                                                            /* bgetc */


/* -----------------------------------------------------------  bgetline
 *
 * This function reads a line of (non-decrypted) characters from
 * bgetc() into line[].  A line ends at a newline \n, at a carriage
 * return \r (which is stored as \n), at end of input, or when line[]
 * is full.  The terminating newline, if any, is kept, and line[] is
 * always NUL-terminated.  An empty line[] therefore means end of input.
 *
 * At most LINESIZE - 1 characters are stored so that the terminating
 * NUL always fits inside line[].
 */
static void
bgetline()
{
  int c;
  char *p = line;
  char *const limit = line + LINESIZE - 1;  /* keep room for the NUL */

  while (p < limit) {
    c = bgetc();
    if (c == EOF)
      break;
    *p++ = (char) c;
    if (c == '\r') {    /* map \r to \n */
      p[-1] = '\n';
      break;
    }
    if (c == '\n')
      break;
  }                     /* while */
  *p = '\0';
}                                                         /* bgetline */


/* -----------------------------------------------------------  edecrypt
 *
 * Decrypt one byte of eexec-encrypted data and advance the running
 * eexec key `er'.  Returns the plaintext byte.
 *
 * Two separate decryption functions because eexec and charstring
 * decryption must proceed in parallel.
 *
 * The key update is computed in unsigned long: with a 16-bit uint16 the
 * operands would otherwise be promoted to (signed) int, and
 * (cipher + er) * c1 can exceed INT_MAX, which is undefined behaviour.
 * The result is reduced to 16 bits explicitly.
 */
static byte
edecrypt(byte cipher)
{
  byte plain;

  plain = (byte)(cipher ^ (er >> 8));
  er = (uint16)((((unsigned long)cipher + er) * c1 + c2) & 0xFFFFUL);
  return plain;
}                                                         /* edecrypt */

/* -----------------------------------------------------------  cdecrypt
 *
 * CharString decryption: decrypt one byte of charstring data and
 * advance the running charstring key `cr'.  Returns the plaintext byte.
 *
 * The key update is computed in unsigned long: with a 16-bit uint16 the
 * operands would otherwise be promoted to (signed) int, and
 * (cipher + cr) * c1 can exceed INT_MAX, which is undefined behaviour.
 * The result is reduced to 16 bits explicitly.
 */
static byte
cdecrypt(byte cipher)
{
  byte plain;

  plain = (byte)(cipher ^ (cr >> 8));
  cr = (uint16)((((unsigned long)cipher + cr) * c1 + c2) & 0xFFFFUL);
  return plain;
}                                                         /* cdecrypt */


/* ----------------------------------------------------  immediate_eexec
 *
 * One-shot edge detector on eexec_scanner(): returns 1 exactly once,
 * on the first call at which the scanner reports a match, and 0 on
 * every other call.  The scanner is no longer queried after that.
 */
static int
immediate_eexec()
{
  static int already_fired = 0;

  if (already_fired)
    return 0;
  if (!eexec_scanner(0))
    return 0;

  already_fired = 1;
  return 1;
}                                                  /* immediate_eexec */


/* --------------------------------------------------------------  egetc
 *
 * Return the next input byte, eexec-decrypted once the encrypted
 * section has begun, or EOF when bgetc() reports end of input.
 *
 * Until immediate_eexec() fires, bytes from bgetc() are returned
 * unchanged.  On the call where it fires, the eexec key is seeded with
 * 55665, the next four bytes are pulled from bgetc() and run through
 * edecrypt() only to advance the key, and the byte that was read at the
 * start of this call is still returned as is.  Every later byte is
 * returned decrypted.
 */
static int
egetc()
{
  static int in_eexec = 0;
  int c = bgetc();
  int skip;

  if (c == EOF)
    return EOF;

  if (in_eexec)
    return (int)edecrypt((byte)(c & 0xff));

  if (!immediate_eexec())
    return c;

  /* start eexec decryption */
  in_eexec = 1;
  er = 55665;
  /* toss out four random bytes */
  for (skip = 0; skip < 4; skip++)
    (void) edecrypt((byte)(bgetc() & 0xff));

  return c;
}                                                            /* egetc */


/* -----------------------------------------------------------  egetline
 *
 * Read one line of eexec-decrypted characters from egetc() into
 * line[].  Reading stops at end of input, when LINESIZE - 1 characters
 * have been stored, after a newline \n, after a carriage return \r
 * (stored as \n), or as soon as the line ends with the charstring start
 * command held in cs_start[] (usually `-|' or `RD') surrounded by
 * single blanks.
 *
 * In the last case the blank-command-blank sequence is removed from
 * line[] and the flag start_charstring is set to 1; otherwise the flag
 * is 0 and the terminating newline, if any, is kept.  line[] is always
 * NUL-terminated.
 */
static void
egetline()
{
  int start_len = strlen(cs_start);
  int used = 0;                 /* characters stored in line[] so far */
  int ch;

  start_charstring = 0;
  while (used < LINESIZE - 1) {
    ch = egetc();
    if (ch == EOF)
      break;
    line[used++] = (char) ch;

    /* does the line now end in " <cs_start> "? */
    if (start_len > 0
        && used >= start_len + 2
        && line[used - 2 - start_len] == ' '
        && line[used - 1] == ' '
        && strncmp(&line[used - 1 - start_len], cs_start, start_len) == 0) {
      used -= start_len + 2;
      start_charstring = 1;
      break;
    }

    if (ch == '\r') {                             /* map \r to \n */
      line[used - 1] = '\n';
      break;
    }
    if (ch == '\n')
      break;
  }
  line[used] = '\0';
}                                                         /* egetline */


/* ----------------------------------------------------------  set_lenIV
 *
 * If line[] contains an entry of the form `/lenIV <num>' then set the
 * global lenIV to <num>.  This is the number of bytes do_charstring()
 * skips at the beginning of each charstring.  Lines without such an
 * entry, or where `/lenIV ' is not followed by a digit, leave lenIV
 * unchanged.
 *
 * The character is cast to unsigned char before the isdigit() call:
 * line[] holds arbitrary decrypted bytes, and passing a negative char
 * value to a <ctype.h> function is undefined behaviour.
 */
static void
set_lenIV()
{
  static const char tag[] = "/lenIV ";
  const size_t tag_len = sizeof tag - 1;
  char *p = strstr(line, tag);

  if (p && isdigit((unsigned char) p[tag_len]))
    lenIV = atoi(p + tag_len);
}                                                        /* set_lenIV */

/* -------------------------------------------------------  set_cs_start
 *
 * Find and remember the name of the command that starts a CharString.
 *
 * If line[] contains `string currentfile', the name is taken to be the
 * text following the last `/' that precedes it, up to the first white
 * space or `{'.  That name is stored in cs_start[].  line[] itself is
 * left unmodified.
 *
 * The scan never runs past the `string currentfile' text, and a name
 * that does not fit in cs_start[] is ignored (cs_start[] keeps its
 * previous contents) instead of overflowing the buffer.
 */
static void
set_cs_start()
{
  char *def, *name;
  size_t len = 0;

  def = strstr(line, "string currentfile");
  if (!def)
    return;

  /* back up to the character following the last '/' before def */
  name = def;
  while (name > line && name[-1] != '/')
    --name;
  if (name == line)
    return;                     /* no '/' in front of the definition */

  /* measure the name; unsigned char cast keeps isspace() defined for
     bytes with the high bit set */
  while (name + len < def
         && !isspace((unsigned char) name[len])
         && name[len] != '{')
    ++len;

  if (len < sizeof cs_start) {
    memcpy(cs_start, name, len);
    cs_start[len] = '\0';
  }
}                                                     /* set_cs_start */

/* -------------------------------------------------------------  output
 *
 * Write the NUL-terminated string verbatim to the output stream ofp.
 */
static void
output(char *string)
{
  fputs(string, ofp);
}                                                           /* output */

/* -------------------------------------------------------  output_token
 *
 * Write one charstring token to ofp with neat separators.  The token
 * "\n" ends the current output line.  Any other token is preceded by a
 * tab when it is the first on its output line and by a single space
 * otherwise.  The line-start state is kept between calls.
 */
static void
output_token(char *token)
{
  static int at_line_start = 1;

  if (token[0] == '\n' && token[1] == '\0') {
    fputc('\n', ofp);
    at_line_start = 1;
    return;
  }

  fputs(at_line_start ? "\t" : " ", ofp);
  fputs(token, ofp);
  at_line_start = 0;
}                                                     /* output_token */

/* ------------------------------------------------------  do_charstring
 *
 * Subroutine to decrypt and ASCII-ify tokens in charstring data.
 * First, the length (in bytes) of the charstring is determined from
 * line[].  Then the charstring decryption machinery is fired up,
 * skipping the first lenIV bytes.  Finally, the decrypted tokens are
 * expanded into human-readable form.
 */
static void
do_charstring()
{
  int l = strlen(line);
  char *p = line + l - 1;
  int cs_len;
  int i;
  int b;
  int32 val;
  char buf[20];

  while (p >= line && *p != ' ' && *p != '\t')
    --p;
  cs_len = atoi(p);

  *p = '\0';
  output(line);
  output(" {\n");

  cr = 4330;
  for (i = 0; i < lenIV; i++, cs_len--)
    (void) cgetc();

  while (cs_len > 0) {
    --cs_len;
    b = cgetc();
    if (b >= 32) {
      if (b >= 32 && b <= 246) {
        val = b - 139;
      } else if (b >= 247 && b <= 250) {
        --cs_len;
        val = (b - 247)*256 + 108 + cgetc();
      } else if (b >= 251 && b <= 254) {
        --cs_len;
        val = -(b - 251)*256 - 108 - cgetc();
      } else {
        cs_len -= 4;
        val =  (cgetc() & 0xff) << 24;
        val |= (cgetc() & 0xff) << 16;
        val |= (cgetc() & 0xff) <<  8;
        val |= (cgetc() & 0xff) <<  0;

        /* in case an int32 is larger than four bytes---sign extend */
#if INT_MAX > 0x7FFFFFFFUL
        for (i = 4; i < sizeof(int32); i++)
          val |= 0xff << (i * 8);
#endif

      }
      sprintf(buf, "%d", val);
      output_token(buf);
    } else if (b == 28) {    /* 'shortint' (Type 2) */
      int16 val16;
      val16 = (cgetc() & 0xff) <<  8;
      val16 |= (cgetc() & 0xff);
      sprintf(buf, "%d", (int)val16);
      output_token(buf);
    } else {
      switch (b) {
      case 1: output_token("hstem"); break;
      case 3: output_token("vstem"); break;
      case 4: output_token("vmoveto"); break;
      case 5: output_token("rlineto"); break;
      case 6: output_token("hlineto"); break;
      case 7: output_token("vlineto"); break;
      case 8: output_token("rrcurveto"); break;
      case 9: output_token("closepath"); break;
      case 10: output_token("callsubr"); break;
      case 11: output_token("return"); break;
      case 13: output_token("hsbw"); break;
      case 14: output_token("endchar"); break;

      /* Type 2 operators */
      case 16: output_token("blend"); break;
      case 18: output_token("hstemhm"); break;
      case 19: output_token("hintmask"); break;
      case 20: output_token("cntrmask"); break;

      case 21: output_token("rmoveto"); break;
      case 22: output_token("hmoveto"); break;

      /* Type 2 operators */
      case 23: output_token("vstemhm"); break;
      case 24: output_token("rcurveline"); break;
      case 25: output_token("rlinecurve"); break;
      case 26: output_token("vvcurveto"); break;
      case 27: output_token("hhcurveto"); break;
      case 29: output_token("callgsubr"); break;

      case 30: output_token("vhcurveto"); break;
      case 31: output_token("hvcurveto"); break;
      case 12:
        --cs_len;
        switch (b = cgetc()) {
        case 0: output_token("dotsection"); break;
        case 1: output_token("vstem3"); break;
        case 2: output_token("hstem3"); break;

        /* Type 2 operators */
        case 3: output_token("and"); break;
        case 4: output_token("or"); break;
        case 5: output_token("not"); break;

        case 6: output_token("seac"); break;
        case 7: output_token("sbw"); break;

        /* Type 2 operators */
        case 8: output_token("store"); break;
        case 9: output_token("abs"); break;
        case 10: output_token("add"); break;
        case 11: output_token("sub"); break;

        case 12: output_token("div"); break;

        /* Type 2 operators */
        case 13: output_token("load"); break;
        case 14: output_token("neg"); break;
        case 15: output_token("eq"); break;

        case 16: output_token("callothersubr"); break;
        case 17: output_token("pop"); break;

        /* Type 2 operators
         *
         * NOTE: 25 is documented as "Reserved" in Technical Note #5177,
         *       "The Type 2 CharString Format", 16 December 1996,
         *       p. 30, from Adobe but Kepler and Jenson MM fonts treat
         *       it as 'div'
         */
        case 18: output_token("drop"); break;
        case 20: output_token("put"); break;
        case 21: output_token("get"); break;
        case 22: output_token("ifelse"); break;
        case 23: output_token("random"); break;
        case 24: output_token("mul"); break;
        case 25: output_token("Div"); break;
        case 26: output_token("sqrt"); break;
        case 27: output_token("dup"); break;
        case 28: output_token("exch"); break;
        case 29: output_token("index"); break;
        case 30: output_token("roll"); break;

        case 33: output_token("setcurrentpoint"); break;

        /* Type 2 operators */
        case 34: output_token("hflex"); break;
        case 35: output_token("flex"); break;
        case 36: output_token("hflex1"); break;
        case 37: output_token("flex1"); break;

        default:
          sprintf(buf, "UNKNOWN_12_%d", b);
          output_token(buf);
          break;
        }    /* switch */
        break;

      default:
        sprintf(buf, "UNKNOWN_%d", b);
        output_token(buf);
        break;
      }  /* switch */
      output_token("\n");
    }  /* else */
  }    /* while */
  output("\t}");
}                                                    /* do_charstring */

/* -------------------------------------------------------  print_banner
 *
 * Print the program name and revision on stderr.  The revision number
 * is extracted from the RCS `$Revision$' keyword string; if that fails
 * an empty revision is printed.
 */
static void
print_banner()
{
  static char rcs_revision[] = "$Revision: 1.4 $";
  static char revision[20];
  int fields;

  fields = sscanf(rcs_revision, "$Revision: %19s", revision);
  if (fields != 1)
    revision[0] = '\0';

  fprintf(stderr, "This is t1disasm %s (modified).\n", revision);
}                                                     /* print_banner */

/* ---------------------------------------------------------------  main
 *
 * main program
 *
 * Usage: t1disasm [input [output]].  Input defaults to stdin and output
 * to stdout.  Lines are read with egetline(); charstrings are expanded
 * by do_charstring() and everything else is copied through.  After the
 * line `mark currentfile closefile' (or at end of input) the rest of
 * the file is scanned for `cleartomark', and whatever follows it is
 * copied to the output unchanged.
 *
 * Returns 0 on success.  Exits with status 1 if a file cannot be
 * opened, and returns 1 if writing or closing the output fails, so
 * that a full disk or broken pipe is not reported as success.
 */
int
main(int argc, char **argv)
{
  int status = 0;

  ifp = stdin;
  ofp = stdout;

  print_banner();

  /* possibly open input & output files */
  if (argc >= 2) {
    ifp = fopen(argv[1], "r");
    if (!ifp) {
      fprintf(stderr, "error: cannot open %s for reading\n", argv[1]);
      exit(1);
    }
  }
  if (argc >= 3) {
    ofp = fopen(argv[2], "w");
    if (!ofp) {
      fprintf(stderr, "error: cannot open %s for writing\n", argv[2]);
      exit(1);
    }
  }

  /*
   * main loop --- normally done when reach `mark currentfile closefile'
   * on output (rest is garbage).
   */
  for (lnum = 0; ; lnum++) {
    egetline();
    if (line[0] == '\0')
      break;
    set_lenIV();
    set_cs_start();
    if (start_charstring)
      do_charstring();
    else
      output(line);
    if (strcmp(line, "mark currentfile closefile\n") == 0)
      break;
  }  /* for */

  /* Final wrap-up: check for any PostScript after the cleartomark. */
  final_ascii = 1;
  while (bgetline(), line[0] != '\0') {
    if (strncmp(line, "cleartomark", 11) == 0) {
      if (line[11] && line[11] != '\n')
        output(line + 11);
      while (bgetline(), line[0] != '\0')
        output(line);
      break;
    }
  }  /* while */

  fclose(ifp);

  /* a failed write may only show up in the stream's error flag or
     when the buffer is flushed by fclose() */
  if (ferror(ofp))
    status = 1;
  if (fclose(ofp) != 0)
    status = 1;
  if (status)
    fprintf(stderr, "error: failed to write output\n");

  return status;
}                                                             /* main */