Home | History | Annotate | Download | only in jpeg
      1 /*
      2  * rdjpgcom.c
      3  *
      4  * Copyright (C) 1994-1997, Thomas G. Lane.
      5  * This file is part of the Independent JPEG Group's software.
      6  * For conditions of distribution and use, see the accompanying README file.
      7  *
      8  * This file contains a very simple stand-alone application that displays
      9  * the text in COM (comment) markers in a JFIF file.
     10  * This may be useful as an example of the minimum logic needed to parse
     11  * JPEG markers.
     12  */
     13 
     14 #define JPEG_CJPEG_DJPEG	/* to get the command-line config symbols */
     15 #include "jinclude.h"		/* get auto-config symbols, <stdio.h> */
     16 
     17 #include <ctype.h>		/* to declare isupper(), tolower() */
     18 #ifdef USE_SETMODE
     19 #include <fcntl.h>		/* to declare setmode()'s parameter macros */
     20 /* If you have setmode() but not <io.h>, just delete this line: */
     21 #include <io.h>			/* to declare setmode() */
     22 #endif
     23 
     24 #ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
     25 #ifdef __MWERKS__
     26 #include <SIOUX.h>              /* Metrowerks needs this */
     27 #include <console.h>		/* ... and this */
     28 #endif
     29 #ifdef THINK_C
     30 #include <console.h>		/* Think declares it here */
     31 #endif
     32 #endif
     33 
     34 #ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
     35 #define READ_BINARY	"r"
     36 #else
     37 #ifdef VMS			/* VMS is very nonstandard */
     38 #define READ_BINARY	"rb", "ctx=stm"
     39 #else				/* standard ANSI-compliant case */
     40 #define READ_BINARY	"rb"
     41 #endif
     42 #endif
     43 
     44 #ifndef EXIT_FAILURE		/* define exit() codes if not provided */
     45 #define EXIT_FAILURE  1
     46 #endif
     47 #ifndef EXIT_SUCCESS
     48 #ifdef VMS
     49 #define EXIT_SUCCESS  1		/* VMS is very nonstandard */
     50 #else
     51 #define EXIT_SUCCESS  0
     52 #endif
     53 #endif
     54 
     55 
     56 /*
     57  * These macros are used to read the input file.
     58  * To reuse this code in another application, you might need to change these.
     59  */
     60 
/* The stream every read helper in this file pulls bytes from.
 * main() points it at the named file or at stdin before any parsing starts.
 */
static FILE * infile;		/* input JPEG file */

/* Return next input byte, or EOF if no more */
/* (expands to getc(), so the result is an int, not a char) */
#define NEXTBYTE()  getc(infile)


/* Error exit handler */
/* Writes msg and a newline to stderr, then terminates with EXIT_FAILURE.
 * It is a single comma expression, so "ERREXIT(...);" can stand as the
 * unbraced body of an if statement.
 */
#define ERREXIT(msg)  (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE))
     69 
     70 
     71 /* Read one byte, testing for EOF */
     72 static int
     73 read_1_byte (void)
     74 {
     75   int c;
     76 
     77   c = NEXTBYTE();
     78   if (c == EOF)
     79     ERREXIT("Premature EOF in JPEG file");
     80   return c;
     81 }
     82 
     83 /* Read 2 bytes, convert to unsigned int */
     84 /* All 2-byte quantities in JPEG markers are MSB first */
     85 static unsigned int
     86 read_2_bytes (void)
     87 {
     88   int c1, c2;
     89 
     90   c1 = NEXTBYTE();
     91   if (c1 == EOF)
     92     ERREXIT("Premature EOF in JPEG file");
     93   c2 = NEXTBYTE();
     94   if (c2 == EOF)
     95     ERREXIT("Premature EOF in JPEG file");
     96   return (((unsigned int) c1) << 8) + ((unsigned int) c2);
     97 }
     98 
     99 
    100 /*
    101  * JPEG markers consist of one or more 0xFF bytes, followed by a marker
    102  * code byte (which is not an FF).  Here are the marker codes of interest
    103  * in this program.  (See jdmarker.c for a more complete list.)
    104  */
    105 
    106 #define M_SOF0  0xC0		/* Start Of Frame N */
    107 #define M_SOF1  0xC1		/* N indicates which compression process */
    108 #define M_SOF2  0xC2		/* Only SOF0-SOF2 are now in common use */
    109 #define M_SOF3  0xC3
    110 #define M_SOF5  0xC5		/* NB: codes C4 and CC are NOT SOF markers */
    111 #define M_SOF6  0xC6
    112 #define M_SOF7  0xC7
    113 #define M_SOF9  0xC9
    114 #define M_SOF10 0xCA
    115 #define M_SOF11 0xCB
    116 #define M_SOF13 0xCD
    117 #define M_SOF14 0xCE
    118 #define M_SOF15 0xCF
    119 #define M_SOI   0xD8		/* Start Of Image (beginning of datastream) */
    120 #define M_EOI   0xD9		/* End Of Image (end of datastream) */
    121 #define M_SOS   0xDA		/* Start Of Scan (begins compressed data) */
    122 #define M_APP0	0xE0		/* Application-specific marker, type N */
    123 #define M_APP12	0xEC		/* (we don't bother to list all 16 APPn's) */
    124 #define M_COM   0xFE		/* COMment */
    125 
    126 
    127 /*
    128  * Find the next JPEG marker and return its marker code.
    129  * We expect at least one FF byte, possibly more if the compressor used FFs
    130  * to pad the file.
    131  * There could also be non-FF garbage between markers.  The treatment of such
    132  * garbage is unspecified; we choose to skip over it but emit a warning msg.
    133  * NB: this routine must not be used after seeing SOS marker, since it will
    134  * not deal correctly with FF/00 sequences in the compressed image data...
    135  */
    136 
    137 static int
    138 next_marker (void)
    139 {
    140   int c;
    141   int discarded_bytes = 0;
    142 
    143   /* Find 0xFF byte; count and skip any non-FFs. */
    144   c = read_1_byte();
    145   while (c != 0xFF) {
    146     discarded_bytes++;
    147     c = read_1_byte();
    148   }
    149   /* Get marker code byte, swallowing any duplicate FF bytes.  Extra FFs
    150    * are legal as pad bytes, so don't count them in discarded_bytes.
    151    */
    152   do {
    153     c = read_1_byte();
    154   } while (c == 0xFF);
    155 
    156   if (discarded_bytes != 0) {
    157     fprintf(stderr, "Warning: garbage data found in JPEG file\n");
    158   }
    159 
    160   return c;
    161 }
    162 
    163 
    164 /*
    165  * Read the initial marker, which should be SOI.
    166  * For a JFIF file, the first two bytes of the file should be literally
    167  * 0xFF M_SOI.  To be more general, we could use next_marker, but if the
    168  * input file weren't actually JPEG at all, next_marker might read the whole
    169  * file and then return a misleading error message...
    170  */
    171 
    172 static int
    173 first_marker (void)
    174 {
    175   int c1, c2;
    176 
    177   c1 = NEXTBYTE();
    178   c2 = NEXTBYTE();
    179   if (c1 != 0xFF || c2 != M_SOI)
    180     ERREXIT("Not a JPEG file");
    181   return c2;
    182 }
    183 
    184 
    185 /*
    186  * Most types of marker are followed by a variable-length parameter segment.
    187  * This routine skips over the parameters for any marker we don't otherwise
    188  * want to process.
    189  * Note that we MUST skip the parameter segment explicitly in order not to
    190  * be fooled by 0xFF bytes that might appear within the parameter segment;
    191  * such bytes do NOT introduce new markers.
    192  */
    193 
    194 static void
    195 skip_variable (void)
    196 /* Skip over an unknown or uninteresting variable-length marker */
    197 {
    198   unsigned int length;
    199 
    200   /* Get the marker parameter length count */
    201   length = read_2_bytes();
    202   /* Length includes itself, so must be at least 2 */
    203   if (length < 2)
    204     ERREXIT("Erroneous JPEG marker length");
    205   length -= 2;
    206   /* Skip over the remaining bytes */
    207   while (length > 0) {
    208     (void) read_1_byte();
    209     length--;
    210   }
    211 }
    212 
    213 
    214 /*
    215  * Process a COM marker.
    216  * We want to print out the marker contents as legible text;
    217  * we must guard against non-text junk and varying newline representations.
    218  */
    219 
    220 static void
    221 process_COM (void)
    222 {
    223   unsigned int length;
    224   int ch;
    225   int lastch = 0;
    226 
    227   /* Get the marker parameter length count */
    228   length = read_2_bytes();
    229   /* Length includes itself, so must be at least 2 */
    230   if (length < 2)
    231     ERREXIT("Erroneous JPEG marker length");
    232   length -= 2;
    233 
    234   while (length > 0) {
    235     ch = read_1_byte();
    236     /* Emit the character in a readable form.
    237      * Nonprintables are converted to \nnn form,
    238      * while \ is converted to \\.
    239      * Newlines in CR, CR/LF, or LF form will be printed as one newline.
    240      */
    241     if (ch == '\r') {
    242       printf("\n");
    243     } else if (ch == '\n') {
    244       if (lastch != '\r')
    245 	printf("\n");
    246     } else if (ch == '\\') {
    247       printf("\\\\");
    248     } else if (isprint(ch)) {
    249       putc(ch, stdout);
    250     } else {
    251       printf("\\%03o", ch);
    252     }
    253     lastch = ch;
    254     length--;
    255   }
    256   printf("\n");
    257 }
    258 
    259 
    260 /*
    261  * Process a SOFn marker.
    262  * This code is only needed if you want to know the image dimensions...
    263  */
    264 
    265 static void
    266 process_SOFn (int marker)
    267 {
    268   unsigned int length;
    269   unsigned int image_height, image_width;
    270   int data_precision, num_components;
    271   const char * process;
    272   int ci;
    273 
    274   length = read_2_bytes();	/* usual parameter length count */
    275 
    276   data_precision = read_1_byte();
    277   image_height = read_2_bytes();
    278   image_width = read_2_bytes();
    279   num_components = read_1_byte();
    280 
    281   switch (marker) {
    282   case M_SOF0:	process = "Baseline";  break;
    283   case M_SOF1:	process = "Extended sequential";  break;
    284   case M_SOF2:	process = "Progressive";  break;
    285   case M_SOF3:	process = "Lossless";  break;
    286   case M_SOF5:	process = "Differential sequential";  break;
    287   case M_SOF6:	process = "Differential progressive";  break;
    288   case M_SOF7:	process = "Differential lossless";  break;
    289   case M_SOF9:	process = "Extended sequential, arithmetic coding";  break;
    290   case M_SOF10:	process = "Progressive, arithmetic coding";  break;
    291   case M_SOF11:	process = "Lossless, arithmetic coding";  break;
    292   case M_SOF13:	process = "Differential sequential, arithmetic coding";  break;
    293   case M_SOF14:	process = "Differential progressive, arithmetic coding"; break;
    294   case M_SOF15:	process = "Differential lossless, arithmetic coding";  break;
    295   default:	process = "Unknown";  break;
    296   }
    297 
    298   printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n",
    299 	 image_width, image_height, num_components, data_precision);
    300   printf("JPEG process: %s\n", process);
    301 
    302   if (length != (unsigned int) (8 + num_components * 3))
    303     ERREXIT("Bogus SOF marker length");
    304 
    305   for (ci = 0; ci < num_components; ci++) {
    306     (void) read_1_byte();	/* Component ID code */
    307     (void) read_1_byte();	/* H, V sampling factors */
    308     (void) read_1_byte();	/* Quantization table number */
    309   }
    310 }
    311 
    312 
    313 /*
    314  * Parse the marker stream until SOS or EOI is seen;
    315  * display any COM markers.
    316  * While the companion program wrjpgcom will always insert COM markers before
    317  * SOFn, other implementations might not, so we scan to SOS before stopping.
    318  * If we were only interested in the image dimensions, we would stop at SOFn.
    319  * (Conversely, if we only cared about COM markers, there would be no need
    320  * for special code to handle SOFn; we could treat it like other markers.)
    321  */
    322 
    323 static int
    324 scan_JPEG_header (int verbose)
    325 {
    326   int marker;
    327 
    328   /* Expect SOI at start of file */
    329   if (first_marker() != M_SOI)
    330     ERREXIT("Expected SOI marker first");
    331 
    332   /* Scan miscellaneous markers until we reach SOS. */
    333   for (;;) {
    334     marker = next_marker();
    335     switch (marker) {
    336       /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
    337        * treated as SOFn.  C4 in particular is actually DHT.
    338        */
    339     case M_SOF0:		/* Baseline */
    340     case M_SOF1:		/* Extended sequential, Huffman */
    341     case M_SOF2:		/* Progressive, Huffman */
    342     case M_SOF3:		/* Lossless, Huffman */
    343     case M_SOF5:		/* Differential sequential, Huffman */
    344     case M_SOF6:		/* Differential progressive, Huffman */
    345     case M_SOF7:		/* Differential lossless, Huffman */
    346     case M_SOF9:		/* Extended sequential, arithmetic */
    347     case M_SOF10:		/* Progressive, arithmetic */
    348     case M_SOF11:		/* Lossless, arithmetic */
    349     case M_SOF13:		/* Differential sequential, arithmetic */
    350     case M_SOF14:		/* Differential progressive, arithmetic */
    351     case M_SOF15:		/* Differential lossless, arithmetic */
    352       if (verbose)
    353 	process_SOFn(marker);
    354       else
    355 	skip_variable();
    356       break;
    357 
    358     case M_SOS:			/* stop before hitting compressed data */
    359       return marker;
    360 
    361     case M_EOI:			/* in case it's a tables-only JPEG stream */
    362       return marker;
    363 
    364     case M_COM:
    365       process_COM();
    366       break;
    367 
    368     case M_APP12:
    369       /* Some digital camera makers put useful textual information into
    370        * APP12 markers, so we print those out too when in -verbose mode.
    371        */
    372       if (verbose) {
    373 	printf("APP12 contains:\n");
    374 	process_COM();
    375       } else
    376 	skip_variable();
    377       break;
    378 
    379     default:			/* Anything else just gets skipped */
    380       skip_variable();		/* we assume it has a parameter count... */
    381       break;
    382     }
    383   } /* end loop */
    384 }
    385 
    386 
    387 /* Command line parsing code */
    388 
    389 static const char * progname;	/* program name for error messages */
    390 
    391 
    392 static void
    393 usage (void)
    394 /* complain about bad command line */
    395 {
    396   fprintf(stderr, "rdjpgcom displays any textual comments in a JPEG file.\n");
    397 
    398   fprintf(stderr, "Usage: %s [switches] [inputfile]\n", progname);
    399 
    400   fprintf(stderr, "Switches (names may be abbreviated):\n");
    401   fprintf(stderr, "  -verbose    Also display dimensions of JPEG image\n");
    402 
    403   exit(EXIT_FAILURE);
    404 }
    405 
    406 
    407 static int
    408 keymatch (char * arg, const char * keyword, int minchars)
    409 /* Case-insensitive matching of (possibly abbreviated) keyword switches. */
    410 /* keyword is the constant keyword (must be lower case already), */
    411 /* minchars is length of minimum legal abbreviation. */
    412 {
    413   register int ca, ck;
    414   register int nmatched = 0;
    415 
    416   while ((ca = *arg++) != '\0') {
    417     if ((ck = *keyword++) == '\0')
    418       return 0;			/* arg longer than keyword, no good */
    419     if (isupper(ca))		/* force arg to lcase (assume ck is already) */
    420       ca = tolower(ca);
    421     if (ca != ck)
    422       return 0;			/* no good */
    423     nmatched++;			/* count matched characters */
    424   }
    425   /* reached end of argument; fail if it's too short for unique abbrev */
    426   if (nmatched < minchars)
    427     return 0;
    428   return 1;			/* A-OK */
    429 }
    430 
    431 
    432 /*
    433  * The main program.
    434  */
    435 
    436 int
    437 main (int argc, char **argv)
    438 {
    439   int argn;
    440   char * arg;
    441   int verbose = 0;
    442 
    443   /* On Mac, fetch a command line. */
    444 #ifdef USE_CCOMMAND
    445   argc = ccommand(&argv);
    446 #endif
    447 
    448   progname = argv[0];
    449   if (progname == NULL || progname[0] == 0)
    450     progname = "rdjpgcom";	/* in case C library doesn't provide it */
    451 
    452   /* Parse switches, if any */
    453   for (argn = 1; argn < argc; argn++) {
    454     arg = argv[argn];
    455     if (arg[0] != '-')
    456       break;			/* not switch, must be file name */
    457     arg++;			/* advance over '-' */
    458     if (keymatch(arg, "verbose", 1)) {
    459       verbose++;
    460     } else
    461       usage();
    462   }
    463 
    464   /* Open the input file. */
    465   /* Unix style: expect zero or one file name */
    466   if (argn < argc-1) {
    467     fprintf(stderr, "%s: only one input file\n", progname);
    468     usage();
    469   }
    470   if (argn < argc) {
    471     if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) {
    472       fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
    473       exit(EXIT_FAILURE);
    474     }
    475   } else {
    476     /* default input file is stdin */
    477 #ifdef USE_SETMODE		/* need to hack file mode? */
    478     setmode(fileno(stdin), O_BINARY);
    479 #endif
    480 #ifdef USE_FDOPEN		/* need to re-open in binary mode? */
    481     if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
    482       fprintf(stderr, "%s: can't open stdin\n", progname);
    483       exit(EXIT_FAILURE);
    484     }
    485 #else
    486     infile = stdin;
    487 #endif
    488   }
    489 
    490   /* Scan the JPEG headers. */
    491   (void) scan_JPEG_header(verbose);
    492 
    493   /* All done. */
    494   exit(EXIT_SUCCESS);
    495   return 0;			/* suppress no-return-value warnings */
    496 }
    497