Home | History | Annotate | Download | only in StrGather
      1 /*++
      2 
      3 Copyright (c) 2004 - 2010, Intel Corporation. All rights reserved.<BR>
      4 This program and the accompanying materials
      5 are licensed and made available under the terms and conditions of the BSD License
      6 which accompanies this distribution.  The full text of the license may be found at
      7 http://opensource.org/licenses/bsd-license.php
      8 
      9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     11 
     12 Module Name:
     13 
     14   StrGather.c
     15 
     16 Abstract:
     17 
     18   Parse a strings file and create or add to a string database file.
     19 
     20 --*/
     21 
     22 #include <stdio.h>
     23 #include <string.h>
     24 #include <stdlib.h>
     25 #include <ctype.h>
     26 
     27 #include "Tiano.h"
     28 #include "EfiUtilityMsgs.h"
     29 #include "StrGather.h"
     30 #include "StringDB.h"
     31 
     32 #define UTILITY_NAME     "StrGather"
     33 #define UTILITY_VERSION  "v1.0"
     34 
     35 typedef UINT16  WCHAR;
     36 
     37 #define MAX_PATH                    1024
     38 #define MAX_NEST_DEPTH              20  // just in case we get in an endless loop.
     39 #define MAX_STRING_IDENTIFIER_NAME  100 // number of wchars
     40 #define MAX_LINE_LEN                400
     41 #define STRING_TOKEN                "STRING_TOKEN"
     42 #define DEFAULT_BASE_NAME           "BaseName"
     43 //
     44 // Operational modes for this utility
     45 //
     46 #define MODE_UNKNOWN  0
     47 #define MODE_PARSE    1
     48 #define MODE_SCAN     2
     49 #define MODE_DUMP     3
     50 
     51 //
     52 // We keep a linked list of these for the source files we process
     53 //
     54 typedef struct _SOURCE_FILE {
     55   FILE                *Fptr;
     56   WCHAR               *FileBuffer;
     57   WCHAR               *FileBufferPtr;
     58   UINT32              FileSize;
     59   INT8                FileName[MAX_PATH];
     60   UINT32              LineNum;
     61   BOOLEAN             EndOfFile;
     62   BOOLEAN             SkipToHash;
     63   struct _SOURCE_FILE *Previous;
     64   struct _SOURCE_FILE *Next;
     65   WCHAR               ControlCharacter;
     66 } SOURCE_FILE;
     67 
     68 #define DEFAULT_CONTROL_CHARACTER UNICODE_SLASH
     69 
     70 //
     71 // Here's all our globals. We need a linked list of include paths, a linked
     72 // list of source files, a linked list of subdirectories (appended to each
     73 // include path when searching), and a couple other fields.
     74 //
     75 static struct {
     76   SOURCE_FILE                 SourceFiles;
     77   TEXT_STRING_LIST            *IncludePaths;                    // all include paths to search
     78   TEXT_STRING_LIST            *LastIncludePath;
     79   TEXT_STRING_LIST            *ScanFileName;
     80   TEXT_STRING_LIST            *LastScanFileName;
     81   TEXT_STRING_LIST            *SkipExt;                         // if -skipext .uni
     82   TEXT_STRING_LIST            *LastSkipExt;
     83   TEXT_STRING_LIST            *IndirectionFileName;
     84   TEXT_STRING_LIST            *LastIndirectionFileName;
     85   TEXT_STRING_LIST            *DatabaseFileName;
     86   TEXT_STRING_LIST            *LastDatabaseFileName;
     87   WCHAR_STRING_LIST           *Language;
     88   WCHAR_STRING_LIST           *LastLanguage;
     89   WCHAR_MATCHING_STRING_LIST  *IndirectionList;                 // from indirection file(s)
     90   WCHAR_MATCHING_STRING_LIST  *LastIndirectionList;
     91   BOOLEAN                     Verbose;                          // for more detailed output
     92   BOOLEAN                     VerboseDatabaseWrite;             // for more detailed output when writing database
     93   BOOLEAN                     VerboseDatabaseRead;              // for more detailed output when reading database
     94   BOOLEAN                     NewDatabase;                      // to start from scratch
     95   BOOLEAN                     IgnoreNotFound;                   // when scanning
     96   BOOLEAN                     VerboseScan;
     97   BOOLEAN                     UnquotedStrings;                  // -uqs option
     98   INT8                        OutputDatabaseFileName[MAX_PATH];
     99   INT8                        StringHFileName[MAX_PATH];
    100   INT8                        StringCFileName[MAX_PATH];        // output .C filename
    101   INT8                        DumpUFileName[MAX_PATH];          // output unicode dump file name
    102   INT8                        HiiExportPackFileName[MAX_PATH];  // HII export pack file name
    103   INT8                        BaseName[MAX_PATH];               // base filename of the strings file
    104   INT8                        OutputDependencyFileName[MAX_PATH];
    105   FILE                        *OutputDependencyFptr;
    106   UINT32                      Mode;
    107 } mGlobals;
    108 
    109 static
    110 BOOLEAN
    111 IsValidIdentifierChar (
    112   INT8      Char,
    113   BOOLEAN   FirstChar
    114   );
    115 
    116 static
    117 void
    118 RewindFile (
    119   SOURCE_FILE *SourceFile
    120   );
    121 
    122 static
    123 BOOLEAN
    124 SkipTo (
    125   SOURCE_FILE *SourceFile,
    126   WCHAR       WChar,
    127   BOOLEAN     StopAfterNewline
    128   );
    129 
    130 static
    131 UINT32
    132 SkipWhiteSpace (
    133   SOURCE_FILE *SourceFile
    134   );
    135 
    136 static
    137 BOOLEAN
    138 IsWhiteSpace (
    139   SOURCE_FILE *SourceFile
    140   );
    141 
    142 static
    143 BOOLEAN
    144 EndOfFile (
    145   SOURCE_FILE *SourceFile
    146   );
    147 
    148 static
    149 void
    150 PreprocessFile (
    151   SOURCE_FILE *SourceFile
    152   );
    153 
    154 static
    155 UINT32
    156 GetStringIdentifierName (
    157   IN SOURCE_FILE  *SourceFile,
    158   IN OUT WCHAR    *StringIdentifierName,
    159   IN UINT32       StringIdentifierNameLen
    160   );
    161 
    162 static
    163 UINT32
    164 GetLanguageIdentifierName (
    165   IN SOURCE_FILE  *SourceFile,
    166   IN OUT WCHAR    *LanguageIdentifierName,
    167   IN UINT32       LanguageIdentifierNameLen,
    168   IN BOOLEAN      Optional
    169   );
    170 
    171 static
    172 WCHAR *
    173 GetPrintableLanguageName (
    174   IN SOURCE_FILE  *SourceFile
    175   );
    176 
    177 static
    178 STATUS
    179 AddCommandLineLanguage (
    180   IN INT8          *Language
    181   );
    182 
    183 static
    184 WCHAR *
    185 GetQuotedString (
    186   SOURCE_FILE *SourceFile,
    187   BOOLEAN     Optional
    188   );
    189 
    190 static
    191 STATUS
    192 ProcessIncludeFile (
    193   SOURCE_FILE *SourceFile,
    194   SOURCE_FILE *ParentSourceFile
    195   );
    196 
    197 static
    198 STATUS
    199 ParseFile (
    200   SOURCE_FILE *SourceFile
    201   );
    202 
    203 static
    204 FILE  *
    205 FindFile (
    206   IN INT8     *FileName,
    207   OUT INT8    *FoundFileName,
    208   IN UINT32   FoundFileNameLen
    209   );
    210 
    211 static
    212 STATUS
    213 ProcessArgs (
    214   int   Argc,
    215   char  *Argv[]
    216   );
    217 
    218 static
    219 STATUS
    220 ProcessFile (
    221   SOURCE_FILE *SourceFile
    222   );
    223 
    224 static
    225 UINT32
    226 wstrcmp (
    227   WCHAR *Buffer,
    228   WCHAR *Str
    229   );
    230 
    231 static
    232 void
    233 Usage (
    234   VOID
    235   );
    236 
    237 static
    238 void
    239 FreeLists (
    240   VOID
    241   );
    242 
    243 static
    244 void
    245 ProcessTokenString (
    246   SOURCE_FILE *SourceFile
    247   );
    248 
    249 static
    250 void
    251 ProcessTokenInclude (
    252   SOURCE_FILE *SourceFile
    253   );
    254 
    255 static
    256 void
    257 ProcessTokenScope (
    258   SOURCE_FILE *SourceFile
    259   );
    260 
    261 static
    262 void
    263 ProcessTokenLanguage (
    264   SOURCE_FILE *SourceFile
    265   );
    266 
    267 static
    268 void
    269 ProcessTokenLangDef (
    270   SOURCE_FILE *SourceFile
    271   );
    272 
    273 static
    274 STATUS
    275 ScanFiles (
    276   TEXT_STRING_LIST *ScanFiles
    277   );
    278 
    279 static
    280 STATUS
    281 ParseIndirectionFiles (
    282   TEXT_STRING_LIST    *Files
    283   );
    284 
    285 int
    286 main (
    287   int   Argc,
    288   char  *Argv[]
    289   )
    290 /*++
    291 
    292 Routine Description:
    293 
    294   Call the routine to parse the command-line options, then process the file.
    295 
    296 Arguments:
    297 
    298   Argc - Standard C main() argc and argv.
    299   Argv - Standard C main() argc and argv.
    300 
    301 Returns:
    302 
    303   0       if successful
    304   nonzero otherwise
    305 
    306 --*/
    307 {
    308   STATUS  Status;
    309 
    310   SetUtilityName (UTILITY_NAME);
    311   //
    312   // Process the command-line arguments
    313   //
    314   Status = ProcessArgs (Argc, Argv);
    315   if (Status != STATUS_SUCCESS) {
    316     return Status;
    317   }
    318   //
    319   // Initialize the database manager
    320   //
    321   StringDBConstructor ();
    322   //
    323   // We always try to read in an existing database file. It may not
    324   // exist, which is ok usually.
    325   //
    326   if (mGlobals.NewDatabase == 0) {
    327     //
    328     // Read all databases specified.
    329     //
    330     for (mGlobals.LastDatabaseFileName = mGlobals.DatabaseFileName;
    331          mGlobals.LastDatabaseFileName != NULL;
    332          mGlobals.LastDatabaseFileName = mGlobals.LastDatabaseFileName->Next
    333         ) {
    334       Status = StringDBReadDatabase (mGlobals.LastDatabaseFileName->Str, TRUE, mGlobals.VerboseDatabaseRead);
    335       if (Status != STATUS_SUCCESS) {
    336         return Status;
    337       }
    338     }
    339   }
    340   //
    341   // Read indirection file(s) if specified
    342   //
    343   if (ParseIndirectionFiles (mGlobals.IndirectionFileName) != STATUS_SUCCESS) {
    344     goto Finish;
    345   }
    346   //
    347   // If scanning source files, do that now
    348   //
    349   if (mGlobals.Mode == MODE_SCAN) {
    350     ScanFiles (mGlobals.ScanFileName);
    351   } else if (mGlobals.Mode == MODE_PARSE) {
    352     //
    353     // Parsing a unicode strings file
    354     //
    355     mGlobals.SourceFiles.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
    356     if (mGlobals.OutputDependencyFileName[0] != 0) {
    357       if ((mGlobals.OutputDependencyFptr = fopen (mGlobals.OutputDependencyFileName, "w")) == NULL) {
    358         Error (NULL, 0, 0, mGlobals.OutputDependencyFileName, "failed to open output dependency file");
    359         goto Finish;
    360       }
    361     }
    362     Status = ProcessIncludeFile (&mGlobals.SourceFiles, NULL);
    363     if (mGlobals.OutputDependencyFptr != NULL) {
    364       fclose (mGlobals.OutputDependencyFptr);
    365     }
    366     if (Status != STATUS_SUCCESS) {
    367       goto Finish;
    368     }
    369   }
    370   //
    371   // Create the string defines header file if there have been no errors.
    372   //
    373   ParserSetPosition (NULL, 0);
    374   if ((mGlobals.StringHFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
    375     Status = StringDBDumpStringDefines (mGlobals.StringHFileName, mGlobals.BaseName);
    376     if (Status != EFI_SUCCESS) {
    377       goto Finish;
    378     }
    379   }
    380   //
    381   // Dump the strings to a .c file if there have still been no errors.
    382   //
    383   if ((mGlobals.StringCFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
    384     Status = StringDBDumpCStrings (
    385               mGlobals.StringCFileName,
    386               mGlobals.BaseName,
    387               mGlobals.Language,
    388               mGlobals.IndirectionList
    389               );
    390     if (Status != EFI_SUCCESS) {
    391       goto Finish;
    392     }
    393   }
    394   //
    395   // Dump the database if requested
    396   //
    397   if ((mGlobals.DumpUFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
    398     StringDBDumpDatabase (NULL, mGlobals.DumpUFileName, FALSE);
    399   }
    400   //
    401   // Dump the string data as HII binary string pack if requested
    402   //
    403   if ((mGlobals.HiiExportPackFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
    404     StringDBCreateHiiExportPack (mGlobals.HiiExportPackFileName, mGlobals.Language);
    405   }
    406   //
    407   // Always update the database if no errors and not in dump mode. If they specified -od
    408   // for an output database file name, then use that name. Otherwise use the name of
    409   // the first database file specified with -db
    410   //
    411   if ((mGlobals.Mode != MODE_DUMP) && (GetUtilityStatus () < STATUS_ERROR)) {
    412     if (mGlobals.OutputDatabaseFileName[0]) {
    413       Status = StringDBWriteDatabase (mGlobals.OutputDatabaseFileName, mGlobals.VerboseDatabaseWrite);
    414     } else {
    415       Status = StringDBWriteDatabase (mGlobals.DatabaseFileName->Str, mGlobals.VerboseDatabaseWrite);
    416     }
    417 
    418     if (Status != EFI_SUCCESS) {
    419       goto Finish;
    420     }
    421   }
    422 
    423 Finish:
    424   //
    425   // Free up memory
    426   //
    427   FreeLists ();
    428   StringDBDestructor ();
    429   return GetUtilityStatus ();
    430 }
    431 
    432 static
    433 STATUS
    434 ProcessIncludeFile (
    435   SOURCE_FILE *SourceFile,
    436   SOURCE_FILE *ParentSourceFile
    437   )
    438 /*++
    439 
    440 Routine Description:
    441 
    442   Given a source file, open the file and parse it
    443 
    444 Arguments:
    445 
    446   SourceFile        - name of file to parse
    447   ParentSourceFile  - for error reporting purposes, the file that #included SourceFile.
    448 
    449 Returns:
    450 
    451   Standard status.
    452 
    453 --*/
    454 {
    455   static UINT32 NestDepth = 0;
    456   INT8          FoundFileName[MAX_PATH];
    457   STATUS        Status;
    458 
    459   Status = STATUS_SUCCESS;
    460   NestDepth++;
    461   //
    462   // Print the file being processed. Indent so you can tell the include nesting
    463   // depth.
    464   //
    465   if (mGlobals.Verbose) {
    466     fprintf (stdout, "%*cProcessing file '%s'\n", NestDepth * 2, ' ', SourceFile->FileName);
    467   }
    468 
    469   //
    470   // Make sure we didn't exceed our maximum nesting depth
    471   //
    472   if (NestDepth > MAX_NEST_DEPTH) {
    473     Error (NULL, 0, 0, SourceFile->FileName, "max nesting depth (%d) exceeded", NestDepth);
    474     Status = STATUS_ERROR;
    475     goto Finish;
    476   }
    477   //
    478   // Try to open the file locally, and if that fails try along our include paths.
    479   //
    480   strcpy (FoundFileName, SourceFile->FileName);
    481   if ((SourceFile->Fptr = fopen (FoundFileName, "rb")) == NULL) {
    482     //
    483     // Try to find it among the paths if it has a parent (that is, it is included
    484     // by someone else).
    485     //
    486     if (ParentSourceFile == NULL) {
    487       Error (NULL, 0, 0, SourceFile->FileName, "file not found");
    488       Status = STATUS_ERROR;
    489       goto Finish;
    490     }
    491 
    492     SourceFile->Fptr = FindFile (SourceFile->FileName, FoundFileName, sizeof (FoundFileName));
    493     if (SourceFile->Fptr == NULL) {
    494       Error (ParentSourceFile->FileName, ParentSourceFile->LineNum, 0, SourceFile->FileName, "include file not found");
    495       Status = STATUS_ERROR;
    496       goto Finish;
    497     }
    498   }
    499 
    500   //
    501   // Output the dependency
    502   //
    503   if (mGlobals.OutputDependencyFptr != NULL) {
    504     fprintf (mGlobals.OutputDependencyFptr, "%s : %s\n", mGlobals.DatabaseFileName->Str, FoundFileName);
    505     //
    506     // Add pseudo target to avoid incremental build failure when the file is deleted
    507     //
    508     fprintf (mGlobals.OutputDependencyFptr, "%s : \n", FoundFileName);
    509   }
    510 
    511   //
    512   // Process the file found
    513   //
    514   ProcessFile (SourceFile);
    515 
    516 Finish:
    517   NestDepth--;
    518   //
    519   // Close open files and return status
    520   //
    521   if (SourceFile->Fptr != NULL) {
    522     fclose (SourceFile->Fptr);
    523   }
    524 
    525   return Status;
    526 }
    527 
    528 static
    529 STATUS
    530 ProcessFile (
    531   SOURCE_FILE *SourceFile
    532   )
    533 {
    534   //
    535   // Get the file size, and then read the entire thing into memory.
    536   // Allocate space for a terminator character.
    537   //
    538   fseek (SourceFile->Fptr, 0, SEEK_END);
    539   SourceFile->FileSize = ftell (SourceFile->Fptr);
    540   fseek (SourceFile->Fptr, 0, SEEK_SET);
    541   SourceFile->FileBuffer = (WCHAR *) malloc (SourceFile->FileSize + sizeof (WCHAR));
    542   if (SourceFile->FileBuffer == NULL) {
    543     Error (NULL, 0, 0, "memory allocation failure", NULL);
    544     return STATUS_ERROR;
    545   }
    546 
    547   fread ((VOID *) SourceFile->FileBuffer, SourceFile->FileSize, 1, SourceFile->Fptr);
    548   SourceFile->FileBuffer[(SourceFile->FileSize / sizeof (WCHAR))] = UNICODE_NULL;
    549   //
    550   // Pre-process the file to replace comments with spaces
    551   //
    552   PreprocessFile (SourceFile);
    553   //
    554   // Parse the file
    555   //
    556   ParseFile (SourceFile);
    557   free (SourceFile->FileBuffer);
    558   return STATUS_SUCCESS;
    559 }
    560 
    561 static
    562 STATUS
    563 ParseFile (
    564   SOURCE_FILE *SourceFile
    565   )
    566 {
    567   BOOLEAN InComment;
    568   UINT32  Len;
    569 
    570   //
    571   // First character of a unicode file is special. Make sure
    572   //
    573   if (SourceFile->FileBufferPtr[0] != UNICODE_FILE_START) {
    574     Error (SourceFile->FileName, 1, 0, SourceFile->FileName, "file does not appear to be a unicode file");
    575     return STATUS_ERROR;
    576   }
    577 
    578   SourceFile->FileBufferPtr++;
    579   InComment = FALSE;
    580   //
    581   // Print the first line if in verbose mode
    582   //
    583   if (mGlobals.Verbose) {
    584     printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
    585   }
    586   //
    587   // Since the syntax is relatively straightforward, just switch on the next char
    588   //
    589   while (!EndOfFile (SourceFile)) {
    590     //
    591     // Check for whitespace
    592     //
    593     if (SourceFile->FileBufferPtr[0] == UNICODE_SPACE) {
    594       SourceFile->FileBufferPtr++;
    595     } else if (SourceFile->FileBufferPtr[0] == UNICODE_TAB) {
    596       SourceFile->FileBufferPtr++;
    597     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
    598       SourceFile->FileBufferPtr++;
    599     } else if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
    600       SourceFile->FileBufferPtr++;
    601       SourceFile->LineNum++;
    602       if (mGlobals.Verbose) {
    603         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
    604       }
    605 
    606       InComment = FALSE;
    607     } else if (SourceFile->FileBufferPtr[0] == 0) {
    608       SourceFile->FileBufferPtr++;
    609     } else if (InComment) {
    610       SourceFile->FileBufferPtr++;
    611     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
    612       SourceFile->FileBufferPtr += 2;
    613       InComment = TRUE;
    614     } else if (SourceFile->SkipToHash && (SourceFile->FileBufferPtr[0] != SourceFile->ControlCharacter)) {
    615       SourceFile->FileBufferPtr++;
    616     } else {
    617       SourceFile->SkipToHash = FALSE;
    618       if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    619           ((Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"include")) > 0)
    620           ) {
    621         SourceFile->FileBufferPtr += Len + 1;
    622         ProcessTokenInclude (SourceFile);
    623       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    624                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"scope")) > 0
    625               ) {
    626         SourceFile->FileBufferPtr += Len + 1;
    627         ProcessTokenScope (SourceFile);
    628       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    629                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"language")) > 0
    630               ) {
    631         SourceFile->FileBufferPtr += Len + 1;
    632         ProcessTokenLanguage (SourceFile);
    633       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    634                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"langdef")) > 0
    635               ) {
    636         SourceFile->FileBufferPtr += Len + 1;
    637         ProcessTokenLangDef (SourceFile);
    638       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    639                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"string")) > 0
    640               ) {
    641         SourceFile->FileBufferPtr += Len + 1;
    642         ProcessTokenString (SourceFile);
    643       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    644                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"EFI_BREAKPOINT()")) > 0
    645               ) {
    646         SourceFile->FileBufferPtr += Len;
    647         EFI_BREAKPOINT ();
    648       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
    649                (SourceFile->FileBufferPtr[1] == UNICODE_EQUAL_SIGN)
    650               ) {
    651         SourceFile->ControlCharacter = SourceFile->FileBufferPtr[2];
    652         SourceFile->FileBufferPtr += 3;
    653       } else {
    654         Error (SourceFile->FileName, SourceFile->LineNum, 0, "unrecognized token", "%S", SourceFile->FileBufferPtr);
    655         //
    656         // Treat rest of line as a comment.
    657         //
    658         InComment = TRUE;
    659       }
    660     }
    661   }
    662 
    663   return STATUS_SUCCESS;
    664 }
    665 
    666 static
    667 void
    668 PreprocessFile (
    669   SOURCE_FILE *SourceFile
    670   )
    671 /*++
    672 
    673 Routine Description:
    674   Preprocess a file to replace all carriage returns with NULLs so
    675   we can print lines from the file to the screen.
    676 
    677 Arguments:
    678   SourceFile - structure that we use to keep track of an input file.
    679 
    680 Returns:
    681   Nothing.
    682 
    683 --*/
    684 {
    685   BOOLEAN InComment;
    686 
    687   RewindFile (SourceFile);
    688   InComment = FALSE;
    689   while (!EndOfFile (SourceFile)) {
    690     //
    691     // If a line-feed, then no longer in a comment
    692     //
    693     if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
    694       SourceFile->FileBufferPtr++;
    695       SourceFile->LineNum++;
    696       InComment = 0;
    697     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
    698       //
    699       // Replace all carriage returns with a NULL so we can print stuff
    700       //
    701       SourceFile->FileBufferPtr[0] = 0;
    702       SourceFile->FileBufferPtr++;
    703     } else if (InComment) {
    704       SourceFile->FileBufferPtr[0] = UNICODE_SPACE;
    705       SourceFile->FileBufferPtr++;
    706     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
    707       SourceFile->FileBufferPtr += 2;
    708       InComment = TRUE;
    709     } else {
    710       SourceFile->FileBufferPtr++;
    711     }
    712   }
    713   //
    714   // Could check for end-of-file and still in a comment, but
    715   // should not be necessary. So just restore the file pointers.
    716   //
    717   RewindFile (SourceFile);
    718 }
    719 
    720 static
    721 WCHAR *
    722 GetPrintableLanguageName (
    723   IN SOURCE_FILE  *SourceFile
    724   )
    725 {
    726   WCHAR   *String;
    727   WCHAR   *Start;
    728   WCHAR   *Ptr;
    729   UINT32  Len;
    730 
    731   SkipWhiteSpace (SourceFile);
    732   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
    733     Error (
    734       SourceFile->FileName,
    735       SourceFile->LineNum,
    736       0,
    737       "expected quoted printable language name",
    738       "%S",
    739       SourceFile->FileBufferPtr
    740       );
    741     SourceFile->SkipToHash = TRUE;
    742     return NULL;
    743   }
    744 
    745   Len = 0;
    746   SourceFile->FileBufferPtr++;
    747   Start = Ptr = SourceFile->FileBufferPtr;
    748   while (!EndOfFile (SourceFile)) {
    749     if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
    750       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
    751       break;
    752     } else if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
    753       break;
    754     }
    755 
    756     SourceFile->FileBufferPtr++;
    757     Len++;
    758   }
    759 
    760   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
    761     Warning (
    762       SourceFile->FileName,
    763       SourceFile->LineNum,
    764       0,
    765       "missing closing quote on printable language name string",
    766       "%S",
    767       Start
    768       );
    769   } else {
    770     SourceFile->FileBufferPtr++;
    771   }
    772   //
    773   // Now allocate memory for the string and save it off
    774   //
    775   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
    776   if (String == NULL) {
    777     Error (NULL, 0, 0, "memory allocation failed", NULL);
    778     return NULL;
    779   }
    780   //
    781   // Copy the string from the file buffer to the local copy.
    782   // We do no reformatting of it whatsoever at this point.
    783   //
    784   Ptr = String;
    785   while (Len > 0) {
    786     *Ptr = *Start;
    787     Start++;
    788     Ptr++;
    789     Len--;
    790   }
    791 
    792   *Ptr = 0;
    793   //
    794   // Now format the string to convert \wide and \narrow controls
    795   //
    796   StringDBFormatString (String);
    797   return String;
    798 }
    799 
    800 static
    801 WCHAR *
    802 GetQuotedString (
    803   SOURCE_FILE *SourceFile,
    804   BOOLEAN     Optional
    805   )
    806 {
    807   WCHAR   *String;
    808   WCHAR   *Start;
    809   WCHAR   *Ptr;
    810   UINT32  Len;
    811   BOOLEAN PreviousBackslash;
    812 
    813   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
    814     if (!Optional) {
    815       Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted string", "%S", SourceFile->FileBufferPtr);
    816     }
    817 
    818     return NULL;
    819   }
    820 
    821   Len = 0;
    822   SourceFile->FileBufferPtr++;
    823   Start             = Ptr = SourceFile->FileBufferPtr;
    824   PreviousBackslash = FALSE;
    825   while (!EndOfFile (SourceFile)) {
    826     if ((SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) && (!PreviousBackslash)) {
    827       break;
    828     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
    829       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
    830       PreviousBackslash = FALSE;
    831     } else if (SourceFile->FileBufferPtr[0] == UNICODE_BACKSLASH) {
    832       PreviousBackslash = TRUE;
    833     } else {
    834       PreviousBackslash = FALSE;
    835     }
    836 
    837     SourceFile->FileBufferPtr++;
    838     Len++;
    839   }
    840 
    841   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
    842     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "missing closing quote on string", "%S", Start);
    843   } else {
    844     SourceFile->FileBufferPtr++;
    845   }
    846   //
    847   // Now allocate memory for the string and save it off
    848   //
    849   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
    850   if (String == NULL) {
    851     Error (NULL, 0, 0, "memory allocation failed", NULL);
    852     return NULL;
    853   }
    854   //
    855   // Copy the string from the file buffer to the local copy.
    856   // We do no reformatting of it whatsoever at this point.
    857   //
    858   Ptr = String;
    859   while (Len > 0) {
    860     *Ptr = *Start;
    861     Start++;
    862     Ptr++;
    863     Len--;
    864   }
    865 
    866   *Ptr = 0;
    867   return String;
    868 }
    869 //
    870 // Parse:
    871 //    #string STR_ID_NAME
    872 //
    873 // All we can do is call the string database to add the string identifier. Unfortunately
    874 // he'll have to keep track of the last identifier we added.
    875 //
    876 static
    877 void
    878 ProcessTokenString (
    879   SOURCE_FILE *SourceFile
    880   )
    881 {
    882   WCHAR   StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
    883   UINT16  StringId;
    884   //
    885   // Extract the string identifier name and add it to the database.
    886   //
    887   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
    888     StringId = STRING_ID_INVALID;
    889     StringDBAddStringIdentifier (StringIdentifier, &StringId, 0);
    890   } else {
    891     //
    892     // Error recovery -- skip to the next #
    893     //
    894     SourceFile->SkipToHash = TRUE;
    895   }
    896 }
    897 
    898 static
    899 BOOLEAN
    900 EndOfFile (
    901   SOURCE_FILE *SourceFile
    902   )
    903 {
    904   //
    905   // The file buffer pointer will typically get updated before the End-of-file flag in the
    906   // source file structure, so check it first.
    907   //
    908   if (SourceFile->FileBufferPtr >= SourceFile->FileBuffer + SourceFile->FileSize / sizeof (WCHAR)) {
    909     SourceFile->EndOfFile = TRUE;
    910     return TRUE;
    911   }
    912 
    913   if (SourceFile->EndOfFile) {
    914     return TRUE;
    915   }
    916 
    917   return FALSE;
    918 }
    919 
    920 static
    921 UINT32
    922 GetStringIdentifierName (
    923   IN SOURCE_FILE  *SourceFile,
    924   IN OUT WCHAR    *StringIdentifierName,
    925   IN UINT32       StringIdentifierNameLen
    926   )
    927 {
    928   UINT32  Len;
    929   WCHAR   *From;
    930   WCHAR   *Start;
    931 
    932   //
    933   // Skip whitespace
    934   //
    935   SkipWhiteSpace (SourceFile);
    936   if (SourceFile->EndOfFile) {
    937     Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-file encountered", "expected string identifier");
    938     return 0;
    939   }
    940   //
    941   // Verify first character of name is [A-Za-z]
    942   //
    943   Len = 0;
    944   StringIdentifierNameLen /= 2;
    945   From  = SourceFile->FileBufferPtr;
    946   Start = SourceFile->FileBufferPtr;
    947   if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
    948       ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))
    949       ) {
    950     //
    951     // Do nothing
    952     //
    953   } else {
    954     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid character in string identifier name", "%S", Start);
    955     return 0;
    956   }
    957 
    958   while (!EndOfFile (SourceFile)) {
    959     if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
    960         ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z)) ||
    961         ((SourceFile->FileBufferPtr[0] >= UNICODE_0) && (SourceFile->FileBufferPtr[0] <= UNICODE_9)) ||
    962         (SourceFile->FileBufferPtr[0] == UNICODE_UNDERSCORE)
    963         ) {
    964       Len++;
    965       if (Len >= StringIdentifierNameLen) {
    966         Error (SourceFile->FileName, SourceFile->LineNum, 0, "string identifier name too long", "%S", Start);
    967         return 0;
    968       }
    969 
    970       *StringIdentifierName = SourceFile->FileBufferPtr[0];
    971       StringIdentifierName++;
    972       SourceFile->FileBufferPtr++;
    973     } else if (SkipWhiteSpace (SourceFile) == 0) {
    974       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid string identifier name", "%S", Start);
    975       return 0;
    976     } else {
    977       break;
    978     }
    979   }
    980   //
    981   // Terminate the copy of the string.
    982   //
    983   *StringIdentifierName = 0;
    984   return Len;
    985 }
    986 
    987 static
    988 UINT32
    989 GetLanguageIdentifierName (
    990   IN SOURCE_FILE  *SourceFile,
    991   IN OUT WCHAR    *LanguageIdentifierName,
    992   IN UINT32       LanguageIdentifierNameLen,
    993   IN BOOLEAN      Optional
    994   )
    995 {
    996   UINT32  Len;
    997   WCHAR   *From;
    998   WCHAR   *Start;
    999   //
   1000   // Skip whitespace
   1001   //
   1002   SkipWhiteSpace (SourceFile);
   1003   if (SourceFile->EndOfFile) {
   1004     if (!Optional) {
   1005       Error (
   1006         SourceFile->FileName,
   1007         SourceFile->LineNum,
   1008         0,
   1009         "end-of-file encountered",
   1010         "expected language identifier"
   1011         );
   1012     }
   1013 
   1014     return 0;
   1015   }
   1016   //
   1017   // This function is called to optionally get a language identifier name in:
   1018   //   #string STR_ID eng "the string"
   1019   // If it's optional, and we find a double-quote, then return now.
   1020   //
   1021   if (Optional) {
   1022     if (*SourceFile->FileBufferPtr == UNICODE_DOUBLE_QUOTE) {
   1023       return 0;
   1024     }
   1025   }
   1026 
   1027   Len = 0;
   1028   LanguageIdentifierNameLen /= 2;
   1029   //
   1030   // Internal error if we weren't given at least 4 WCHAR's to work with.
   1031   //
   1032   if (LanguageIdentifierNameLen < LANGUAGE_IDENTIFIER_NAME_LEN + 1) {
   1033     Error (
   1034       SourceFile->FileName,
   1035       SourceFile->LineNum,
   1036       0,
   1037       "app error -- language identifier name length is invalid",
   1038       NULL
   1039       );
   1040   }
   1041 
   1042   From  = SourceFile->FileBufferPtr;
   1043   Start = SourceFile->FileBufferPtr;
   1044   while (!EndOfFile (SourceFile)) {
   1045     if (((SourceFile->FileBufferPtr[0] >= UNICODE_a) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))) {
   1046       Len++;
   1047       if (Len > LANGUAGE_IDENTIFIER_NAME_LEN) {
   1048         Error (SourceFile->FileName, SourceFile->LineNum, 0, "language identifier name too long", "%S", Start);
   1049         return 0;
   1050       }
   1051 
   1052       *LanguageIdentifierName = SourceFile->FileBufferPtr[0];
   1053       SourceFile->FileBufferPtr++;
   1054       LanguageIdentifierName++;
   1055     } else if (!IsWhiteSpace (SourceFile)) {
   1056       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid language identifier name", "%S", Start);
   1057       return 0;
   1058     } else {
   1059       break;
   1060     }
   1061   }
   1062   //
   1063   // Terminate the copy of the string.
   1064   //
   1065   *LanguageIdentifierName = 0;
   1066   return Len;
   1067 }
   1068 
   1069 static
   1070 void
   1071 ProcessTokenInclude (
   1072   SOURCE_FILE *SourceFile
   1073   )
   1074 {
   1075   INT8        IncludeFileName[MAX_PATH];
   1076   INT8        *To;
   1077   UINT32      Len;
   1078   BOOLEAN     ReportedError;
   1079   SOURCE_FILE IncludedSourceFile;
   1080 
   1081   ReportedError = FALSE;
   1082   if (SkipWhiteSpace (SourceFile) == 0) {
   1083     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "expected whitespace following #include keyword", NULL);
   1084   }
   1085   //
   1086   // Should be quoted file name
   1087   //
   1088   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
   1089     Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted include file name", NULL);
   1090     goto FailDone;
   1091   }
   1092 
   1093   SourceFile->FileBufferPtr++;
   1094   //
   1095   // Copy the filename as ascii to our local string
   1096   //
   1097   To  = IncludeFileName;
   1098   Len = 0;
   1099   while (!EndOfFile (SourceFile)) {
   1100     if ((SourceFile->FileBufferPtr[0] == UNICODE_CR) || (SourceFile->FileBufferPtr[0] == UNICODE_LF)) {
   1101       Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-line found in quoted include file name", NULL);
   1102       goto FailDone;
   1103     }
   1104 
   1105     if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
   1106       SourceFile->FileBufferPtr++;
   1107       break;
   1108     }
   1109     //
   1110     // If too long, then report the error once and process until the closing quote
   1111     //
   1112     Len++;
   1113     if (!ReportedError && (Len >= sizeof (IncludeFileName))) {
   1114       Error (SourceFile->FileName, SourceFile->LineNum, 0, "length of include file name exceeds limit", NULL);
   1115       ReportedError = TRUE;
   1116     }
   1117 
   1118     if (!ReportedError) {
   1119       *To = UNICODE_TO_ASCII (SourceFile->FileBufferPtr[0]);
   1120       To++;
   1121     }
   1122 
   1123     SourceFile->FileBufferPtr++;
   1124   }
   1125 
   1126   if (!ReportedError) {
   1127     *To = 0;
   1128     memset ((char *) &IncludedSourceFile, 0, sizeof (SOURCE_FILE));
   1129     strcpy (IncludedSourceFile.FileName, IncludeFileName);
   1130     IncludedSourceFile.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
   1131     ProcessIncludeFile (&IncludedSourceFile, SourceFile);
   1132     //
   1133     // printf ("including file '%s'\n", IncludeFileName);
   1134     //
   1135   }
   1136 
   1137   return ;
   1138 FailDone:
   1139   //
   1140   // Error recovery -- skip to next #
   1141   //
   1142   SourceFile->SkipToHash = TRUE;
   1143 }
   1144 
   1145 static
   1146 void
   1147 ProcessTokenScope (
   1148   SOURCE_FILE *SourceFile
   1149   )
   1150 {
   1151   WCHAR StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
   1152   //
   1153   // Extract the scope name
   1154   //
   1155   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
   1156     StringDBSetScope (StringIdentifier);
   1157   }
   1158 }
   1159 //
   1160 // Parse:  #langdef eng "English"
   1161 //         #langdef chn "\wideChinese"
   1162 //
   1163 static
   1164 void
   1165 ProcessTokenLangDef (
   1166   SOURCE_FILE *SourceFile
   1167   )
   1168 {
   1169   WCHAR   LanguageIdentifier[MAX_STRING_IDENTIFIER_NAME];
   1170   UINT32  Len;
   1171   WCHAR   *PrintableName;
   1172   //
   1173   // Extract the 3-character language identifier
   1174   //
   1175   Len = GetLanguageIdentifierName (SourceFile, LanguageIdentifier, sizeof (LanguageIdentifier), FALSE);
   1176   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
   1177     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", NULL);
   1178   } else {
   1179     //
   1180     // Extract the printable name
   1181     //
   1182     PrintableName = GetPrintableLanguageName (SourceFile);
   1183     if (PrintableName != NULL) {
   1184       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
   1185       StringDBAddLanguage (LanguageIdentifier, PrintableName);
   1186       free (PrintableName);
   1187       return ;
   1188     }
   1189   }
   1190   //
   1191   // Error recovery -- skip to next #
   1192   //
   1193   SourceFile->SkipToHash = TRUE;
   1194 }
   1195 
   1196 static
   1197 BOOLEAN
   1198 ApparentQuotedString (
   1199   SOURCE_FILE *SourceFile
   1200   )
   1201 {
   1202   WCHAR *Ptr;
   1203   //
   1204   // See if the first and last nonblank characters on the line are double quotes
   1205   //
   1206   for (Ptr = SourceFile->FileBufferPtr; *Ptr && (*Ptr == UNICODE_SPACE); Ptr++)
   1207     ;
   1208   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
   1209     return FALSE;
   1210   }
   1211 
   1212   while (*Ptr) {
   1213     Ptr++;
   1214   }
   1215 
   1216   Ptr--;
   1217   for (; *Ptr && (*Ptr == UNICODE_SPACE); Ptr--)
   1218     ;
   1219   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
   1220     return FALSE;
   1221   }
   1222 
   1223   return TRUE;
   1224 }
   1225 //
   1226 // Parse:
   1227 //   #language eng "some string " "more string"
   1228 //
   1229 static
   1230 void
   1231 ProcessTokenLanguage (
   1232   SOURCE_FILE *SourceFile
   1233   )
   1234 {
   1235   WCHAR   *String;
   1236   WCHAR   *SecondString;
   1237   WCHAR   *TempString;
   1238   WCHAR   *From;
   1239   WCHAR   *To;
   1240   WCHAR   Language[LANGUAGE_IDENTIFIER_NAME_LEN + 1];
   1241   UINT32  Len;
   1242   BOOLEAN PreviousNewline;
   1243   //
   1244   // Get the language identifier
   1245   //
   1246   Language[0] = 0;
   1247   Len         = GetLanguageIdentifierName (SourceFile, Language, sizeof (Language), TRUE);
   1248   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
   1249     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", "%S", Language);
   1250     SourceFile->SkipToHash = TRUE;
   1251     return ;
   1252   }
   1253   //
   1254   // Extract the string value. It's either a quoted string that starts on the current line, or
   1255   // an unquoted string that starts on the following line and continues until the next control
   1256   // character in column 1.
   1257   // Look ahead to find a quote or a newline
   1258   //
   1259   if (SkipTo (SourceFile, UNICODE_DOUBLE_QUOTE, TRUE)) {
   1260     String = GetQuotedString (SourceFile, FALSE);
   1261     if (String != NULL) {
   1262       //
   1263       // Set the position in the file of where we are parsing for error
   1264       // reporting purposes. Then start looking ahead for additional
   1265       // quoted strings, and concatenate them until we get a failure
   1266       // back from the string parser.
   1267       //
   1268       Len = wcslen (String) + 1;
   1269       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
   1270       do {
   1271         SkipWhiteSpace (SourceFile);
   1272         SecondString = GetQuotedString (SourceFile, TRUE);
   1273         if (SecondString != NULL) {
   1274           Len += wcslen (SecondString);
   1275           TempString = (WCHAR *) malloc (Len * sizeof (WCHAR));
   1276           if (TempString == NULL) {
   1277             Error (NULL, 0, 0, "application error", "failed to allocate memory");
   1278             return ;
   1279           }
   1280 
   1281           wcscpy (TempString, String);
   1282           wcscat (TempString, SecondString);
   1283           free (String);
   1284           free (SecondString);
   1285           String = TempString;
   1286         }
   1287       } while (SecondString != NULL);
   1288       StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
   1289       free (String);
   1290     } else {
   1291       //
   1292       // Error was reported at lower level. Error recovery mode.
   1293       //
   1294       SourceFile->SkipToHash = TRUE;
   1295     }
   1296   } else {
   1297     if (!mGlobals.UnquotedStrings) {
   1298       //
   1299       // They're using unquoted strings. If the next non-blank character is a double quote, and the
   1300       // last non-blank character on the line is a double quote, then more than likely they're using
   1301       // quotes, so they need to put the quoted string on the end of the previous line
   1302       //
   1303       if (ApparentQuotedString (SourceFile)) {
   1304         Warning (
   1305           SourceFile->FileName,
   1306           SourceFile->LineNum,
   1307           0,
   1308           "unexpected quoted string on line",
   1309           "specify -uqs option if necessary"
   1310           );
   1311       }
   1312     }
   1313     //
   1314     // Found end-of-line (hopefully). Skip over it and start taking in characters
   1315     // until we find a control character at the start of a line.
   1316     //
   1317     Len             = 0;
   1318     From            = SourceFile->FileBufferPtr;
   1319     PreviousNewline = FALSE;
   1320     while (!EndOfFile (SourceFile)) {
   1321       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
   1322         PreviousNewline = TRUE;
   1323         SourceFile->LineNum++;
   1324       } else {
   1325         Len++;
   1326         if (PreviousNewline && (SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter)) {
   1327           break;
   1328         }
   1329 
   1330         PreviousNewline = FALSE;
   1331       }
   1332 
   1333       SourceFile->FileBufferPtr++;
   1334     }
   1335 
   1336     if ((Len == 0) && EndOfFile (SourceFile)) {
   1337       Error (SourceFile->FileName, SourceFile->LineNum, 0, "unexpected end of file", NULL);
   1338       SourceFile->SkipToHash = TRUE;
   1339       return ;
   1340     }
   1341     //
   1342     // Now allocate a buffer, copy the characters, and add the string.
   1343     //
   1344     String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
   1345     if (String == NULL) {
   1346       Error (NULL, 0, 0, "application error", "failed to allocate memory");
   1347       return ;
   1348     }
   1349 
   1350     To = String;
   1351     while (From < SourceFile->FileBufferPtr) {
   1352       switch (*From) {
   1353       case UNICODE_LF:
   1354       case 0:
   1355         break;
   1356 
   1357       default:
   1358         *To = *From;
   1359         To++;
   1360         break;
   1361       }
   1362 
   1363       From++;
   1364     }
   1365 
   1366     //
   1367     // String[Len] = 0;
   1368     //
   1369     *To = 0;
   1370     StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
   1371   }
   1372 }
   1373 
   1374 static
   1375 BOOLEAN
   1376 IsWhiteSpace (
   1377   SOURCE_FILE *SourceFile
   1378   )
   1379 {
   1380   switch (SourceFile->FileBufferPtr[0]) {
   1381   case UNICODE_NULL:
   1382   case UNICODE_CR:
   1383   case UNICODE_SPACE:
   1384   case UNICODE_TAB:
   1385   case UNICODE_LF:
   1386     return TRUE;
   1387 
   1388   default:
   1389     return FALSE;
   1390   }
   1391 }
   1392 
   1393 static
   1394 UINT32
   1395 SkipWhiteSpace (
   1396   SOURCE_FILE *SourceFile
   1397   )
   1398 {
   1399   UINT32  Count;
   1400 
   1401   Count = 0;
   1402   while (!EndOfFile (SourceFile)) {
   1403     Count++;
   1404     switch (*SourceFile->FileBufferPtr) {
   1405     case UNICODE_NULL:
   1406     case UNICODE_CR:
   1407     case UNICODE_SPACE:
   1408     case UNICODE_TAB:
   1409       SourceFile->FileBufferPtr++;
   1410       break;
   1411 
   1412     case UNICODE_LF:
   1413       SourceFile->FileBufferPtr++;
   1414       SourceFile->LineNum++;
   1415       if (mGlobals.Verbose) {
   1416         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
   1417       }
   1418       break;
   1419 
   1420     default:
   1421       return Count - 1;
   1422     }
   1423   }
   1424   //
   1425   // Some tokens require trailing whitespace. If we're at the end of the
   1426   // file, then we count that as well.
   1427   //
   1428   if ((Count == 0) && (EndOfFile (SourceFile))) {
   1429     Count++;
   1430   }
   1431 
   1432   return Count;
   1433 }
   1434 
   1435 static
   1436 UINT32
   1437 wstrcmp (
   1438   WCHAR *Buffer,
   1439   WCHAR *Str
   1440   )
   1441 {
   1442   UINT32  Len;
   1443 
   1444   Len = 0;
   1445   while (*Str == *Buffer) {
   1446     Buffer++;
   1447     Str++;
   1448     Len++;
   1449   }
   1450 
   1451   if (*Str) {
   1452     return 0;
   1453   }
   1454 
   1455   return Len;
   1456 }
   1457 //
   1458 // Given a filename, try to find it along the include paths.
   1459 //
   1460 static
   1461 FILE *
   1462 FindFile (
   1463   IN INT8    *FileName,
   1464   OUT INT8   *FoundFileName,
   1465   IN UINT32  FoundFileNameLen
   1466   )
   1467 {
   1468   FILE              *Fptr;
   1469   TEXT_STRING_LIST  *List;
   1470 
   1471   //
   1472   // Traverse the list of paths and try to find the file
   1473   //
   1474   List = mGlobals.IncludePaths;
   1475   while (List != NULL) {
   1476     //
   1477     // Put the path and filename together
   1478     //
   1479     if (strlen (List->Str) + strlen (FileName) + 1 > FoundFileNameLen) {
   1480       Error (UTILITY_NAME, 0, 0, NULL, "internal error - cannot concatenate path+filename");
   1481       return NULL;
   1482     }
   1483     //
   1484     // Append the filename to this include path and try to open the file.
   1485     //
   1486     strcpy (FoundFileName, List->Str);
   1487     strcat (FoundFileName, FileName);
   1488     if ((Fptr = fopen (FoundFileName, "rb")) != NULL) {
   1489       //
   1490       // Return the file pointer
   1491       //
   1492       return Fptr;
   1493     }
   1494 
   1495     List = List->Next;
   1496   }
   1497   //
   1498   // Not found
   1499   //
   1500   FoundFileName[0] = 0;
   1501   return NULL;
   1502 }
   1503 //
   1504 // Process the command-line arguments
   1505 //
   1506 static
   1507 STATUS
   1508 ProcessArgs (
   1509   int   Argc,
   1510   char  *Argv[]
   1511   )
   1512 {
   1513   TEXT_STRING_LIST  *NewList;
   1514   //
   1515   // Clear our globals
   1516   //
   1517   memset ((char *) &mGlobals, 0, sizeof (mGlobals));
   1518   strcpy (mGlobals.BaseName, DEFAULT_BASE_NAME);
   1519   //
   1520   // Skip program name
   1521   //
   1522   Argc--;
   1523   Argv++;
   1524 
   1525   if (Argc == 0) {
   1526     Usage ();
   1527     return STATUS_ERROR;
   1528   }
   1529 
   1530   mGlobals.Mode = MODE_UNKNOWN;
   1531   //
   1532   // Process until no more -args.
   1533   //
   1534   while ((Argc > 0) && (Argv[0][0] == '-')) {
   1535     //
   1536     // -parse option
   1537     //
   1538     if (_stricmp (Argv[0], "-parse") == 0) {
   1539       if (mGlobals.Mode != MODE_UNKNOWN) {
   1540         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
   1541         return STATUS_ERROR;
   1542       }
   1543 
   1544       mGlobals.Mode = MODE_PARSE;
   1545       //
   1546       // -scan option
   1547       //
   1548     } else if (_stricmp (Argv[0], "-scan") == 0) {
   1549       if (mGlobals.Mode != MODE_UNKNOWN) {
   1550         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
   1551         return STATUS_ERROR;
   1552       }
   1553 
   1554       mGlobals.Mode = MODE_SCAN;
   1555       //
   1556       // -vscan verbose scanning option
   1557       //
   1558     } else if (_stricmp (Argv[0], "-vscan") == 0) {
   1559       mGlobals.VerboseScan = TRUE;
   1560       //
   1561       // -dump option
   1562       //
   1563     } else if (_stricmp (Argv[0], "-dump") == 0) {
   1564       if (mGlobals.Mode != MODE_UNKNOWN) {
   1565         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
   1566         return STATUS_ERROR;
   1567       }
   1568 
   1569       mGlobals.Mode = MODE_DUMP;
   1570     } else if (_stricmp (Argv[0], "-uqs") == 0) {
   1571       mGlobals.UnquotedStrings = TRUE;
   1572       //
   1573       // -i path    add include search path when parsing
   1574       //
   1575     } else if (_stricmp (Argv[0], "-i") == 0) {
   1576       //
   1577       // check for one more arg
   1578       //
   1579       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1580         Error (UTILITY_NAME, 0, 0, Argv[0], "missing include path");
   1581         return STATUS_ERROR;
   1582       }
   1583       //
   1584       // Allocate memory for a new list element, fill it in, and
   1585       // add it to our list of include paths. Always make sure it
   1586       // has a "\" on the end of it.
   1587       //
   1588       NewList = malloc (sizeof (TEXT_STRING_LIST));
   1589       if (NewList == NULL) {
   1590         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1591         return STATUS_ERROR;
   1592       }
   1593 
   1594       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
   1595       NewList->Str = malloc (strlen (Argv[1]) + 2);
   1596       if (NewList->Str == NULL) {
   1597         free (NewList);
   1598         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1599         return STATUS_ERROR;
   1600       }
   1601 
   1602       strcpy (NewList->Str, Argv[1]);
   1603       if (NewList->Str[strlen (NewList->Str) - 1] != '\\') {
   1604         strcat (NewList->Str, "\\");
   1605       }
   1606       //
   1607       // Add it to our linked list
   1608       //
   1609       if (mGlobals.IncludePaths == NULL) {
   1610         mGlobals.IncludePaths = NewList;
   1611       } else {
   1612         mGlobals.LastIncludePath->Next = NewList;
   1613       }
   1614 
   1615       mGlobals.LastIncludePath = NewList;
   1616       Argc--;
   1617       Argv++;
   1618     } else if (_stricmp (Argv[0], "-if") == 0) {
   1619       //
   1620       // Indirection file -- check for one more arg
   1621       //
   1622       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1623         Error (UTILITY_NAME, 0, 0, Argv[0], "missing indirection file name");
   1624         return STATUS_ERROR;
   1625       }
   1626       //
   1627       // Allocate memory for a new list element, fill it in, and
   1628       // add it to our list of include paths. Always make sure it
   1629       // has a "\" on the end of it.
   1630       //
   1631       NewList = malloc (sizeof (TEXT_STRING_LIST));
   1632       if (NewList == NULL) {
   1633         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1634         return STATUS_ERROR;
   1635       }
   1636 
   1637       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
   1638       NewList->Str = malloc (strlen (Argv[1]) + 1);
   1639       if (NewList->Str == NULL) {
   1640         free (NewList);
   1641         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1642         return STATUS_ERROR;
   1643       }
   1644 
   1645       strcpy (NewList->Str, Argv[1]);
   1646       //
   1647       // Add it to our linked list
   1648       //
   1649       if (mGlobals.IndirectionFileName == NULL) {
   1650         mGlobals.IndirectionFileName = NewList;
   1651       } else {
   1652         mGlobals.LastIndirectionFileName->Next = NewList;
   1653       }
   1654 
   1655       mGlobals.LastIndirectionFileName = NewList;
   1656       Argc--;
   1657       Argv++;
   1658     } else if (_stricmp (Argv[0], "-db") == 0) {
   1659       //
   1660       // -db option to specify a database file.
   1661       // Check for one more arg (the database file name)
   1662       //
   1663       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1664         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database file name");
   1665         return STATUS_ERROR;
   1666       }
   1667 
   1668       NewList = malloc (sizeof (TEXT_STRING_LIST));
   1669       if (NewList == NULL) {
   1670         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1671         return STATUS_ERROR;
   1672       }
   1673 
   1674       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
   1675       NewList->Str = malloc (strlen (Argv[1]) + 1);
   1676       if (NewList->Str == NULL) {
   1677         free (NewList);
   1678         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1679         return STATUS_ERROR;
   1680       }
   1681 
   1682       strcpy (NewList->Str, Argv[1]);
   1683       //
   1684       // Add it to our linked list
   1685       //
   1686       if (mGlobals.DatabaseFileName == NULL) {
   1687         mGlobals.DatabaseFileName = NewList;
   1688       } else {
   1689         mGlobals.LastDatabaseFileName->Next = NewList;
   1690       }
   1691 
   1692       mGlobals.LastDatabaseFileName = NewList;
   1693       Argc--;
   1694       Argv++;
   1695     } else if (_stricmp (Argv[0], "-ou") == 0) {
   1696       //
   1697       // -ou option to specify an output unicode file to
   1698       // which we can dump our database.
   1699       //
   1700       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1701         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database dump output file name");
   1702         return STATUS_ERROR;
   1703       }
   1704 
   1705       if (mGlobals.DumpUFileName[0] == 0) {
   1706         strcpy (mGlobals.DumpUFileName, Argv[1]);
   1707       } else {
   1708         Error (UTILITY_NAME, 0, 0, Argv[1], "-ou option already specified with '%s'", mGlobals.DumpUFileName);
   1709         return STATUS_ERROR;
   1710       }
   1711 
   1712       Argc--;
   1713       Argv++;
   1714     } else if (_stricmp (Argv[0], "-hpk") == 0) {
   1715       //
   1716       // -hpk option to create an HII export pack of the input database file
   1717       //
   1718       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1719         Error (UTILITY_NAME, 0, 0, Argv[0], "missing raw string data dump output file name");
   1720         return STATUS_ERROR;
   1721       }
   1722 
   1723       if (mGlobals.HiiExportPackFileName[0] == 0) {
   1724         strcpy (mGlobals.HiiExportPackFileName, Argv[1]);
   1725       } else {
   1726         Error (UTILITY_NAME, 0, 0, Argv[1], "-or option already specified with '%s'", mGlobals.HiiExportPackFileName);
   1727         return STATUS_ERROR;
   1728       }
   1729 
   1730       Argc--;
   1731       Argv++;
   1732     } else if ((_stricmp (Argv[0], "-?") == 0) || (_stricmp (Argv[0], "-h") == 0)) {
   1733       Usage ();
   1734       return STATUS_ERROR;
   1735     } else if (_stricmp (Argv[0], "-v") == 0) {
   1736       mGlobals.Verbose = 1;
   1737     } else if (_stricmp (Argv[0], "-vdbw") == 0) {
   1738       mGlobals.VerboseDatabaseWrite = 1;
   1739     } else if (_stricmp (Argv[0], "-vdbr") == 0) {
   1740       mGlobals.VerboseDatabaseRead = 1;
   1741     } else if (_stricmp (Argv[0], "-newdb") == 0) {
   1742       mGlobals.NewDatabase = 1;
   1743     } else if (_stricmp (Argv[0], "-ignorenotfound") == 0) {
   1744       mGlobals.IgnoreNotFound = 1;
   1745     } else if (_stricmp (Argv[0], "-oc") == 0) {
   1746       //
   1747       // check for one more arg
   1748       //
   1749       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1750         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output C filename");
   1751         return STATUS_ERROR;
   1752       }
   1753 
   1754       strcpy (mGlobals.StringCFileName, Argv[1]);
   1755       Argc--;
   1756       Argv++;
   1757     } else if (_stricmp (Argv[0], "-bn") == 0) {
   1758       //
   1759       // check for one more arg
   1760       //
   1761       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1762         Error (UTILITY_NAME, 0, 0, Argv[0], "missing base name");
   1763         Usage ();
   1764         return STATUS_ERROR;
   1765       }
   1766 
   1767       strcpy (mGlobals.BaseName, Argv[1]);
   1768       Argc--;
   1769       Argv++;
   1770     } else if (_stricmp (Argv[0], "-oh") == 0) {
   1771       //
   1772       // -oh to specify output .h defines file name
   1773       //
   1774       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1775         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output .h filename");
   1776         return STATUS_ERROR;
   1777       }
   1778 
   1779       strcpy (mGlobals.StringHFileName, Argv[1]);
   1780       Argc--;
   1781       Argv++;
   1782     } else if (_stricmp (Argv[0], "-dep") == 0) {
   1783       //
   1784       // -dep to specify output dependency file name
   1785       //
   1786       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1787         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output dependency filename");
   1788         return STATUS_ERROR;
   1789       }
   1790 
   1791       strcpy (mGlobals.OutputDependencyFileName, Argv[1]);
   1792       Argc--;
   1793       Argv++;
   1794     } else if (_stricmp (Argv[0], "-skipext") == 0) {
   1795       //
   1796       // -skipext to skip scanning of files with certain filename extensions
   1797       //
   1798       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1799         Error (UTILITY_NAME, 0, 0, Argv[0], "missing filename extension");
   1800         return STATUS_ERROR;
   1801       }
   1802       //
   1803       // Allocate memory for a new list element, fill it in, and
   1804       // add it to our list of excluded extensions. Always make sure it
   1805       // has a "." as the first character.
   1806       //
   1807       NewList = malloc (sizeof (TEXT_STRING_LIST));
   1808       if (NewList == NULL) {
   1809         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1810         return STATUS_ERROR;
   1811       }
   1812 
   1813       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
   1814       NewList->Str = malloc (strlen (Argv[1]) + 2);
   1815       if (NewList->Str == NULL) {
   1816         free (NewList);
   1817         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1818         return STATUS_ERROR;
   1819       }
   1820 
   1821       if (Argv[1][0] == '.') {
   1822         strcpy (NewList->Str, Argv[1]);
   1823       } else {
   1824         NewList->Str[0] = '.';
   1825         strcpy (NewList->Str + 1, Argv[1]);
   1826       }
   1827       //
   1828       // Add it to our linked list
   1829       //
   1830       if (mGlobals.SkipExt == NULL) {
   1831         mGlobals.SkipExt = NewList;
   1832       } else {
   1833         mGlobals.LastSkipExt->Next = NewList;
   1834       }
   1835 
   1836       mGlobals.LastSkipExt = NewList;
   1837       Argc--;
   1838       Argv++;
   1839     } else if (_stricmp (Argv[0], "-lang") == 0) {
   1840       //
   1841       // "-lang eng" or "-lang spa+cat" to only output certain languages
   1842       //
   1843       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1844         Error (UTILITY_NAME, 0, 0, Argv[0], "missing language name");
   1845         Usage ();
   1846         return STATUS_ERROR;
   1847       }
   1848 
   1849       if (AddCommandLineLanguage (Argv[1]) != STATUS_SUCCESS) {
   1850         return STATUS_ERROR;
   1851       }
   1852 
   1853       Argc--;
   1854       Argv++;
   1855     } else if (_stricmp (Argv[0], "-od") == 0) {
   1856       //
   1857       // Output database file name -- check for another arg
   1858       //
   1859       if ((Argc <= 1) || (Argv[1][0] == '-')) {
   1860         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output database file name");
   1861         return STATUS_ERROR;
   1862       }
   1863 
   1864       strcpy (mGlobals.OutputDatabaseFileName, Argv[1]);
   1865       Argv++;
   1866       Argc--;
   1867     } else {
   1868       //
   1869       // Unrecognized arg
   1870       //
   1871       Error (UTILITY_NAME, 0, 0, Argv[0], "unrecognized option");
   1872       Usage ();
   1873       return STATUS_ERROR;
   1874     }
   1875 
   1876     Argv++;
   1877     Argc--;
   1878   }
   1879   //
   1880   // Make sure they specified the mode parse/scan/dump
   1881   //
   1882   if (mGlobals.Mode == MODE_UNKNOWN) {
   1883     Error (NULL, 0, 0, "must specify one of -parse/-scan/-dump", NULL);
   1884     return STATUS_ERROR;
   1885   }
   1886   //
   1887   // All modes require a database filename
   1888   //
   1889   if (mGlobals.DatabaseFileName == 0) {
   1890     Error (NULL, 0, 0, "must specify a database filename using -db DbFileName", NULL);
   1891     Usage ();
   1892     return STATUS_ERROR;
   1893   }
   1894   //
   1895   // If dumping the database file, then return immediately if all
   1896   // parameters check out.
   1897   //
   1898   if (mGlobals.Mode == MODE_DUMP) {
   1899     //
   1900     // Not much use if they didn't specify -oh or -oc or -ou or -hpk
   1901     //
   1902     if ((mGlobals.DumpUFileName[0] == 0) &&
   1903         (mGlobals.StringHFileName[0] == 0) &&
   1904         (mGlobals.StringCFileName[0] == 0) &&
   1905         (mGlobals.HiiExportPackFileName[0] == 0)
   1906         ) {
   1907       Error (NULL, 0, 0, "-dump without -oc/-oh/-ou/-hpk is a NOP", NULL);
   1908       return STATUS_ERROR;
   1909     }
   1910 
   1911     return STATUS_SUCCESS;
   1912   }
   1913   //
   1914   // Had to specify source string file and output string defines header filename.
   1915   //
   1916   if (mGlobals.Mode == MODE_SCAN) {
   1917     if (Argc < 1) {
   1918       Error (UTILITY_NAME, 0, 0, NULL, "must specify at least one source file to scan with -scan");
   1919       Usage ();
   1920       return STATUS_ERROR;
   1921     }
   1922     //
   1923     // Get the list of filenames
   1924     //
   1925     while (Argc > 0) {
   1926       NewList = malloc (sizeof (TEXT_STRING_LIST));
   1927       if (NewList == NULL) {
   1928         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
   1929         return STATUS_ERROR;
   1930       }
   1931 
   1932       memset (NewList, 0, sizeof (TEXT_STRING_LIST));
   1933       NewList->Str = (UINT8 *) malloc (strlen (Argv[0]) + 1);
   1934       if (NewList->Str == NULL) {
   1935         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
   1936         return STATUS_ERROR;
   1937       }
   1938 
   1939       strcpy (NewList->Str, Argv[0]);
   1940       if (mGlobals.ScanFileName == NULL) {
   1941         mGlobals.ScanFileName = NewList;
   1942       } else {
   1943         mGlobals.LastScanFileName->Next = NewList;
   1944       }
   1945 
   1946       mGlobals.LastScanFileName = NewList;
   1947       Argc--;
   1948       Argv++;
   1949     }
   1950   } else {
   1951     //
   1952     // Parse mode -- must specify an input unicode file name
   1953     //
   1954     if (Argc < 1) {
   1955       Error (UTILITY_NAME, 0, 0, NULL, "must specify input unicode string file name with -parse");
   1956       Usage ();
   1957       return STATUS_ERROR;
   1958     }
   1959 
   1960     strcpy (mGlobals.SourceFiles.FileName, Argv[0]);
   1961   }
   1962 
   1963   return STATUS_SUCCESS;
   1964 }
   1965 //
   1966 // Found "-lang eng,spa+cat" on the command line. Parse the
   1967 // language list and save the setting for later processing.
   1968 //
   1969 static
   1970 STATUS
   1971 AddCommandLineLanguage (
   1972   IN INT8          *Language
   1973   )
   1974 {
   1975   WCHAR_STRING_LIST *WNewList;
   1976   WCHAR             *From;
   1977   WCHAR             *To;
   1978   //
   1979   // Keep processing the input string until we find the end.
   1980   //
   1981   while (*Language) {
   1982     //
   1983     // Allocate memory for a new list element, fill it in, and
   1984     // add it to our list.
   1985     //
   1986     WNewList = MALLOC (sizeof (WCHAR_STRING_LIST));
   1987     if (WNewList == NULL) {
   1988       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1989       return STATUS_ERROR;
   1990     }
   1991 
   1992     memset ((char *) WNewList, 0, sizeof (WCHAR_STRING_LIST));
   1993     WNewList->Str = malloc ((strlen (Language) + 1) * sizeof (WCHAR));
   1994     if (WNewList->Str == NULL) {
   1995       free (WNewList);
   1996       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
   1997       return STATUS_ERROR;
   1998     }
   1999     //
   2000     // Copy it as unicode to our new structure. Then remove the
   2001     // plus signs in it, and verify each language name is 3 characters
   2002     // long. If we find a comma, then we're done with this group, so
   2003     // break out.
   2004     //
   2005 #ifdef USE_VC8
   2006     swprintf (WNewList->Str, (strlen (Language) + 1) * sizeof (WCHAR), L"%S", Language);
   2007 #else
   2008     swprintf (WNewList->Str, L"%S", Language);
   2009 #endif
   2010     From = To = WNewList->Str;
   2011     while (*From) {
   2012       if (*From == L',') {
   2013         break;
   2014       }
   2015 
   2016       if ((wcslen (From) < LANGUAGE_IDENTIFIER_NAME_LEN) ||
   2017             (
   2018               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != 0) &&
   2019               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != UNICODE_PLUS_SIGN) &&
   2020               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != L',')
   2021             )
   2022           ) {
   2023         Error (UTILITY_NAME, 0, 0, Language, "invalid format for language name on command line");
   2024         FREE (WNewList->Str);
   2025         FREE (WNewList);
   2026         return STATUS_ERROR;
   2027       }
   2028 
   2029       wcsncpy (To, From, LANGUAGE_IDENTIFIER_NAME_LEN);
   2030       To += LANGUAGE_IDENTIFIER_NAME_LEN;
   2031       From += LANGUAGE_IDENTIFIER_NAME_LEN;
   2032       if (*From == L'+') {
   2033         From++;
   2034       }
   2035     }
   2036 
   2037     *To = 0;
   2038     //
   2039     // Add it to our linked list
   2040     //
   2041     if (mGlobals.Language == NULL) {
   2042       mGlobals.Language = WNewList;
   2043     } else {
   2044       mGlobals.LastLanguage->Next = WNewList;
   2045     }
   2046 
   2047     mGlobals.LastLanguage = WNewList;
   2048     //
   2049     // Skip to next entry (comma-separated list)
   2050     //
   2051     while (*Language) {
   2052       if (*Language == L',') {
   2053         Language++;
   2054         break;
   2055       }
   2056 
   2057       Language++;
   2058     }
   2059   }
   2060 
   2061   return STATUS_SUCCESS;
   2062 }
   2063 //
   2064 // The contents of the text file are expected to be (one per line)
   2065 //   STRING_IDENTIFIER_NAME   ScopeName
   2066 // For example:
   2067 //   STR_ID_MY_FAVORITE_STRING   IBM
   2068 //
   2069 static
   2070 STATUS
   2071 ParseIndirectionFiles (
   2072   TEXT_STRING_LIST    *Files
   2073   )
   2074 {
   2075   FILE                        *Fptr;
   2076   INT8                        Line[200];
   2077   INT8                        *StringName;
   2078   INT8                        *ScopeName;
   2079   INT8                        *End;
   2080   UINT32                      LineCount;
   2081   WCHAR_MATCHING_STRING_LIST  *NewList;
   2082 
   2083   Line[sizeof (Line) - 1] = 0;
   2084   Fptr                    = NULL;
   2085   while (Files != NULL) {
   2086     Fptr      = fopen (Files->Str, "r");
   2087     LineCount = 0;
   2088     if (Fptr == NULL) {
   2089       Error (NULL, 0, 0, Files->Str, "failed to open input indirection file for reading");
   2090       return STATUS_ERROR;
   2091     }
   2092 
   2093     while (fgets (Line, sizeof (Line), Fptr) != NULL) {
   2094       //
   2095       // remove terminating newline for error printing purposes.
   2096       //
   2097       if (Line[strlen (Line) - 1] == '\n') {
   2098         Line[strlen (Line) - 1] = 0;
   2099       }
   2100 
   2101       LineCount++;
   2102       if (Line[sizeof (Line) - 1] != 0) {
   2103         Error (Files->Str, LineCount, 0, "line length exceeds maximum supported", NULL);
   2104         goto Done;
   2105       }
   2106 
   2107       StringName = Line;
   2108       while (*StringName && (isspace (*StringName))) {
   2109         StringName++;
   2110       }
   2111 
   2112       if (*StringName) {
   2113         if ((*StringName == '_') || isalpha (*StringName)) {
   2114           End = StringName;
   2115           while ((*End) && (*End == '_') || (isalnum (*End))) {
   2116             End++;
   2117           }
   2118 
   2119           if (isspace (*End)) {
   2120             *End = 0;
   2121             End++;
   2122             while (isspace (*End)) {
   2123               End++;
   2124             }
   2125 
   2126             if (*End) {
   2127               ScopeName = End;
   2128               while (*End && !isspace (*End)) {
   2129                 End++;
   2130               }
   2131 
   2132               *End = 0;
   2133               //
   2134               // Add the string name/scope pair
   2135               //
   2136               NewList = malloc (sizeof (WCHAR_MATCHING_STRING_LIST));
   2137               if (NewList == NULL) {
   2138                 Error (NULL, 0, 0, "memory allocation error", NULL);
   2139                 goto Done;
   2140               }
   2141 
   2142               memset (NewList, 0, sizeof (WCHAR_MATCHING_STRING_LIST));
   2143               NewList->Str1 = (WCHAR *) malloc ((strlen (StringName) + 1) * sizeof (WCHAR));
   2144               NewList->Str2 = (WCHAR *) malloc ((strlen (ScopeName) + 1) * sizeof (WCHAR));
   2145               if ((NewList->Str1 == NULL) || (NewList->Str2 == NULL)) {
   2146                 Error (NULL, 0, 0, "memory allocation error", NULL);
   2147                 goto Done;
   2148               }
   2149 
   2150 #ifdef USE_VC8
   2151               swprintf (NewList->Str1, (strlen (StringName) + 1) * sizeof (WCHAR), L"%S", StringName);
   2152               swprintf (NewList->Str2, (strlen (ScopeName) + 1) * sizeof (WCHAR), L"%S", ScopeName);
   2153 #else
   2154               swprintf (NewList->Str1, L"%S", StringName);
   2155               swprintf (NewList->Str2, L"%S", ScopeName);
   2156 #endif
   2157               if (mGlobals.IndirectionList == NULL) {
   2158                 mGlobals.IndirectionList = NewList;
   2159               } else {
   2160                 mGlobals.LastIndirectionList->Next = NewList;
   2161               }
   2162 
   2163               mGlobals.LastIndirectionList = NewList;
   2164             } else {
   2165               Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
   2166               goto Done;
   2167             }
   2168           } else {
   2169             Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
   2170             goto Done;
   2171           }
   2172         } else {
   2173           Error (Files->Str, LineCount, 0, StringName, "invalid string identifier");
   2174           goto Done;
   2175         }
   2176       }
   2177     }
   2178 
   2179     fclose (Fptr);
   2180     Fptr  = NULL;
   2181     Files = Files->Next;
   2182   }
   2183 
   2184 Done:
   2185   if (Fptr != NULL) {
   2186     fclose (Fptr);
   2187     return STATUS_ERROR;
   2188   }
   2189 
   2190   return STATUS_SUCCESS;
   2191 }
   2192 
   2193 static
   2194 STATUS
   2195 ScanFiles (
   2196   TEXT_STRING_LIST *ScanFiles
   2197   )
   2198 {
   2199   char              Line[MAX_LINE_LEN];
   2200   FILE              *Fptr;
   2201   UINT32            LineNum;
   2202   char              *Cptr;
   2203   char              *SavePtr;
   2204   char              *TermPtr;
   2205   char              *StringTokenPos;
   2206   TEXT_STRING_LIST  *SList;
   2207   BOOLEAN           SkipIt;
   2208 
   2209   //
   2210   // Put a null-terminator at the end of the line. If we read in
   2211   // a line longer than we support, then we can catch it.
   2212   //
   2213   Line[MAX_LINE_LEN - 1] = 0;
   2214   //
   2215   // Process each file. If they gave us a skip extension list, then
   2216   // skip it if the extension matches.
   2217   //
   2218   while (ScanFiles != NULL) {
   2219     SkipIt = FALSE;
   2220     for (SList = mGlobals.SkipExt; SList != NULL; SList = SList->Next) {
   2221       if ((strlen (ScanFiles->Str) > strlen (SList->Str)) &&
   2222           (strcmp (ScanFiles->Str + strlen (ScanFiles->Str) - strlen (SList->Str), SList->Str) == 0)
   2223           ) {
   2224         SkipIt = TRUE;
   2225         //
   2226         // printf ("Match: %s : %s\n", ScanFiles->Str, SList->Str);
   2227         //
   2228         break;
   2229       }
   2230     }
   2231 
   2232     if (!SkipIt) {
   2233       if (mGlobals.VerboseScan) {
   2234         printf ("Scanning %s\n", ScanFiles->Str);
   2235       }
   2236 
   2237       Fptr = fopen (ScanFiles->Str, "r");
   2238       if (Fptr == NULL) {
   2239         Error (NULL, 0, 0, ScanFiles->Str, "failed to open input file for scanning");
   2240         return STATUS_ERROR;
   2241       }
   2242 
   2243       LineNum = 0;
   2244       while (fgets (Line, sizeof (Line), Fptr) != NULL) {
   2245         LineNum++;
   2246         if (Line[MAX_LINE_LEN - 1] != 0) {
   2247           Error (ScanFiles->Str, LineNum, 0, "line length exceeds maximum supported by tool", NULL);
   2248           fclose (Fptr);
   2249           return STATUS_ERROR;
   2250         }
   2251         //
   2252         // Remove the newline from the input line so we can print a warning message
   2253         //
   2254         if (Line[strlen (Line) - 1] == '\n') {
   2255           Line[strlen (Line) - 1] = 0;
   2256         }
   2257         //
   2258         // Terminate the line at // comments
   2259         //
   2260         Cptr = strstr (Line, "//");
   2261         if (Cptr != NULL) {
   2262           *Cptr = 0;
   2263         }
   2264 
   2265         Cptr = Line;
   2266         while ((Cptr = strstr (Cptr, STRING_TOKEN)) != NULL) {
   2267           //
   2268           // Found "STRING_TOKEN". Make sure we don't have NUM_STRING_TOKENS or
   2269           // something like that. Then make sure it's followed by
   2270           // an open parenthesis, a string identifier, and then a closing
   2271           // parenthesis.
   2272           //
   2273           if (mGlobals.VerboseScan) {
   2274             printf (" %d: %s", LineNum, Cptr);
   2275           }
   2276 
   2277           if (((Cptr == Line) || (!IsValidIdentifierChar (*(Cptr - 1), FALSE))) &&
   2278               (!IsValidIdentifierChar (*(Cptr + sizeof (STRING_TOKEN) - 1), FALSE))
   2279               ) {
   2280             StringTokenPos  = Cptr;
   2281             SavePtr         = Cptr;
   2282             Cptr += strlen (STRING_TOKEN);
   2283             while (*Cptr && isspace (*Cptr) && (*Cptr != '(')) {
   2284               Cptr++;
   2285             }
   2286 
   2287             if (*Cptr != '(') {
   2288               Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
   2289             } else {
   2290               //
   2291               // Skip over the open-parenthesis and find the next non-blank character
   2292               //
   2293               Cptr++;
   2294               while (isspace (*Cptr)) {
   2295                 Cptr++;
   2296               }
   2297 
   2298               SavePtr = Cptr;
   2299               if ((*Cptr == '_') || isalpha (*Cptr)) {
   2300                 while ((*Cptr == '_') || (isalnum (*Cptr))) {
   2301                   Cptr++;
   2302                 }
   2303 
   2304                 TermPtr = Cptr;
   2305                 while (*Cptr && isspace (*Cptr)) {
   2306                   Cptr++;
   2307                 }
   2308 
   2309                 if (*Cptr != ')') {
   2310                   Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
   2311                 }
   2312 
   2313                 if (*TermPtr) {
   2314                   *TermPtr  = 0;
   2315                   Cptr      = TermPtr + 1;
   2316                 } else {
   2317                   Cptr = TermPtr;
   2318                 }
   2319                 //
   2320                 // Add the string identifier to the list of used strings
   2321                 //
   2322                 ParserSetPosition (ScanFiles->Str, LineNum);
   2323                 StringDBSetStringReferenced (SavePtr, mGlobals.IgnoreNotFound);
   2324                 if (mGlobals.VerboseScan) {
   2325                   printf ("...referenced %s", SavePtr);
   2326                 }
   2327               } else {
   2328                 Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected valid string identifier name");
   2329               }
   2330             }
   2331           } else {
   2332             //
   2333             // Found it, but it's a substring of something else. Advance our pointer.
   2334             //
   2335             Cptr++;
   2336           }
   2337 
   2338           if (mGlobals.VerboseScan) {
   2339             printf ("\n");
   2340           }
   2341         }
   2342       }
   2343 
   2344       fclose (Fptr);
   2345     } else {
   2346       //
   2347       // Skipping this file type
   2348       //
   2349       if (mGlobals.VerboseScan) {
   2350         printf ("Skip scanning of %s\n", ScanFiles->Str);
   2351       }
   2352     }
   2353 
   2354     ScanFiles = ScanFiles->Next;
   2355   }
   2356 
   2357   return STATUS_SUCCESS;
   2358 }
   2359 //
   2360 // Free the global string lists we allocated memory for
   2361 //
   2362 static
   2363 void
   2364 FreeLists (
   2365   VOID
   2366   )
   2367 {
   2368   TEXT_STRING_LIST  *Temp;
   2369   WCHAR_STRING_LIST *WTemp;
   2370 
   2371   //
   2372   // Traverse the include paths, freeing each
   2373   //
   2374   while (mGlobals.IncludePaths != NULL) {
   2375     Temp = mGlobals.IncludePaths->Next;
   2376     free (mGlobals.IncludePaths->Str);
   2377     free (mGlobals.IncludePaths);
   2378     mGlobals.IncludePaths = Temp;
   2379   }
   2380   //
   2381   // If we did a scan, then free up our
   2382   // list of files to scan.
   2383   //
   2384   while (mGlobals.ScanFileName != NULL) {
   2385     Temp = mGlobals.ScanFileName->Next;
   2386     free (mGlobals.ScanFileName->Str);
   2387     free (mGlobals.ScanFileName);
   2388     mGlobals.ScanFileName = Temp;
   2389   }
   2390   //
   2391   // If they gave us a list of filename extensions to
   2392   // skip on scan, then free them up.
   2393   //
   2394   while (mGlobals.SkipExt != NULL) {
   2395     Temp = mGlobals.SkipExt->Next;
   2396     free (mGlobals.SkipExt->Str);
   2397     free (mGlobals.SkipExt);
   2398     mGlobals.SkipExt = Temp;
   2399   }
   2400   //
   2401   // Free up any languages specified
   2402   //
   2403   while (mGlobals.Language != NULL) {
   2404     WTemp = mGlobals.Language->Next;
   2405     free (mGlobals.Language->Str);
   2406     free (mGlobals.Language);
   2407     mGlobals.Language = WTemp;
   2408   }
   2409   //
   2410   // Free up our indirection list
   2411   //
   2412   while (mGlobals.IndirectionList != NULL) {
   2413     mGlobals.LastIndirectionList = mGlobals.IndirectionList->Next;
   2414     free (mGlobals.IndirectionList->Str1);
   2415     free (mGlobals.IndirectionList->Str2);
   2416     free (mGlobals.IndirectionList);
   2417     mGlobals.IndirectionList = mGlobals.LastIndirectionList;
   2418   }
   2419 
   2420   while (mGlobals.IndirectionFileName != NULL) {
   2421     mGlobals.LastIndirectionFileName = mGlobals.IndirectionFileName->Next;
   2422     free (mGlobals.IndirectionFileName->Str);
   2423     free (mGlobals.IndirectionFileName);
   2424     mGlobals.IndirectionFileName = mGlobals.LastIndirectionFileName;
   2425   }
   2426 }
   2427 
   2428 static
   2429 BOOLEAN
   2430 IsValidIdentifierChar (
   2431   INT8      Char,
   2432   BOOLEAN   FirstChar
   2433   )
   2434 {
   2435   //
   2436   // If it's the first character of an identifier, then
   2437   // it must be one of [A-Za-z_].
   2438   //
   2439   if (FirstChar) {
   2440     if (isalpha (Char) || (Char == '_')) {
   2441       return TRUE;
   2442     }
   2443   } else {
   2444     //
   2445     // If it's not the first character, then it can
   2446     // be one of [A-Za-z_0-9]
   2447     //
   2448     if (isalnum (Char) || (Char == '_')) {
   2449       return TRUE;
   2450     }
   2451   }
   2452 
   2453   return FALSE;
   2454 }
   2455 
   2456 static
   2457 void
   2458 RewindFile (
   2459   SOURCE_FILE *SourceFile
   2460   )
   2461 {
   2462   SourceFile->LineNum       = 1;
   2463   SourceFile->FileBufferPtr = SourceFile->FileBuffer;
   2464   SourceFile->EndOfFile     = 0;
   2465 }
   2466 
   2467 static
   2468 BOOLEAN
   2469 SkipTo (
   2470   SOURCE_FILE *SourceFile,
   2471   WCHAR       WChar,
   2472   BOOLEAN     StopAfterNewline
   2473   )
   2474 {
   2475   while (!EndOfFile (SourceFile)) {
   2476     //
   2477     // Check for the character of interest
   2478     //
   2479     if (SourceFile->FileBufferPtr[0] == WChar) {
   2480       return TRUE;
   2481     } else {
   2482       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
   2483         SourceFile->LineNum++;
   2484         if (StopAfterNewline) {
   2485           SourceFile->FileBufferPtr++;
   2486           if (SourceFile->FileBufferPtr[0] == 0) {
   2487             SourceFile->FileBufferPtr++;
   2488           }
   2489 
   2490           return FALSE;
   2491         }
   2492       }
   2493 
   2494       SourceFile->FileBufferPtr++;
   2495     }
   2496   }
   2497 
   2498   return FALSE;
   2499 }
   2500 
   2501 static
   2502 void
   2503 Usage (
   2504   VOID
   2505   )
   2506 /*++
   2507 
   2508 Routine Description:
   2509 
   2510   Print usage information for this utility.
   2511 
   2512 Arguments:
   2513 
   2514   None.
   2515 
   2516 Returns:
   2517 
   2518   Nothing.
   2519 
   2520 --*/
   2521 {
   2522   int         Index;
   2523   const char  *Str[] = {
   2524     UTILITY_NAME" "UTILITY_VERSION" - Intel String Gather Utility",
   2525     "  Copyright (C), 2004 - 2008 Intel Corporation",
   2526 
   2527 #if ( defined(UTILITY_BUILD) && defined(UTILITY_VENDOR) )
   2528     "  Built from "UTILITY_BUILD", project of "UTILITY_VENDOR,
   2529 #endif
   2530     "",
   2531     "Usage:",
   2532     "  "UTILITY_NAME" -parse [OPTION] FILE",
   2533     "  "UTILITY_NAME" -scan  [OPTION] FILE",
   2534     "  "UTILITY_NAME" -dump  [OPTION]",
   2535     "Description:",
   2536     "  Process unicode strings file.",
   2537     "Common options include:",
   2538     "  -h or -?         for this help information",
   2539     "  -db Database     required name of output/input database file",
   2540     "  -bn BaseName     for use in the .h and .c output files",
   2541     "                   Default = "DEFAULT_BASE_NAME,
   2542     "  -v               for verbose output",
   2543     "  -vdbw            for verbose output when writing database",
   2544     "  -vdbr            for verbose output when reading database",
   2545     "  -od FileName     to specify an output database file name",
   2546     "Parse options include:",
   2547     "  -i IncludePath   add IncludePath to list of search paths",
   2548     "  -dep FileName    to specify an output dependency file name",
   2549     "  -newdb           to not read in existing database file",
   2550     "  -uqs             to indicate that unquoted strings are used",
   2551     "  FileNames        name of one or more unicode files to parse",
   2552     "Scan options include:",
   2553     "  -scan            scan text file(s) for STRING_TOKEN() usage",
   2554     "  -skipext .ext    to skip scan of files with .ext filename extension",
   2555     "  -ignorenotfound  ignore if a given STRING_TOKEN(STR) is not ",
   2556     "                   found in the database",
   2557     "  FileNames        one or more files to scan",
   2558     "Dump options include:",
   2559     "  -oc FileName     write string data to FileName",
   2560     "  -oh FileName     write string defines to FileName",
   2561     "  -ou FileName     dump database to unicode file FileName",
   2562     "  -lang Lang       only dump for the language 'Lang'",
   2563     "  -if FileName     to specify an indirection file",
   2564     "  -hpk FileName    to create an HII export pack of the strings",
   2565     "",
   2566     "The expected process is to parse a unicode string file to create an initial",
   2567     "database of string identifier names and string definitions. Then text files",
   2568     "should be scanned for STRING_TOKEN() usages, and the referenced",
   2569     "strings will be tagged as used in the database. After all files have been",
   2570     "scanned, then the database should be dumped to create the necessary output",
   2571     "files.",
   2572     "",
   2573     NULL
   2574   };
   2575   for (Index = 0; Str[Index] != NULL; Index++) {
   2576     fprintf (stdout, "%s\n", Str[Index]);
   2577   }
   2578 }
   2579