1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /****************************************************************************** 4 * Copyright (C) 2008-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8 #include "unicode/utypes.h" 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include "unicode/utypes.h" 13 #include "unicode/putil.h" 14 #include "cmemory.h" 15 #include "cstring.h" 16 #include "filestrm.h" 17 #include "toolutil.h" 18 #include "unicode/uclean.h" 19 #include "unewdata.h" 20 #include "putilimp.h" 21 #include "pkg_gencmn.h" 22 23 #define STRING_STORE_SIZE 200000 24 25 #define COMMON_DATA_NAME U_ICUDATA_NAME 26 #define DATA_TYPE "dat" 27 28 /* ICU package data file format (.dat files) ------------------------------- *** 29 30 Description of the data format after the usual ICU data file header 31 (UDataInfo etc.). 32 33 Format version 1 34 35 A .dat package file contains a simple Table of Contents of item names, 36 followed by the items themselves: 37 38 1. ToC table 39 40 uint32_t count; - number of items 41 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: 42 uint32_t nameOffset; - offset of the item name 43 uint32_t dataOffset; - offset of the item data 44 both are byte offsets from the beginning of the data 45 46 2. item name strings 47 48 All item names are stored as char * strings in one block between the ToC table 49 and the data items. 50 51 3. data items 52 53 The data items are stored following the item names block. 54 Each data item is 16-aligned. 55 The data items are stored in the sorted order of their names. 56 57 Therefore, the top of the name strings block is the offset of the first item, 58 the length of the last item is the difference between its offset and 59 the .dat file length, and the length of all previous items is the difference 60 between its offset and the next one. 61 62 ----------------------------------------------------------------------------- */ 63 64 /* UDataInfo cf. udata.h */ 65 static const UDataInfo dataInfo={ 66 sizeof(UDataInfo), 67 0, 68 69 U_IS_BIG_ENDIAN, 70 U_CHARSET_FAMILY, 71 sizeof(UChar), 72 0, 73 74 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 75 {1, 0, 0, 0}, /* formatVersion */ 76 {3, 0, 0, 0} /* dataVersion */ 77 }; 78 79 static uint32_t maxSize; 80 81 static char stringStore[STRING_STORE_SIZE]; 82 static uint32_t stringTop=0, basenameTotal=0; 83 84 typedef struct { 85 char *pathname, *basename; 86 uint32_t basenameLength, basenameOffset, fileSize, fileOffset; 87 } File; 88 89 #define CHUNK_FILE_COUNT 256 90 static File *files = NULL; 91 static uint32_t fileCount=0; 92 static uint32_t fileMax = 0; 93 94 95 static char *symPrefix = NULL; 96 97 #define LINE_BUFFER_SIZE 512 98 /* prototypes --------------------------------------------------------------- */ 99 100 static void 101 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); 102 103 static char * 104 allocString(uint32_t length); 105 106 static int 107 compareFiles(const void *file1, const void *file2); 108 109 static char * 110 pathToFullPath(const char *path, const char *source); 111 112 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */ 113 static void 114 fixDirToTreePath(char *s); 115 /* -------------------------------------------------------------------------- */ 116 117 U_CAPI void U_EXPORT2 118 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, 119 const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { 120 static char buffer[4096]; 121 char *line; 122 char *linePtr; 123 char *s = NULL; 124 UErrorCode errorCode=U_ZERO_ERROR; 125 uint32_t i, fileOffset, basenameOffset, length, nread; 126 FileStream *in, *file; 127 128 line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); 129 if (line == NULL) { 130 fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); 131 exit(U_MEMORY_ALLOCATION_ERROR); 132 } 133 134 linePtr = line; 135 136 maxSize = max_size; 137 138 if (destDir == NULL) { 139 destDir = u_getDataDirectory(); 140 } 141 if (name == NULL) { 142 name = COMMON_DATA_NAME; 143 } 144 if (type == NULL) { 145 type = DATA_TYPE; 146 } 147 if (source == NULL) { 148 source = "."; 149 } 150 151 if (dataFile == NULL) { 152 in = T_FileStream_stdin(); 153 } else { 154 in = T_FileStream_open(dataFile, "r"); 155 if(in == NULL) { 156 fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); 157 exit(U_FILE_ACCESS_ERROR); 158 } 159 } 160 161 if (verbose) { 162 if(sourceTOC) { 163 printf("generating %s_%s.c (table of contents source file)\n", name, type); 164 } else { 165 printf("generating %s.%s (common data file with table of contents)\n", name, type); 166 } 167 } 168 169 /* read the list of files and get their lengths */ 170 while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), 171 LINE_BUFFER_SIZE))!=NULL) { 172 /* remove trailing newline characters and parse space separated items */ 173 if (s != NULL && *s != 0) { 174 line=s; 175 } else { 176 s=line; 177 } 178 while(*s!=0) { 179 if(*s==' ') { 180 *s=0; 181 ++s; 182 break; 183 } else if(*s=='\r' || *s=='\n') { 184 *s=0; 185 break; 186 } 187 ++s; 188 } 189 190 /* check for comment */ 191 192 if (*line == '#') { 193 continue; 194 } 195 196 /* add the file */ 197 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 198 { 199 char *t; 200 while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { 201 *t = U_FILE_SEP_CHAR; 202 } 203 } 204 #endif 205 addFile(getLongPathname(line), name, source, sourceTOC, verbose); 206 } 207 208 uprv_free(linePtr); 209 210 if(in!=T_FileStream_stdin()) { 211 T_FileStream_close(in); 212 } 213 214 if(fileCount==0) { 215 fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile); 216 return; 217 } 218 219 /* sort the files by basename */ 220 qsort(files, fileCount, sizeof(File), compareFiles); 221 222 if(!sourceTOC) { 223 UNewDataMemory *out; 224 225 /* determine the offsets of all basenames and files in this common one */ 226 basenameOffset=4+8*fileCount; 227 fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; 228 for(i=0; i<fileCount; ++i) { 229 files[i].fileOffset=fileOffset; 230 fileOffset+=(files[i].fileSize+15)&~0xf; 231 files[i].basenameOffset=basenameOffset; 232 basenameOffset+=files[i].basenameLength; 233 } 234 235 /* create the output file */ 236 out=udata_create(destDir, type, name, 237 &dataInfo, 238 copyRight == NULL ? U_COPYRIGHT_STRING : copyRight, 239 &errorCode); 240 if(U_FAILURE(errorCode)) { 241 fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", 242 destDir, name, type, 243 u_errorName(errorCode)); 244 exit(errorCode); 245 } 246 247 /* write the table of contents */ 248 udata_write32(out, fileCount); 249 for(i=0; i<fileCount; ++i) { 250 udata_write32(out, files[i].basenameOffset); 251 udata_write32(out, files[i].fileOffset); 252 } 253 254 /* write the basenames */ 255 for(i=0; i<fileCount; ++i) { 256 udata_writeString(out, files[i].basename, files[i].basenameLength); 257 } 258 length=4+8*fileCount+basenameTotal; 259 260 /* copy the files */ 261 for(i=0; i<fileCount; ++i) { 262 /* pad to 16-align the next file */ 263 length&=0xf; 264 if(length!=0) { 265 udata_writePadding(out, 16-length); 266 } 267 268 if (verbose) { 269 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); 270 } 271 272 /* copy the next file */ 273 file=T_FileStream_open(files[i].pathname, "rb"); 274 if(file==NULL) { 275 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); 276 exit(U_FILE_ACCESS_ERROR); 277 } 278 for(nread = 0;;) { 279 length=T_FileStream_read(file, buffer, sizeof(buffer)); 280 if(length <= 0) { 281 break; 282 } 283 nread += length; 284 udata_writeBlock(out, buffer, length); 285 } 286 T_FileStream_close(file); 287 length=files[i].fileSize; 288 289 if (nread != files[i].fileSize) { 290 fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); 291 exit(U_FILE_ACCESS_ERROR); 292 } 293 } 294 295 /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ 296 length&=0xf; 297 if(length!=0) { 298 udata_writePadding(out, 16-length); 299 } 300 301 /* finish */ 302 udata_finish(out, &errorCode); 303 if(U_FAILURE(errorCode)) { 304 fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); 305 exit(errorCode); 306 } 307 } else { 308 /* write a .c source file with the table of contents */ 309 char *filename; 310 FileStream *out; 311 312 /* create the output filename */ 313 filename=s=buffer; 314 uprv_strcpy(filename, destDir); 315 s=filename+uprv_strlen(filename); 316 if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { 317 *s++=U_FILE_SEP_CHAR; 318 } 319 uprv_strcpy(s, name); 320 if(*(type)!=0) { 321 s+=uprv_strlen(s); 322 *s++='_'; 323 uprv_strcpy(s, type); 324 } 325 s+=uprv_strlen(s); 326 uprv_strcpy(s, ".c"); 327 328 /* open the output file */ 329 out=T_FileStream_open(filename, "w"); 330 if (gencmnFileName != NULL) { 331 uprv_strcpy(gencmnFileName, filename); 332 } 333 if(out==NULL) { 334 fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); 335 exit(U_FILE_ACCESS_ERROR); 336 } 337 338 /* write the source file */ 339 sprintf(buffer, 340 "/*\n" 341 " * ICU common data table of contents for %s.%s\n" 342 " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" 343 " */\n\n" 344 "#include \"unicode/utypes.h\"\n" 345 "#include \"unicode/udata.h\"\n" 346 "\n" 347 "/* external symbol declarations for data (%d files) */\n", 348 name, type, fileCount); 349 T_FileStream_writeLine(out, buffer); 350 351 sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); 352 T_FileStream_writeLine(out, buffer); 353 for(i=1; i<fileCount; ++i) { 354 sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); 355 T_FileStream_writeLine(out, buffer); 356 } 357 T_FileStream_writeLine(out, ";\n\n"); 358 359 sprintf( 360 buffer, 361 "U_EXPORT struct {\n" 362 " uint16_t headerSize;\n" 363 " uint8_t magic1, magic2;\n" 364 " UDataInfo info;\n" 365 " char padding[%lu];\n" 366 " uint32_t count, reserved;\n" 367 " struct {\n" 368 " const char *name;\n" 369 " const void *data;\n" 370 " } toc[%lu];\n" 371 "} U_EXPORT2 %s_dat = {\n" 372 " 32, 0xda, 0x27, {\n" 373 " %lu, 0,\n" 374 " %u, %u, %u, 0,\n" 375 " {0x54, 0x6f, 0x43, 0x50},\n" 376 " {1, 0, 0, 0},\n" 377 " {0, 0, 0, 0}\n" 378 " },\n" 379 " \"\", %lu, 0, {\n", 380 (unsigned long)32-4-sizeof(UDataInfo), 381 (unsigned long)fileCount, 382 entrypointName, 383 (unsigned long)sizeof(UDataInfo), 384 U_IS_BIG_ENDIAN, 385 U_CHARSET_FAMILY, 386 U_SIZEOF_UCHAR, 387 (unsigned long)fileCount 388 ); 389 T_FileStream_writeLine(out, buffer); 390 391 sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); 392 T_FileStream_writeLine(out, buffer); 393 for(i=1; i<fileCount; ++i) { 394 sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); 395 T_FileStream_writeLine(out, buffer); 396 } 397 398 T_FileStream_writeLine(out, "\n }\n};\n"); 399 T_FileStream_close(out); 400 401 uprv_free(symPrefix); 402 } 403 } 404 405 static void 406 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) { 407 char *s; 408 uint32_t length; 409 char *fullPath = NULL; 410 411 if(fileCount==fileMax) { 412 fileMax += CHUNK_FILE_COUNT; 413 files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */ 414 if(files==NULL) { 415 fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount); 416 exit(U_MEMORY_ALLOCATION_ERROR); 417 } 418 } 419 420 if(!sourceTOC) { 421 FileStream *file; 422 423 if(uprv_pathIsAbsolute(filename)) { 424 fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename); 425 exit(U_ILLEGAL_ARGUMENT_ERROR); 426 } 427 fullPath = pathToFullPath(filename, source); 428 /* store the pathname */ 429 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); 430 s=allocString(length); 431 uprv_strcpy(s, name); 432 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); 433 uprv_strcat(s, filename); 434 435 /* get the basename */ 436 fixDirToTreePath(s); 437 files[fileCount].basename=s; 438 files[fileCount].basenameLength=length; 439 440 files[fileCount].pathname=fullPath; 441 442 basenameTotal+=length; 443 444 /* try to open the file */ 445 file=T_FileStream_open(fullPath, "rb"); 446 if(file==NULL) { 447 fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath); 448 exit(U_FILE_ACCESS_ERROR); 449 } 450 451 /* get the file length */ 452 length=T_FileStream_size(file); 453 if(T_FileStream_error(file) || length<=20) { 454 fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath); 455 exit(U_FILE_ACCESS_ERROR); 456 } 457 458 T_FileStream_close(file); 459 460 /* do not add files that are longer than maxSize */ 461 if(maxSize && length>maxSize) { 462 if (verbose) { 463 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); 464 } 465 return; 466 } 467 files[fileCount].fileSize=length; 468 } else { 469 char *t; 470 /* get and store the basename */ 471 /* need to include the package name */ 472 length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); 473 s=allocString(length); 474 uprv_strcpy(s, name); 475 uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); 476 uprv_strcat(s, filename); 477 fixDirToTreePath(s); 478 files[fileCount].basename=s; 479 /* turn the basename into an entry point name and store in the pathname field */ 480 t=files[fileCount].pathname=allocString(length); 481 while(--length>0) { 482 if(*s=='.' || *s=='-' || *s=='/') { 483 *t='_'; 484 } else { 485 *t=*s; 486 } 487 ++s; 488 ++t; 489 } 490 *t=0; 491 } 492 ++fileCount; 493 } 494 495 static char * 496 allocString(uint32_t length) { 497 uint32_t top=stringTop+length; 498 char *p; 499 500 if(top>STRING_STORE_SIZE) { 501 fprintf(stderr, "gencmn: out of memory\n"); 502 exit(U_MEMORY_ALLOCATION_ERROR); 503 } 504 p=stringStore+stringTop; 505 stringTop=top; 506 return p; 507 } 508 509 static char * 510 pathToFullPath(const char *path, const char *source) { 511 int32_t length; 512 int32_t newLength; 513 char *fullPath; 514 int32_t n; 515 516 length = (uint32_t)(uprv_strlen(path) + 1); 517 newLength = (length + 1 + (int32_t)uprv_strlen(source)); 518 fullPath = uprv_malloc(newLength); 519 if(source != NULL) { 520 uprv_strcpy(fullPath, source); 521 uprv_strcat(fullPath, U_FILE_SEP_STRING); 522 } else { 523 fullPath[0] = 0; 524 } 525 n = (int32_t)uprv_strlen(fullPath); 526 fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ 527 /* when conditional code below is not compiled. */ 528 uprv_strcat(fullPath, path); 529 530 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) 531 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) 532 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ 533 for(;fullPath[n];n++) { 534 if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { 535 fullPath[n] = U_FILE_SEP_CHAR; 536 } 537 } 538 #endif 539 #endif 540 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) 541 /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ 542 for(;fullPath[n];n++) { 543 if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { 544 fullPath[n] = U_FILE_SEP_CHAR; 545 } 546 } 547 #endif 548 return fullPath; 549 } 550 551 static int 552 compareFiles(const void *file1, const void *file2) { 553 /* sort by basename */ 554 return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); 555 } 556 557 static void 558 fixDirToTreePath(char *s) 559 { 560 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) 561 char *t; 562 #endif 563 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) 564 for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { 565 *t = U_TREE_ENTRY_SEP_CHAR; 566 } 567 #endif 568 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) 569 for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { 570 *t = U_TREE_ENTRY_SEP_CHAR; 571 } 572 #endif 573 } 574