1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 2 * 3 * ***** BEGIN LICENSE BLOCK ***** 4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 1.1 (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * The Original Code is msmap2tsv.c code, released 17 * Oct 3, 2002. 18 * 19 * The Initial Developer of the Original Code is 20 * Netscape Communications Corporation. 21 * Portions created by the Initial Developer are Copyright (C) 2002 22 * the Initial Developer. All Rights Reserved. 23 * 24 * Contributor(s): 25 * Garrett Arch Blythe, 03-October-2002 26 * 27 * Alternatively, the contents of this file may be used under the terms of 28 * either the GNU General Public License Version 2 or later (the "GPL"), or 29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 30 * in which case the provisions of the GPL or the LGPL are applicable instead 31 * of those above. If you wish to allow use of your version of this file only 32 * under the terms of either the GPL or the LGPL, and not to allow others to 33 * use your version of this file under the terms of the MPL, indicate your 34 * decision by deleting the provisions above and replace them with the notice 35 * and other provisions required by the GPL or the LGPL. If you do not delete 36 * the provisions above, a recipient may use your version of this file under 37 * the terms of any one of the MPL, the GPL or the LGPL. 38 * 39 * ***** END LICENSE BLOCK ***** */ 40 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <time.h> 45 #include <ctype.h> 46 47 #include "msmap.h" 48 49 #if defined(_WIN32) 50 #include <windows.h> 51 #include <imagehlp.h> 52 53 #define F_DEMANGLE 1 54 #define DEMANGLE_STATE_NORMAL 0 55 #define DEMANGLE_STATE_QDECODE 1 56 #define DEMANGLE_STATE_PROLOGUE_1 2 57 #define DEMANGLE_STATE_HAVE_TYPE 3 58 #define DEMANGLE_STATE_DEC_LENGTH 4 59 #define DEMANGLE_STATE_HEX_LENGTH 5 60 #define DEMANGLE_STATE_PROLOGUE_SECONDARY 6 61 #define DEMANGLE_STATE_DOLLAR_1 7 62 #define DEMANGLE_STATE_DOLLAR_2 8 63 #define DEMANGLE_STATE_START 9 64 #define DEMANGLE_STATE_STOP 10 65 #define DEMANGLE_SAFE_CHAR(eval) (isprint(eval) ? eval : ' ') 66 67 #else 68 #define F_DEMANGLE 0 69 #endif /* WIN32 */ 70 71 72 #define ERROR_REPORT(num, val, msg) fprintf(stderr, "error(%d):\t\"%s\"\t%s\n", (num), (val), (msg)); 73 #define CLEANUP(ptr) do { if(NULL != ptr) { free(ptr); ptr = NULL; } } while(0) 74 75 76 typedef struct __struct_SymDB_Size 77 /* 78 ** The size of the symbol. 79 ** The size is nested withing a symbols structures to produce a fast 80 ** lookup path. 81 ** The objects are listed in case the client of the symdb needs to 82 ** match the object name in the scenario where multiple symbol 83 ** sizes are present. 84 ** 85 ** mSize The size of the symbol in these objects. 86 ** mObjects A list of objects containing said symbol. 87 ** mObjectCount Number of objects. 88 */ 89 { 90 unsigned mSize; 91 char** mObjects; 92 unsigned mObjectCount; 93 } 94 SymDB_Size; 95 96 97 typedef struct __struct_SymDB_Section 98 /* 99 ** Each section for a symbol has a list of sizes. 100 ** Should there be exactly one size for the symbol, then that 101 ** is the size that should be accepted. 102 ** If there is more than one size, then a match on the object 103 ** should be attempted, held withing each size. 104 ** 105 ** mName The section name. 106 ** mSizes The varoius sizes of the symbol in this section. 107 ** mSizeCount The number of available sizes. 108 */ 109 { 110 char* mName; 111 SymDB_Size* mSizes; 112 unsigned mSizeCount; 113 } 114 SymDB_Section; 115 116 117 typedef struct __struct_SymDB_Symbol 118 /* 119 ** Each symbol has at least one section. 120 ** The section indicates what type of symbol a client may be looking for. 121 ** If there is no match on the section, then the client should not trust 122 ** the symbdb. 123 ** 124 ** mName The mangled name of the symbol. 125 ** mSections Various sections this symbol belongs to. 126 ** mSectionCount The number of sections. 127 */ 128 { 129 char* mName; 130 SymDB_Section* mSections; 131 unsigned mSectionCount; 132 } 133 SymDB_Symbol; 134 135 136 #define SYMDB_SYMBOL_GROWBY 0x1000 /* how many sybols to allocate at a time */ 137 138 139 typedef struct __struct_SymDB_Container 140 /* 141 ** The symbol DB container object. 142 ** The goal of the symbol DB is to have exactly one SymDB_Symbol for each 143 ** mangled name, no matter how ever many identical mangled names there 144 ** are in the input. 145 ** The input is already expected to be well sorted, futher this leads to 146 ** the ability to binary search for symbol name matches. 147 ** 148 ** mSymbols The symbols. 149 ** mSymbolCount The number of symbols in the DB. 150 ** mSymbolCapacity The number of symbols we can hold (before realloc). 151 */ 152 { 153 SymDB_Symbol* mSymbols; 154 unsigned mSymbolCount; 155 unsigned mSymbolCapacity; 156 } 157 SymDB_Container; 158 159 160 typedef struct __struct_Options 161 /* 162 ** Options to control how we perform. 163 ** 164 ** mProgramName Used in help text. 165 ** mInput File to read for input. 166 ** Default is stdin. 167 ** mInputName Name of the file. 168 ** mOutput Output file, append. 169 ** Default is stdout. 170 ** mOutputName Name of the file. 171 ** mHelp Whether or not help should be shown. 172 ** mMatchModules Array of strings which the module name should match. 173 ** mMatchModuleCount Number of items in array. 174 ** mSymDBName Symbol DB filename. 175 ** mBatchMode Batch mode. 176 ** When in batch mode, the input file contains a list of 177 ** map files to process. 178 ** Normally the input file is a single map file itself. 179 */ 180 { 181 const char* mProgramName; 182 FILE* mInput; 183 char* mInputName; 184 FILE* mOutput; 185 char* mOutputName; 186 int mHelp; 187 char** mMatchModules; 188 unsigned mMatchModuleCount; 189 char* mSymDBName; 190 SymDB_Container* mSymDB; 191 int mBatchMode; 192 } 193 Options; 194 195 196 typedef struct __struct_Switch 197 /* 198 ** Command line options. 199 */ 200 { 201 const char* mLongName; 202 const char* mShortName; 203 int mHasValue; 204 const char* mValue; 205 const char* mDescription; 206 } 207 Switch; 208 209 #define DESC_NEWLINE "\n\t\t" 210 211 static Switch gInputSwitch = {"--input", "-i", 1, NULL, "Specify input file." DESC_NEWLINE "stdin is default."}; 212 static Switch gOutputSwitch = {"--output", "-o", 1, NULL, "Specify output file." DESC_NEWLINE "Appends if file exists." DESC_NEWLINE "stdout is default."}; 213 static Switch gHelpSwitch = {"--help", "-h", 0, NULL, "Information on usage."}; 214 static Switch gMatchModuleSwitch = {"--match-module", "-mm", 1, NULL, "Specify a valid module name." DESC_NEWLINE "Multiple specifications allowed." DESC_NEWLINE "If a module name does not match one of the names specified then no output will occur."}; 215 static Switch gSymDBSwitch = {"--symdb", "-sdb", 1, NULL, "Specify a symbol tsv db input file." DESC_NEWLINE "Such a symdb is produced using the tool msdump2symdb." DESC_NEWLINE "This allows better symbol size approximations." DESC_NEWLINE "The symdb file must be pre-sorted."}; 216 static Switch gBatchModeSwitch = {"--batch", "-b", 0, NULL, "Runs in batch mode." DESC_NEWLINE "The input file contains a list of map files." DESC_NEWLINE "Normally the input file is a map file itself." DESC_NEWLINE "This eliminates reprocessing the symdb for multiple map files."}; 217 218 static Switch* gSwitches[] = { 219 &gInputSwitch, 220 &gOutputSwitch, 221 &gMatchModuleSwitch, 222 &gSymDBSwitch, 223 &gBatchModeSwitch, 224 &gHelpSwitch 225 }; 226 227 228 typedef struct __struct_MSMap_ReadState 229 /* 230 ** Keep track of what state we are while reading input. 231 ** This gives the input context in which we absorb the datum. 232 */ 233 { 234 int mHasModule; 235 236 int mHasTimestamp; 237 238 int mHasPreferredLoadAddress; 239 240 int mHasSegmentData; 241 int mSegmentDataSkippedLine; 242 243 int mHasPublicSymbolData; 244 int mHasPublicSymbolDataSkippedLines; 245 246 int mHasEntryPoint; 247 248 int mFoundStaticSymbols; 249 } 250 MSMap_ReadState; 251 252 253 char* skipWhite(char* inScan) 254 /* 255 ** Skip whitespace. 256 */ 257 { 258 char* retval = inScan; 259 260 while(isspace(*retval)) 261 { 262 retval++; 263 } 264 265 return retval; 266 } 267 268 void trimWhite(char* inString) 269 /* 270 ** Remove any whitespace from the end of the string. 271 */ 272 { 273 int len = strlen(inString); 274 275 while(len) 276 { 277 len--; 278 279 if(isspace(*(inString + len))) 280 { 281 *(inString + len) = '\0'; 282 } 283 else 284 { 285 break; 286 } 287 } 288 } 289 290 291 char* lastWord(char* inString) 292 /* 293 ** Finds and returns the last word in a string. 294 ** It is assumed no whitespace is at the end of the string. 295 */ 296 { 297 int mod = 0; 298 int len = strlen(inString); 299 300 while(len) 301 { 302 len--; 303 if(isspace(*(inString + len))) 304 { 305 mod = 1; 306 break; 307 } 308 } 309 310 return inString + len + mod; 311 } 312 313 314 MSMap_Segment* getSymbolSection(MSMap_Module* inModule, MSMap_Symbol* inoutSymbol) 315 /* 316 ** Perform a lookup for the section of the symbol. 317 ** The function could cache the value. 318 */ 319 { 320 MSMap_Segment* retval = NULL; 321 322 if(NULL != inoutSymbol->mSection) 323 { 324 /* 325 ** Use cached value. 326 */ 327 retval = inoutSymbol->mSection; 328 } 329 else 330 { 331 unsigned secLoop = 0; 332 333 /* 334 ** Go through sections in module to find the match for the symbol. 335 */ 336 for(secLoop = 0; secLoop < inModule->mSegmentCount; secLoop++) 337 { 338 if(inoutSymbol->mPrefix == inModule->mSegments[secLoop].mPrefix) 339 { 340 if(inoutSymbol->mOffset >= inModule->mSegments[secLoop].mOffset) 341 { 342 if(inoutSymbol->mOffset < (inModule->mSegments[secLoop].mOffset + inModule->mSegments[secLoop].mLength)) 343 { 344 /* 345 ** We have the section. 346 */ 347 retval = &inModule->mSegments[secLoop]; 348 break; 349 } 350 } 351 } 352 } 353 354 /* 355 ** Cache the value for next time. 356 */ 357 inoutSymbol->mSection = retval; 358 } 359 360 return retval; 361 } 362 363 364 int readSymDB(const char* inDBName, SymDB_Container** outDB) 365 /* 366 ** Intialize the symbol DB. 367 ** Only call if the symbol DB should be initialized. 368 */ 369 { 370 int retval = 0; 371 372 /* 373 ** Initialize out arguments. 374 */ 375 if(NULL != outDB) 376 { 377 *outDB = NULL; 378 } 379 380 if(NULL != outDB && NULL != inDBName) 381 { 382 FILE* symDB = NULL; 383 384 symDB = fopen(inDBName, "r"); 385 if(NULL != symDB) 386 { 387 *outDB = (SymDB_Container*)calloc(1, sizeof(SymDB_Container)); 388 if(NULL != *outDB) 389 { 390 char lineBuf[0x400]; 391 char* symbol = NULL; 392 char* section = NULL; 393 char* object = NULL; 394 char* length = NULL; 395 unsigned lengthNum = 0; 396 char* endLength = NULL; 397 398 /* 399 ** Read the file line by line. 400 */ 401 while(0 == retval && NULL != fgets(lineBuf, sizeof(lineBuf), symDB)) 402 { 403 trimWhite(lineBuf); 404 405 /* 406 ** Each line has four arguments. tab separated values (tsv). 407 ** Symbol 408 ** Section 409 ** Length 410 ** Object 411 */ 412 413 symbol = skipWhite(lineBuf); 414 if(NULL == symbol) 415 { 416 retval = __LINE__; 417 ERROR_REPORT(retval, inDBName, "File does not appear to be a symbol DB."); 418 break; 419 } 420 421 section = strchr(symbol, '\t'); 422 if(NULL == section) 423 { 424 retval = __LINE__; 425 ERROR_REPORT(retval, inDBName, "File does not appear to be a symbol DB."); 426 break; 427 } 428 *section = '\0'; 429 section++; 430 431 length = strchr(section, '\t'); 432 if(NULL == length) 433 { 434 retval = __LINE__; 435 ERROR_REPORT(retval, inDBName, "File does not appear to be a symbol DB."); 436 break; 437 } 438 *length = '\0'; 439 length++; 440 441 object = strchr(length, '\t'); 442 if(NULL == object) 443 { 444 retval = __LINE__; 445 ERROR_REPORT(retval, inDBName, "File does not appear to be a symbol DB."); 446 break; 447 } 448 *object = '\0'; 449 object++; 450 451 /* 452 ** Convert the length into a number. 453 */ 454 errno = 0; 455 lengthNum = strtoul(length, &endLength, 16); 456 if(0 == errno && endLength != length) 457 { 458 SymDB_Symbol* dbSymbol = NULL; 459 SymDB_Section* dbSection = NULL; 460 SymDB_Size* dbSize = NULL; 461 char* dbObject = NULL; 462 void* moved = NULL; 463 464 /* 465 ** Are we looking at the same symbol as last line? 466 ** This assumes the symdb is pre sorted!!! 467 */ 468 if(0 != (*outDB)->mSymbolCount) 469 { 470 unsigned index = (*outDB)->mSymbolCount - 1; 471 472 if(0 == strcmp((*outDB)->mSymbols[index].mName, symbol)) 473 { 474 dbSymbol = &(*outDB)->mSymbols[index]; 475 } 476 } 477 478 /* 479 ** May need to create symbol. 480 */ 481 if(NULL == dbSymbol) 482 { 483 /* 484 ** Could be time to grow the symbol pool. 485 */ 486 if((*outDB)->mSymbolCount >= (*outDB)->mSymbolCapacity) 487 { 488 moved = realloc((*outDB)->mSymbols, sizeof(SymDB_Symbol) * ((*outDB)->mSymbolCapacity + SYMDB_SYMBOL_GROWBY)); 489 if(NULL != moved) 490 { 491 (*outDB)->mSymbols = (SymDB_Symbol*)moved; 492 memset(&(*outDB)->mSymbols[(*outDB)->mSymbolCapacity], 0, sizeof(SymDB_Symbol) * SYMDB_SYMBOL_GROWBY); 493 (*outDB)->mSymbolCapacity += SYMDB_SYMBOL_GROWBY; 494 } 495 else 496 { 497 retval = __LINE__; 498 ERROR_REPORT(retval, inDBName, "Unable to grow symbol DB symbol array."); 499 break; 500 } 501 } 502 503 if((*outDB)->mSymbolCount < (*outDB)->mSymbolCapacity) 504 { 505 dbSymbol = &(*outDB)->mSymbols[(*outDB)->mSymbolCount]; 506 (*outDB)->mSymbolCount++; 507 508 dbSymbol->mName = strdup(symbol); 509 if(NULL == dbSymbol->mName) 510 { 511 retval = __LINE__; 512 ERROR_REPORT(retval, symbol, "Unable to duplicate string."); 513 break; 514 } 515 } 516 else 517 { 518 retval = __LINE__; 519 ERROR_REPORT(retval, symbol, "Unable to grow symbol DB for symbol."); 520 break; 521 } 522 } 523 524 /* 525 ** Assume we have the symbol. 526 ** 527 ** Is this the same section as the last section in the symbol? 528 ** This assumes the symdb was presorted!!!! 529 */ 530 if(0 != dbSymbol->mSectionCount) 531 { 532 unsigned index = dbSymbol->mSectionCount - 1; 533 534 if(0 == strcmp(dbSymbol->mSections[index].mName, section)) 535 { 536 dbSection = &dbSymbol->mSections[index]; 537 } 538 } 539 540 /* 541 ** May need to create the section. 542 */ 543 if(NULL == dbSection) 544 { 545 moved = realloc(dbSymbol->mSections, sizeof(SymDB_Section) * (dbSymbol->mSectionCount + 1)); 546 if(NULL != moved) 547 { 548 dbSymbol->mSections = (SymDB_Section*)moved; 549 dbSection = &dbSymbol->mSections[dbSymbol->mSectionCount]; 550 dbSymbol->mSectionCount++; 551 552 memset(dbSection, 0, sizeof(SymDB_Section)); 553 554 dbSection->mName = strdup(section); 555 if(NULL == dbSection->mName) 556 { 557 retval = __LINE__; 558 ERROR_REPORT(retval, section, "Unable to duplicate string."); 559 break; 560 } 561 } 562 else 563 { 564 retval = __LINE__; 565 ERROR_REPORT(retval, section, "Unable to grow symbol sections for symbol DB."); 566 break; 567 } 568 } 569 570 /* 571 ** Assume we have the section. 572 ** 573 ** Is this the same size as the last size? 574 ** This assumes the symdb was presorted!!! 575 */ 576 if(0 != dbSection->mSizeCount) 577 { 578 unsigned index = dbSection->mSizeCount - 1; 579 580 if(dbSection->mSizes[index].mSize == lengthNum) 581 { 582 dbSize = &dbSection->mSizes[index]; 583 } 584 } 585 586 /* 587 ** May need to create the size in question. 588 */ 589 if(NULL == dbSize) 590 { 591 moved = realloc(dbSection->mSizes, sizeof(SymDB_Size) * (dbSection->mSizeCount + 1)); 592 if(NULL != moved) 593 { 594 dbSection->mSizes = (SymDB_Size*)moved; 595 dbSize = &dbSection->mSizes[dbSection->mSizeCount]; 596 dbSection->mSizeCount++; 597 598 memset(dbSize, 0, sizeof(SymDB_Size)); 599 600 dbSize->mSize = lengthNum; 601 } 602 else 603 { 604 retval = __LINE__; 605 ERROR_REPORT(retval, length, "Unable to grow symbol section sizes for symbol DB."); 606 break; 607 } 608 } 609 610 /* 611 ** Assume we have the size. 612 ** 613 ** We assume a one to one correllation between size and object. 614 ** Always try to add the new object name. 615 ** As the symdb is assumed to be sorted, the object names should also be in order. 616 */ 617 moved = realloc(dbSize->mObjects, sizeof(char*) * (dbSize->mObjectCount + 1)); 618 if(NULL != moved) 619 { 620 dbObject = strdup(object); 621 622 dbSize->mObjects = (char**)moved; 623 dbSize->mObjects[dbSize->mObjectCount] = dbObject; 624 dbSize->mObjectCount++; 625 626 if(NULL == dbObject) 627 { 628 retval = __LINE__; 629 ERROR_REPORT(retval, object, "Unable to duplicate string."); 630 break; 631 } 632 } 633 else 634 { 635 retval = __LINE__; 636 ERROR_REPORT(retval, object, "Unable to grow symbol section size objects for symbol DB."); 637 break; 638 } 639 } 640 else 641 { 642 retval = __LINE__; 643 ERROR_REPORT(retval, length, "Unable to convert symbol DB length into a number."); 644 break; 645 } 646 } 647 648 if(0 == retval && 0 != ferror(symDB)) 649 { 650 retval = __LINE__; 651 ERROR_REPORT(retval, inDBName, "Unable to read file."); 652 } 653 } 654 else 655 { 656 retval = __LINE__; 657 ERROR_REPORT(retval, inDBName, "Unable to allocate symbol DB."); 658 } 659 660 fclose(symDB); 661 symDB = NULL; 662 } 663 else 664 { 665 retval = __LINE__; 666 ERROR_REPORT(retval, inDBName, "Unable to open symbol DB."); 667 } 668 } 669 else 670 { 671 retval = __LINE__; 672 ERROR_REPORT(retval, "(NULL)", "Invalid arguments."); 673 } 674 675 return retval; 676 } 677 678 679 void cleanSymDB(SymDB_Container** inDB) 680 /* 681 ** Free it all up. 682 */ 683 { 684 if(NULL != inDB && NULL != *inDB) 685 { 686 unsigned symLoop = 0; 687 unsigned secLoop = 0; 688 unsigned sizLoop = 0; 689 unsigned objLoop = 0; 690 691 for(symLoop = 0; symLoop < (*inDB)->mSymbolCount; symLoop++) 692 { 693 for(secLoop = 0; secLoop < (*inDB)->mSymbols[symLoop].mSectionCount; secLoop++) 694 { 695 for(sizLoop = 0; sizLoop < (*inDB)->mSymbols[symLoop].mSections[secLoop].mSizeCount; sizLoop++) 696 { 697 for(objLoop = 0; objLoop < (*inDB)->mSymbols[symLoop].mSections[secLoop].mSizes[sizLoop].mObjectCount; objLoop++) 698 { 699 CLEANUP((*inDB)->mSymbols[symLoop].mSections[secLoop].mSizes[sizLoop].mObjects[objLoop]); 700 } 701 CLEANUP((*inDB)->mSymbols[symLoop].mSections[secLoop].mSizes[sizLoop].mObjects); 702 } 703 CLEANUP((*inDB)->mSymbols[symLoop].mSections[secLoop].mName); 704 CLEANUP((*inDB)->mSymbols[symLoop].mSections[secLoop].mSizes); 705 } 706 CLEANUP((*inDB)->mSymbols[symLoop].mName); 707 CLEANUP((*inDB)->mSymbols[symLoop].mSections); 708 } 709 CLEANUP((*inDB)->mSymbols); 710 CLEANUP(*inDB); 711 } 712 } 713 714 715 int symDBLookup(const void* inKey, const void* inItem) 716 /* 717 ** bsearch utility routine to find the symbol in the symdb. 718 */ 719 { 720 int retval = 0; 721 const char* key = (const char*)inKey; 722 const SymDB_Symbol* symbol = (const SymDB_Symbol*)inItem; 723 724 retval = strcmp(key, symbol->mName); 725 726 return retval; 727 } 728 729 730 int fillSymbolSizeFromDB(Options* inOptions, MSMap_Module* inModule, MSMap_Symbol* inoutSymbol, const char* inMangledName) 731 /* 732 ** If we have a symbol DB, attempt to determine the real size of the symbol 733 ** up front. 734 ** This helps us later in the game to avoid performing size guesses by 735 ** offset. 736 */ 737 { 738 int retval = 0; 739 740 /* 741 ** May need to initialize symdb. 742 */ 743 if(NULL == inOptions->mSymDB && NULL != inOptions->mSymDBName) 744 { 745 retval = readSymDB(inOptions->mSymDBName, &inOptions->mSymDB); 746 } 747 748 /* 749 ** Optional 750 */ 751 if(0 == retval && NULL != inOptions->mSymDB) 752 { 753 void* match = NULL; 754 755 /* 756 ** Find the symbol. 757 */ 758 match = bsearch(inMangledName, inOptions->mSymDB->mSymbols, inOptions->mSymDB->mSymbolCount, sizeof(SymDB_Symbol), symDBLookup); 759 if(NULL != match) 760 { 761 SymDB_Symbol* symbol = (SymDB_Symbol*)match; 762 unsigned symDBSize = 0; 763 MSMap_Segment* mapSection = NULL; 764 765 /* 766 ** We found the symbol. 767 ** 768 ** See if it has the section in question. 769 */ 770 mapSection = getSymbolSection(inModule, inoutSymbol); 771 if(NULL != mapSection) 772 { 773 unsigned secLoop = 0; 774 775 for(secLoop = 0; secLoop < symbol->mSectionCount; secLoop++) 776 { 777 if(0 == strcmp(mapSection->mSegment, symbol->mSections[secLoop].mName)) 778 { 779 SymDB_Section* section = &symbol->mSections[secLoop]; 780 781 /* 782 ** We have a section match. 783 ** Should there be a single size for the symbol, 784 ** then we just default to that. 785 ** If more than one size, we have to do an 786 ** object match search. 787 ** Should there be no object match, we do nothign. 788 */ 789 if(1 == section->mSizeCount) 790 { 791 symDBSize = section->mSizes[0].mSize; 792 } 793 else 794 { 795 char* mapObject = NULL; 796 797 /* 798 ** Figure out the map object file name. 799 ** Skip any colon. 800 ** If it doesn't have a .obj in it, not worth continuing. 801 */ 802 mapObject = strrchr(inoutSymbol->mObject, ':'); 803 if(NULL == mapObject) 804 { 805 mapObject = inoutSymbol->mObject; 806 } 807 else 808 { 809 mapObject++; /* colon */ 810 } 811 812 if(NULL != strstr(mapObject, ".obj")) 813 { 814 unsigned sizLoop = 0; 815 unsigned objLoop = 0; 816 SymDB_Size* size = NULL; 817 818 for(sizLoop = 0; sizLoop < section->mSizeCount; sizLoop++) 819 { 820 size = §ion->mSizes[sizLoop]; 821 822 for(objLoop = 0; objLoop < size->mObjectCount; objLoop++) 823 { 824 if(NULL != strstr(size->mObjects[objLoop], mapObject)) 825 { 826 /* 827 ** As we matched the object, in a particular section, 828 ** we'll go with this as the number. 829 */ 830 symDBSize = size->mSize; 831 break; 832 } 833 } 834 835 /* 836 ** If the object loop broke early, we break too. 837 */ 838 if(objLoop < size->mObjectCount) 839 { 840 break; 841 } 842 } 843 } 844 } 845 846 break; 847 } 848 } 849 } 850 851 /* 852 ** Put the size in. 853 */ 854 inoutSymbol->mSymDBSize = symDBSize; 855 } 856 } 857 858 return retval; 859 } 860 861 862 char* symdup(const char* inSymbol) 863 /* 864 ** Attempts to demangle the symbol if appropriate. 865 ** Otherwise acts like strdup. 866 */ 867 { 868 char* retval = NULL; 869 870 #if F_DEMANGLE 871 { 872 int isImport = 0; 873 874 if(0 == strncmp("__imp_", inSymbol, 6)) 875 { 876 isImport = __LINE__; 877 inSymbol += 6; 878 } 879 880 if('?' == inSymbol[0]) 881 { 882 char demangleBuf[0x200]; 883 DWORD demangleRes = 0; 884 885 demangleRes = UnDecorateSymbolName(inSymbol, demangleBuf, sizeof(demangleBuf), UNDNAME_COMPLETE); 886 if(0 != demangleRes) 887 { 888 if (strcmp(demangleBuf, "`string'") == 0) 889 { 890 891 /* attempt manual demangling of string prefix.. */ 892 893 /* first make sure we have enough space for the 894 updated string - the demangled string will 895 always be shorter than strlen(inSymbol) and the 896 prologue will always be longer than the 897 "string: " that we tack on the front of the string 898 */ 899 char *curresult = retval = malloc(strlen(inSymbol) + 11); 900 const char *curchar = inSymbol; 901 902 int state = DEMANGLE_STATE_START; 903 904 /* the hex state is for stuff like ?$EA which 905 really means hex value 0x40 */ 906 char hex_state = 0; 907 char string_is_unicode = 0; 908 909 /* sometimes we get a null-termination before the 910 final @ sign - in that case, remember that 911 we've seen the whole string */ 912 int have_null_char = 0; 913 914 /* stick our user-readable prefix on */ 915 strcpy(curresult, "string: \""); 916 curresult += 9; 917 918 while (*curchar) { 919 920 // process current state 921 switch (state) { 922 923 /* the Prologue states are divided up so 924 that someday we can try to decode 925 the random letters in between the '@' 926 signs. Also, some strings only have 2 927 prologue '@' signs, so we have to 928 figure out how to distinguish between 929 them at some point. */ 930 case DEMANGLE_STATE_START: 931 if (*curchar == '@') 932 state = DEMANGLE_STATE_PROLOGUE_1; 933 /* ignore all other states */ 934 break; 935 936 case DEMANGLE_STATE_PROLOGUE_1: 937 switch (*curchar) { 938 case '0': 939 string_is_unicode=0; 940 state = DEMANGLE_STATE_HAVE_TYPE; 941 break; 942 case '1': 943 string_is_unicode=1; 944 state = DEMANGLE_STATE_HAVE_TYPE; 945 break; 946 947 /* ignore all other characters */ 948 } 949 break; 950 951 case DEMANGLE_STATE_HAVE_TYPE: 952 if (*curchar >= '0' && *curchar <= '9') { 953 state = DEMANGLE_STATE_DEC_LENGTH; 954 } else if (*curchar >= 'A' && *curchar <= 'Z') { 955 state = DEMANGLE_STATE_HEX_LENGTH; 956 } 957 case DEMANGLE_STATE_DEC_LENGTH: 958 /* decimal lengths don't have the 2nd 959 field 960 */ 961 if (*curchar == '@') 962 state = DEMANGLE_STATE_NORMAL; 963 break; 964 965 case DEMANGLE_STATE_HEX_LENGTH: 966 /* hex lengths have a 2nd field 967 (though I have no idea what it is for) 968 */ 969 if (*curchar == '@') 970 state = DEMANGLE_STATE_PROLOGUE_SECONDARY; 971 break; 972 973 case DEMANGLE_STATE_PROLOGUE_SECONDARY: 974 if (*curchar == '@') 975 state = DEMANGLE_STATE_NORMAL; 976 break; 977 978 case DEMANGLE_STATE_NORMAL: 979 switch (*curchar) { 980 case '?': 981 state = DEMANGLE_STATE_QDECODE; 982 break; 983 case '@': 984 state = DEMANGLE_STATE_STOP; 985 break; 986 default: 987 *curresult++ = DEMANGLE_SAFE_CHAR(*curchar); 988 state = DEMANGLE_STATE_NORMAL; 989 break; 990 } 991 break; 992 993 /* found a '?' */ 994 case DEMANGLE_STATE_QDECODE: 995 state = DEMANGLE_STATE_NORMAL; 996 997 /* there are certain shortcuts, like 998 "?3" means ":" 999 */ 1000 switch (*curchar) { 1001 case '1': 1002 *curresult++ = '/'; 1003 break; 1004 case '2': 1005 *curresult++ = '\\'; 1006 break; 1007 case '3': 1008 *curresult++ = ':'; 1009 break; 1010 case '4': 1011 *curresult++ = '.'; 1012 break; 1013 case '5': 1014 *curresult++ = ' '; 1015 break; 1016 case '6': 1017 *curresult++ = '\\'; 1018 *curresult++ = 'n'; 1019 break; 1020 case '8': 1021 *curresult++ = '\''; 1022 break; 1023 case '9': 1024 *curresult++ = '-'; 1025 break; 1026 1027 /* any other arbitrary ASCII value can 1028 be stored by prefixing it with ?$ 1029 */ 1030 case '$': 1031 state = DEMANGLE_STATE_DOLLAR_1; 1032 } 1033 break; 1034 1035 case DEMANGLE_STATE_DOLLAR_1: 1036 /* first digit of ?$ notation. All digits 1037 are hex, represented starting with the 1038 capital leter 'A' such that 'A' means 0x0, 1039 'B' means 0x1, 'K' means 0xA 1040 */ 1041 hex_state = (*curchar - 'A') * 0x10; 1042 state = DEMANGLE_STATE_DOLLAR_2; 1043 break; 1044 1045 case DEMANGLE_STATE_DOLLAR_2: 1046 /* same mechanism as above */ 1047 hex_state += (*curchar - 'A'); 1048 if (hex_state) { 1049 *curresult++ = DEMANGLE_SAFE_CHAR(hex_state); 1050 have_null_char = 0; 1051 } 1052 else { 1053 have_null_char = 1; 1054 } 1055 1056 state = DEMANGLE_STATE_NORMAL; 1057 break; 1058 1059 case DEMANGLE_STATE_STOP: 1060 break; 1061 } 1062 1063 curchar++; 1064 } 1065 1066 /* add the appropriate termination depending 1067 if we completed the string or not */ 1068 if (!have_null_char) 1069 strcpy(curresult, "...\""); 1070 else 1071 strcpy(curresult, "\""); 1072 } else { 1073 retval = strdup(demangleBuf); 1074 } 1075 } 1076 else 1077 { 1078 /* 1079 ** fall back to normal. 1080 */ 1081 retval = strdup(inSymbol); 1082 } 1083 } 1084 else if('_' == inSymbol[0]) 1085 { 1086 retval = strdup(inSymbol + 1); 1087 } 1088 else 1089 { 1090 retval = strdup(inSymbol); 1091 } 1092 1093 /* 1094 ** May need to rewrite the symbol if an import. 1095 */ 1096 if(NULL != retval && isImport) 1097 { 1098 const char importPrefix[] = "__declspec(dllimport) "; 1099 char importBuf[0x200]; 1100 int printRes = 0; 1101 1102 printRes = _snprintf(importBuf, sizeof(importBuf), "%s%s", importPrefix, retval); 1103 free(retval); 1104 retval = NULL; 1105 1106 if(printRes > 0) 1107 { 1108 retval = strdup(importBuf); 1109 } 1110 } 1111 } 1112 #else /* F_DEMANGLE */ 1113 retval = strdup(inSymbol); 1114 #endif /* F_DEMANGLE */ 1115 1116 return retval; 1117 } 1118 1119 1120 int readmap(Options* inOptions, MSMap_Module* inModule) 1121 /* 1122 ** Read the input line by line, adding it to the module. 1123 */ 1124 { 1125 int retval = 0; 1126 char lineBuffer[0x400]; 1127 char* current = NULL; 1128 MSMap_ReadState fsm; 1129 int len = 0; 1130 int forceContinue = 0; 1131 1132 memset(&fsm, 0, sizeof(fsm)); 1133 1134 /* 1135 ** Read the map file line by line. 1136 ** We keep a simple state machine to determine what we're looking at. 1137 */ 1138 while(0 == retval && NULL != fgets(lineBuffer, sizeof(lineBuffer), inOptions->mInput)) 1139 { 1140 if(forceContinue) 1141 { 1142 /* 1143 ** Used to skip anticipated blank lines. 1144 */ 1145 forceContinue--; 1146 continue; 1147 } 1148 1149 current = skipWhite(lineBuffer); 1150 trimWhite(current); 1151 1152 len = strlen(current); 1153 1154 if(fsm.mHasModule) 1155 { 1156 if(fsm.mHasTimestamp) 1157 { 1158 if(fsm.mHasPreferredLoadAddress) 1159 { 1160 if(fsm.mHasSegmentData) 1161 { 1162 if(fsm.mHasPublicSymbolData) 1163 { 1164 if(fsm.mHasEntryPoint) 1165 { 1166 if(fsm.mFoundStaticSymbols) 1167 { 1168 /* 1169 ** A blank line means we've reached the end of all static symbols. 1170 */ 1171 if(len) 1172 { 1173 /* 1174 ** We're adding a new symbol. 1175 ** Make sure we have room for it. 1176 */ 1177 if(inModule->mSymbolCapacity == inModule->mSymbolCount) 1178 { 1179 void* moved = NULL; 1180 1181 moved = realloc(inModule->mSymbols, sizeof(MSMap_Symbol) * (inModule->mSymbolCapacity + MSMAP_SYMBOL_GROWBY)); 1182 if(NULL != moved) 1183 { 1184 inModule->mSymbolCapacity += MSMAP_SYMBOL_GROWBY; 1185 inModule->mSymbols = (MSMap_Symbol*)moved; 1186 } 1187 else 1188 { 1189 retval = __LINE__; 1190 ERROR_REPORT(retval, inModule->mModule, "Unable to grow symbols."); 1191 } 1192 } 1193 1194 if(0 == retval && inModule->mSymbolCapacity > inModule->mSymbolCount) 1195 { 1196 MSMap_Symbol* theSymbol = NULL; 1197 unsigned index = 0; 1198 int scanRes = 0; 1199 char symbolBuf[0x200]; 1200 1201 index = inModule->mSymbolCount; 1202 inModule->mSymbolCount++; 1203 theSymbol = (inModule->mSymbols + index); 1204 1205 memset(theSymbol, 0, sizeof(MSMap_Symbol)); 1206 theSymbol->mScope = STATIC; 1207 1208 scanRes = sscanf(current, "%x:%x %s %x", (unsigned*)&(theSymbol->mPrefix), (unsigned*)&(theSymbol->mOffset), symbolBuf, (unsigned*)&(theSymbol->mRVABase)); 1209 if(4 == scanRes) 1210 { 1211 theSymbol->mSymbol = symdup(symbolBuf); 1212 1213 if(0 == retval) 1214 { 1215 if(NULL != theSymbol->mSymbol) 1216 { 1217 char *last = lastWord(current); 1218 1219 theSymbol->mObject = strdup(last); 1220 if(NULL == theSymbol->mObject) 1221 { 1222 retval = __LINE__; 1223 ERROR_REPORT(retval, last, "Unable to copy object name."); 1224 } 1225 } 1226 else 1227 { 1228 retval = __LINE__; 1229 ERROR_REPORT(retval, symbolBuf, "Unable to copy symbol name."); 1230 } 1231 } 1232 } 1233 else 1234 { 1235 retval = __LINE__; 1236 ERROR_REPORT(retval, inModule->mModule, "Unable to scan static symbols."); 1237 } 1238 } 1239 } 1240 else 1241 { 1242 /* 1243 ** All done. 1244 */ 1245 break; 1246 } 1247 } 1248 else 1249 { 1250 /* 1251 ** Static symbols are optional. 1252 ** If no static symbols we're done. 1253 ** Otherwise, set the flag such that it will work more. 1254 */ 1255 if(0 == strcmp(current, "Static symbols")) 1256 { 1257 fsm.mFoundStaticSymbols = __LINE__; 1258 forceContinue = 1; 1259 } 1260 else 1261 { 1262 /* 1263 ** All done. 1264 */ 1265 break; 1266 } 1267 } 1268 } 1269 else 1270 { 1271 int scanRes = 0; 1272 1273 scanRes = sscanf(current, "entry point at %x:%x", (unsigned*)&(inModule->mEntryPrefix), (unsigned*)&(inModule->mEntryOffset)); 1274 if(2 == scanRes) 1275 { 1276 fsm.mHasEntryPoint = __LINE__; 1277 forceContinue = 1; 1278 } 1279 else 1280 { 1281 retval = __LINE__; 1282 ERROR_REPORT(retval, current, "Unable to obtain entry point."); 1283 } 1284 } 1285 } 1286 else 1287 { 1288 /* 1289 ** Skip the N lines of public symbol data (column headers). 1290 */ 1291 if(2 <= fsm.mHasPublicSymbolDataSkippedLines) 1292 { 1293 /* 1294 ** A blank line indicates end of public symbols. 1295 */ 1296 if(len) 1297 { 1298 /* 1299 ** We're adding a new symbol. 1300 ** Make sure we have room for it. 1301 */ 1302 if(inModule->mSymbolCapacity == inModule->mSymbolCount) 1303 { 1304 void* moved = NULL; 1305 1306 moved = realloc(inModule->mSymbols, sizeof(MSMap_Symbol) * (inModule->mSymbolCapacity + MSMAP_SYMBOL_GROWBY)); 1307 if(NULL != moved) 1308 { 1309 inModule->mSymbolCapacity += MSMAP_SYMBOL_GROWBY; 1310 inModule->mSymbols = (MSMap_Symbol*)moved; 1311 } 1312 else 1313 { 1314 retval = __LINE__; 1315 ERROR_REPORT(retval, inModule->mModule, "Unable to grow symbols."); 1316 } 1317 } 1318 1319 if(0 == retval && inModule->mSymbolCapacity > inModule->mSymbolCount) 1320 { 1321 MSMap_Symbol* theSymbol = NULL; 1322 unsigned index = 0; 1323 int scanRes = 0; 1324 char symbolBuf[0x200]; 1325 1326 index = inModule->mSymbolCount; 1327 inModule->mSymbolCount++; 1328 theSymbol = (inModule->mSymbols + index); 1329 1330 memset(theSymbol, 0, sizeof(MSMap_Symbol)); 1331 theSymbol->mScope = PUBLIC; 1332 1333 scanRes = sscanf(current, "%x:%x %s %x", (unsigned*)&(theSymbol->mPrefix), (unsigned*)&(theSymbol->mOffset), symbolBuf, (unsigned *)&(theSymbol->mRVABase)); 1334 if(4 == scanRes) 1335 { 1336 theSymbol->mSymbol = symdup(symbolBuf); 1337 1338 if(NULL != theSymbol->mSymbol) 1339 { 1340 char *last = lastWord(current); 1341 1342 theSymbol->mObject = strdup(last); 1343 if(NULL != theSymbol->mObject) 1344 { 1345 /* 1346 ** Finally, attempt to lookup the actual size of the symbol 1347 ** if there is a symbol DB available. 1348 */ 1349 retval = fillSymbolSizeFromDB(inOptions, inModule, theSymbol, symbolBuf); 1350 } 1351 else 1352 { 1353 retval = __LINE__; 1354 ERROR_REPORT(retval, last, "Unable to copy object name."); 1355 } 1356 } 1357 else 1358 { 1359 retval = __LINE__; 1360 ERROR_REPORT(retval, symbolBuf, "Unable to copy symbol name."); 1361 } 1362 } 1363 else 1364 { 1365 retval = __LINE__; 1366 ERROR_REPORT(retval, inModule->mModule, "Unable to scan public symbols."); 1367 } 1368 } 1369 } 1370 else 1371 { 1372 fsm.mHasPublicSymbolData = __LINE__; 1373 } 1374 } 1375 else 1376 { 1377 fsm.mHasPublicSymbolDataSkippedLines++; 1378 } 1379 } 1380 } 1381 else 1382 { 1383 /* 1384 ** Skip the first line of segment data (column headers). 1385 ** Mark that we've begun grabbing segement data. 1386 */ 1387 if(fsm.mSegmentDataSkippedLine) 1388 { 1389 /* 1390 ** A blank line means end of the segment data. 1391 */ 1392 if(len) 1393 { 1394 /* 1395 ** We're adding a new segment. 1396 ** Make sure we have room for it. 1397 */ 1398 if(inModule->mSegmentCapacity == inModule->mSegmentCount) 1399 { 1400 void* moved = NULL; 1401 1402 moved = realloc(inModule->mSegments, sizeof(MSMap_Segment) * (inModule->mSegmentCapacity + MSMAP_SEGMENT_GROWBY)); 1403 if(NULL != moved) 1404 { 1405 inModule->mSegmentCapacity += MSMAP_SEGMENT_GROWBY; 1406 inModule->mSegments = (MSMap_Segment*)moved; 1407 } 1408 else 1409 { 1410 retval = __LINE__; 1411 ERROR_REPORT(retval, inModule->mModule, "Unable to grow segments."); 1412 } 1413 } 1414 1415 if(0 == retval && inModule->mSegmentCapacity > inModule->mSegmentCount) 1416 { 1417 MSMap_Segment* theSegment = NULL; 1418 unsigned index = 0; 1419 char classBuf[0x10]; 1420 char nameBuf[0x20]; 1421 int scanRes = 0; 1422 1423 index = inModule->mSegmentCount; 1424 inModule->mSegmentCount++; 1425 theSegment = (inModule->mSegments + index); 1426 1427 memset(theSegment, 0, sizeof(MSMap_Segment)); 1428 1429 scanRes = sscanf(current, "%x:%x %xH %s %s", (unsigned*)&(theSegment->mPrefix), (unsigned*)&(theSegment->mOffset), (unsigned*)&(theSegment->mLength), nameBuf, classBuf); 1430 if(5 == scanRes) 1431 { 1432 if('.' == nameBuf[0]) 1433 { 1434 theSegment->mSegment = strdup(&nameBuf[1]); 1435 } 1436 else 1437 { 1438 theSegment->mSegment = strdup(nameBuf); 1439 } 1440 1441 if(NULL != theSegment->mSegment) 1442 { 1443 if(0 == strcmp("DATA", classBuf)) 1444 { 1445 theSegment->mClass = DATA; 1446 } 1447 else if(0 == strcmp("CODE", classBuf)) 1448 { 1449 theSegment->mClass = CODE; 1450 } 1451 else 1452 { 1453 retval = __LINE__; 1454 ERROR_REPORT(retval, classBuf, "Unrecognized segment class."); 1455 } 1456 } 1457 else 1458 { 1459 retval = __LINE__; 1460 ERROR_REPORT(retval, nameBuf, "Unable to copy segment name."); 1461 } 1462 } 1463 else 1464 { 1465 retval = __LINE__; 1466 ERROR_REPORT(retval, inModule->mModule, "Unable to scan segments."); 1467 } 1468 } 1469 } 1470 else 1471 { 1472 fsm.mHasSegmentData = __LINE__; 1473 } 1474 } 1475 else 1476 { 1477 fsm.mSegmentDataSkippedLine = __LINE__; 1478 } 1479 } 1480 } 1481 else 1482 { 1483 int scanRes = 0; 1484 1485 /* 1486 ** The PLA has a particular format. 1487 */ 1488 scanRes = sscanf(current, "Preferred load address is %x", (unsigned*)&(inModule->mPreferredLoadAddress)); 1489 if(1 == scanRes) 1490 { 1491 fsm.mHasPreferredLoadAddress = __LINE__; 1492 forceContinue = 1; 1493 } 1494 else 1495 { 1496 retval = __LINE__; 1497 ERROR_REPORT(retval, current, "Unable to obtain preferred load address."); 1498 } 1499 } 1500 } 1501 else 1502 { 1503 int scanRes = 0; 1504 1505 /* 1506 ** The timestamp has a particular format. 1507 */ 1508 scanRes = sscanf(current, "Timestamp is %x", (unsigned*)&(inModule->mTimestamp)); 1509 if(1 == scanRes) 1510 { 1511 fsm.mHasTimestamp = __LINE__; 1512 forceContinue = 1; 1513 } 1514 else 1515 { 1516 retval = __LINE__; 1517 ERROR_REPORT(retval, current, "Unable to obtain timestamp."); 1518 } 1519 } 1520 } 1521 else 1522 { 1523 /* 1524 ** The module is on a line by itself. 1525 */ 1526 inModule->mModule = strdup(current); 1527 if(NULL != inModule->mModule) 1528 { 1529 fsm.mHasModule = __LINE__; 1530 forceContinue = 1; 1531 1532 if(0 != inOptions->mMatchModuleCount) 1533 { 1534 unsigned matchLoop = 0; 1535 1536 /* 1537 ** If this module name doesn't match, then bail. 1538 ** Compare in a case sensitive manner, exact match only. 1539 */ 1540 for(matchLoop = 0; matchLoop < inOptions->mMatchModuleCount; matchLoop++) 1541 { 1542 if(0 == strcmp(inModule->mModule, inOptions->mMatchModules[matchLoop])) 1543 { 1544 break; 1545 } 1546 } 1547 1548 if(matchLoop == inOptions->mMatchModuleCount) 1549 { 1550 /* 1551 ** A match did not occur, bail out of read loop. 1552 ** No error, however. 1553 */ 1554 break; 1555 } 1556 } 1557 } 1558 else 1559 { 1560 retval = __LINE__; 1561 ERROR_REPORT(retval, current, "Unable to obtain module."); 1562 } 1563 } 1564 } 1565 1566 if(0 == retval && 0 != ferror(inOptions->mInput)) 1567 { 1568 retval = __LINE__; 1569 ERROR_REPORT(retval, inOptions->mInputName, "Unable to read file."); 1570 } 1571 1572 return retval; 1573 } 1574 1575 1576 static int qsortRVABase(const void* in1, const void* in2) 1577 /* 1578 ** qsort callback to sort the symbols by their RVABase. 1579 */ 1580 { 1581 MSMap_Symbol* sym1 = (MSMap_Symbol*)in1; 1582 MSMap_Symbol* sym2 = (MSMap_Symbol*)in2; 1583 int retval = 0; 1584 1585 if(sym1->mRVABase < sym2->mRVABase) 1586 { 1587 retval = -1; 1588 } 1589 else if(sym1->mRVABase > sym2->mRVABase) 1590 { 1591 retval = 1; 1592 } 1593 1594 return retval; 1595 } 1596 1597 1598 static int tsvout(Options* inOptions, unsigned inSize, MSMap_SegmentClass inClass, MSMap_SymbolScope inScope, const char* inModule, const char* inSegment, const char* inObject, const char* inSymbol) 1599 /* 1600 ** Output a line of map information separated by tabs. 1601 ** Some items (const char*), if not present, will receive a default value. 1602 */ 1603 { 1604 int retval = 0; 1605 1606 /* 1607 ** No need to output on no size. 1608 ** This can happen with zero sized segments, 1609 ** or an imported symbol which has multiple names (one will count). 1610 */ 1611 if(0 != inSize) 1612 { 1613 char objectBuf[0x100]; 1614 const char* symScope = NULL; 1615 const char* segClass = NULL; 1616 const char* undefined = "UNDEF"; 1617 1618 /* 1619 ** Fill in unspecified values. 1620 */ 1621 if(NULL == inObject) 1622 { 1623 sprintf(objectBuf, "%s:%s:%s", undefined, inModule, inSegment); 1624 inObject = objectBuf; 1625 } 1626 if(NULL == inSymbol) 1627 { 1628 inSymbol = inObject; 1629 } 1630 1631 /* 1632 ** Convert some enumerations to text. 1633 */ 1634 switch(inClass) 1635 { 1636 case CODE: 1637 segClass = "CODE"; 1638 break; 1639 case DATA: 1640 segClass = "DATA"; 1641 break; 1642 default: 1643 retval = __LINE__; 1644 ERROR_REPORT(retval, "", "Unable to determine class for output."); 1645 break; 1646 } 1647 1648 switch(inScope) 1649 { 1650 case PUBLIC: 1651 symScope = "PUBLIC"; 1652 break; 1653 case STATIC: 1654 symScope = "STATIC"; 1655 break; 1656 case UNDEFINED: 1657 symScope = undefined; 1658 break; 1659 default: 1660 retval = __LINE__; 1661 ERROR_REPORT(retval, "", "Unable to determine scope for symbol."); 1662 break; 1663 } 1664 1665 if(0 == retval) 1666 { 1667 int printRes = 0; 1668 1669 printRes = fprintf(inOptions->mOutput, 1670 "%.8X\t%s\t%s\t%s\t%s\t%s\t%s\n", 1671 inSize, 1672 segClass, 1673 symScope, 1674 inModule, 1675 inSegment, 1676 inObject, 1677 inSymbol 1678 ); 1679 1680 if(0 > printRes) 1681 { 1682 retval = __LINE__; 1683 ERROR_REPORT(retval, inOptions->mOutputName, "Unable to output tsv data."); 1684 } 1685 } 1686 } 1687 1688 return retval; 1689 } 1690 1691 1692 void cleanModule(MSMap_Module* inModule) 1693 { 1694 unsigned loop = 0; 1695 1696 for(loop = 0; loop < inModule->mSymbolCount; loop++) 1697 { 1698 CLEANUP(inModule->mSymbols[loop].mObject); 1699 CLEANUP(inModule->mSymbols[loop].mSymbol); 1700 } 1701 CLEANUP(inModule->mSymbols); 1702 1703 for(loop = 0; loop < inModule->mSegmentCount; loop++) 1704 { 1705 CLEANUP(inModule->mSegments[loop].mSegment); 1706 } 1707 CLEANUP(inModule->mSegments); 1708 1709 CLEANUP(inModule->mModule); 1710 1711 memset(inModule, 0, sizeof(MSMap_Module)); 1712 } 1713 1714 1715 int map2tsv(Options* inOptions) 1716 /* 1717 ** Read all input. 1718 ** Output tab separated value data. 1719 */ 1720 { 1721 int retval = 0; 1722 MSMap_Module module; 1723 1724 memset(&module, 0, sizeof(module)); 1725 1726 /* 1727 ** Read in the map file. 1728 */ 1729 retval = readmap(inOptions, &module); 1730 if(0 == retval) 1731 { 1732 unsigned symLoop = 0; 1733 MSMap_Symbol* symbol = NULL; 1734 unsigned secLoop = 0; 1735 MSMap_Segment* section = NULL; 1736 unsigned size = 0; 1737 unsigned dbSize = 0; 1738 unsigned offsetSize = 0; 1739 unsigned endOffset = 0; 1740 1741 /* 1742 ** Quick sort the symbols via RVABase. 1743 */ 1744 qsort(module.mSymbols, module.mSymbolCount, sizeof(MSMap_Symbol), qsortRVABase); 1745 1746 /* 1747 ** Go through all the symbols (in order by sort). 1748 ** Output their sizes. 1749 */ 1750 for(symLoop = 0; 0 == retval && symLoop < module.mSymbolCount; symLoop++) 1751 { 1752 symbol = &module.mSymbols[symLoop]; 1753 section = getSymbolSection(&module, symbol); 1754 if (!section) 1755 continue; 1756 1757 /* 1758 ** Use the symbol DB size if available. 1759 */ 1760 dbSize = symbol->mSymDBSize; 1761 1762 /* 1763 ** Guess using offsets. 1764 ** Is there a next symbol available? If so, its start offset is the end of this symbol. 1765 ** Otherwise, our section offset + length is the end of this symbol. 1766 ** 1767 ** The trick is, the DB size can not go beyond the offset size, for sanity. 1768 */ 1769 1770 /* 1771 ** Try next symbol, but only if in same section. 1772 ** If still not, use the end of the segment. 1773 ** This implies we were the last symbol in the segment. 1774 */ 1775 if((symLoop + 1) < module.mSymbolCount) 1776 { 1777 MSMap_Symbol* nextSymbol = NULL; 1778 MSMap_Segment* nextSection = NULL; 1779 1780 nextSymbol = &module.mSymbols[symLoop + 1]; 1781 nextSection = getSymbolSection(&module, nextSymbol); 1782 1783 if(section == nextSection) 1784 { 1785 endOffset = nextSymbol->mOffset; 1786 } 1787 else 1788 { 1789 endOffset = section->mOffset + section->mLength; 1790 } 1791 } 1792 else 1793 { 1794 endOffset = section->mOffset + section->mLength; 1795 } 1796 1797 /* 1798 ** Can now guess at size. 1799 */ 1800 offsetSize = endOffset - symbol->mOffset; 1801 1802 /* 1803 ** Now, determine which size to use. 1804 ** This is really a sanity check as well. 1805 */ 1806 size = offsetSize; 1807 if(0 != dbSize) 1808 { 1809 if(dbSize < offsetSize) 1810 { 1811 size = dbSize; 1812 } 1813 } 1814 1815 /* 1816 ** Output the symbol with the size. 1817 */ 1818 retval = tsvout(inOptions, 1819 size, 1820 section->mClass, 1821 symbol->mScope, 1822 module.mModule, 1823 section->mSegment, 1824 symbol->mObject, 1825 symbol->mSymbol 1826 ); 1827 1828 /* 1829 ** Make sure we mark this amount of space as used in the section. 1830 */ 1831 section->mUsed += size; 1832 } 1833 1834 /* 1835 ** Go through the sections, and those whose length is longer than the 1836 ** amount of space used, output dummy filler values. 1837 */ 1838 for(secLoop = 0; 0 == retval && secLoop < module.mSegmentCount; secLoop++) 1839 { 1840 section = &module.mSegments[secLoop]; 1841 1842 if(section && section->mUsed < section->mLength) 1843 { 1844 retval = tsvout(inOptions, 1845 section->mLength - section->mUsed, 1846 section->mClass, 1847 UNDEFINED, 1848 module.mModule, 1849 section->mSegment, 1850 NULL, 1851 NULL 1852 ); 1853 } 1854 } 1855 } 1856 1857 /* 1858 ** Cleanup. 1859 */ 1860 cleanModule(&module); 1861 1862 return retval; 1863 } 1864 1865 1866 int initOptions(Options* outOptions, int inArgc, char** inArgv) 1867 /* 1868 ** returns int 0 if successful. 1869 */ 1870 { 1871 int retval = 0; 1872 int loop = 0; 1873 int switchLoop = 0; 1874 int match = 0; 1875 const int switchCount = sizeof(gSwitches) / sizeof(gSwitches[0]); 1876 Switch* current = NULL; 1877 1878 /* 1879 ** Set any defaults. 1880 */ 1881 memset(outOptions, 0, sizeof(Options)); 1882 outOptions->mProgramName = inArgv[0]; 1883 outOptions->mInput = stdin; 1884 outOptions->mInputName = strdup("stdin"); 1885 outOptions->mOutput = stdout; 1886 outOptions->mOutputName = strdup("stdout"); 1887 1888 if(NULL == outOptions->mOutputName || NULL == outOptions->mInputName) 1889 { 1890 retval = __LINE__; 1891 ERROR_REPORT(retval, "stdin/stdout", "Unable to strdup."); 1892 } 1893 1894 /* 1895 ** Go through and attempt to do the right thing. 1896 */ 1897 for(loop = 1; loop < inArgc && 0 == retval; loop++) 1898 { 1899 match = 0; 1900 current = NULL; 1901 1902 for(switchLoop = 0; switchLoop < switchCount && 0 == retval; switchLoop++) 1903 { 1904 if(0 == strcmp(gSwitches[switchLoop]->mLongName, inArgv[loop])) 1905 { 1906 match = __LINE__; 1907 } 1908 else if(0 == strcmp(gSwitches[switchLoop]->mShortName, inArgv[loop])) 1909 { 1910 match = __LINE__; 1911 } 1912 1913 if(match) 1914 { 1915 if(gSwitches[switchLoop]->mHasValue) 1916 { 1917 /* 1918 ** Attempt to absorb next option to fullfill value. 1919 */ 1920 if(loop + 1 < inArgc) 1921 { 1922 loop++; 1923 1924 current = gSwitches[switchLoop]; 1925 current->mValue = inArgv[loop]; 1926 } 1927 } 1928 else 1929 { 1930 current = gSwitches[switchLoop]; 1931 } 1932 1933 break; 1934 } 1935 } 1936 1937 if(0 == match) 1938 { 1939 outOptions->mHelp = __LINE__; 1940 retval = __LINE__; 1941 ERROR_REPORT(retval, inArgv[loop], "Unknown command line switch."); 1942 } 1943 else if(NULL == current) 1944 { 1945 outOptions->mHelp = __LINE__; 1946 retval = __LINE__; 1947 ERROR_REPORT(retval, inArgv[loop], "Command line switch requires a value."); 1948 } 1949 else 1950 { 1951 /* 1952 ** Do something based on address/swtich. 1953 */ 1954 if(current == &gInputSwitch) 1955 { 1956 CLEANUP(outOptions->mInputName); 1957 if(NULL != outOptions->mInput && stdin != outOptions->mInput) 1958 { 1959 fclose(outOptions->mInput); 1960 outOptions->mInput = NULL; 1961 } 1962 1963 outOptions->mInput = fopen(current->mValue, "r"); 1964 if(NULL == outOptions->mInput) 1965 { 1966 retval = __LINE__; 1967 ERROR_REPORT(retval, current->mValue, "Unable to open input file."); 1968 } 1969 else 1970 { 1971 outOptions->mInputName = strdup(current->mValue); 1972 if(NULL == outOptions->mInputName) 1973 { 1974 retval = __LINE__; 1975 ERROR_REPORT(retval, current->mValue, "Unable to strdup."); 1976 } 1977 } 1978 } 1979 else if(current == &gOutputSwitch) 1980 { 1981 CLEANUP(outOptions->mOutputName); 1982 if(NULL != outOptions->mOutput && stdout != outOptions->mOutput) 1983 { 1984 fclose(outOptions->mOutput); 1985 outOptions->mOutput = NULL; 1986 } 1987 1988 outOptions->mOutput = fopen(current->mValue, "a"); 1989 if(NULL == outOptions->mOutput) 1990 { 1991 retval = __LINE__; 1992 ERROR_REPORT(retval, current->mValue, "Unable to open output file."); 1993 } 1994 else 1995 { 1996 outOptions->mOutputName = strdup(current->mValue); 1997 if(NULL == outOptions->mOutputName) 1998 { 1999 retval = __LINE__; 2000 ERROR_REPORT(retval, current->mValue, "Unable to strdup."); 2001 } 2002 } 2003 } 2004 else if(current == &gHelpSwitch) 2005 { 2006 outOptions->mHelp = __LINE__; 2007 } 2008 else if(current == &gMatchModuleSwitch) 2009 { 2010 void* moved = NULL; 2011 2012 /* 2013 ** Add the value to the list of allowed module names. 2014 */ 2015 moved = realloc(outOptions->mMatchModules, sizeof(char*) * (outOptions->mMatchModuleCount + 1)); 2016 if(NULL != moved) 2017 { 2018 outOptions->mMatchModules = (char**)moved; 2019 outOptions->mMatchModules[outOptions->mMatchModuleCount] = strdup(current->mValue); 2020 if(NULL != outOptions->mMatchModules[outOptions->mMatchModuleCount]) 2021 { 2022 outOptions->mMatchModuleCount++; 2023 } 2024 else 2025 { 2026 retval = __LINE__; 2027 ERROR_REPORT(retval, current->mValue, "Unable to duplicate string."); 2028 } 2029 } 2030 else 2031 { 2032 retval = __LINE__; 2033 ERROR_REPORT(retval, current->mValue, "Unable to allocate space for string."); 2034 } 2035 } 2036 else if(current == &gSymDBSwitch) 2037 { 2038 CLEANUP(outOptions->mSymDBName); 2039 outOptions->mSymDBName = strdup(current->mValue); 2040 if(NULL == outOptions->mSymDBName) 2041 { 2042 retval = __LINE__; 2043 ERROR_REPORT(retval, current->mValue, "Unable to duplicate symbol db name."); 2044 } 2045 } 2046 else if(current == &gBatchModeSwitch) 2047 { 2048 outOptions->mBatchMode = __LINE__; 2049 } 2050 else 2051 { 2052 retval = __LINE__; 2053 ERROR_REPORT(retval, current->mLongName, "No handler for command line switch."); 2054 } 2055 } 2056 } 2057 2058 return retval; 2059 } 2060 2061 2062 void cleanOptions(Options* inOptions) 2063 /* 2064 ** Clean up any open handles, et. al. 2065 */ 2066 { 2067 CLEANUP(inOptions->mInputName); 2068 if(NULL != inOptions->mInput && stdin != inOptions->mInput) 2069 { 2070 fclose(inOptions->mInput); 2071 } 2072 CLEANUP(inOptions->mOutputName); 2073 if(NULL != inOptions->mOutput && stdout != inOptions->mOutput) 2074 { 2075 fclose(inOptions->mOutput); 2076 } 2077 while(0 != inOptions->mMatchModuleCount) 2078 { 2079 inOptions->mMatchModuleCount--; 2080 CLEANUP(inOptions->mMatchModules[inOptions->mMatchModuleCount]); 2081 } 2082 CLEANUP(inOptions->mMatchModules); 2083 2084 cleanSymDB(&inOptions->mSymDB); 2085 2086 memset(inOptions, 0, sizeof(Options)); 2087 } 2088 2089 2090 void showHelp(Options* inOptions) 2091 /* 2092 ** Show some simple help text on usage. 2093 */ 2094 { 2095 int loop = 0; 2096 const int switchCount = sizeof(gSwitches) / sizeof(gSwitches[0]); 2097 const char* valueText = NULL; 2098 2099 printf("usage:\t%s [arguments]\n", inOptions->mProgramName); 2100 printf("\n"); 2101 printf("arguments:\n"); 2102 2103 for(loop = 0; loop < switchCount; loop++) 2104 { 2105 if(gSwitches[loop]->mHasValue) 2106 { 2107 valueText = " <value>"; 2108 } 2109 else 2110 { 2111 valueText = ""; 2112 } 2113 2114 printf("\t%s%s\n", gSwitches[loop]->mLongName, valueText); 2115 printf("\t %s%s", gSwitches[loop]->mShortName, valueText); 2116 printf(DESC_NEWLINE "%s\n\n", gSwitches[loop]->mDescription); 2117 } 2118 2119 printf("This tool normalizes MS linker .map files for use by other tools.\n"); 2120 } 2121 2122 2123 int batchMode(Options* inOptions) 2124 /* 2125 ** Batch mode means that the input file is actually a list of map files. 2126 ** We simply swap out our input file names while we do this. 2127 */ 2128 { 2129 int retval = 0; 2130 char lineBuf[0x400]; 2131 FILE* realInput = NULL; 2132 char* realInputName = NULL; 2133 FILE* mapFile = NULL; 2134 int finalRes = 0; 2135 2136 realInput = inOptions->mInput; 2137 realInputName = inOptions->mInputName; 2138 2139 while(0 == retval && NULL != fgets(lineBuf, sizeof(lineBuf), realInput)) 2140 { 2141 trimWhite(lineBuf); 2142 2143 /* 2144 ** Skip/allow blank lines. 2145 */ 2146 if('\0' == lineBuf[0]) 2147 { 2148 continue; 2149 } 2150 2151 /* 2152 ** Override what we believe to be the input for this line. 2153 */ 2154 inOptions->mInputName = lineBuf; 2155 inOptions->mInput = fopen(lineBuf, "r"); 2156 if(NULL != inOptions->mInput) 2157 { 2158 int mapRes = 0; 2159 2160 /* 2161 ** Do it. 2162 */ 2163 mapRes = map2tsv(inOptions); 2164 2165 /* 2166 ** We report the first error that we encounter, but we continue. 2167 ** This is batch mode after all. 2168 */ 2169 if(0 == finalRes) 2170 { 2171 finalRes = mapRes; 2172 } 2173 2174 /* 2175 ** Close the input file. 2176 */ 2177 fclose(inOptions->mInput); 2178 } 2179 else 2180 { 2181 retval = __LINE__; 2182 ERROR_REPORT(retval, lineBuf, "Unable to open map file."); 2183 break; 2184 } 2185 } 2186 2187 if(0 == retval && 0 != ferror(realInput)) 2188 { 2189 retval = __LINE__; 2190 ERROR_REPORT(retval, realInputName, "Unable to read file."); 2191 } 2192 2193 /* 2194 ** Restore what we've swapped. 2195 */ 2196 inOptions->mInput = realInput; 2197 inOptions->mInputName = realInputName; 2198 2199 /* 2200 ** Report first map file error if there were no other operational 2201 ** problems. 2202 */ 2203 if(0 == retval) 2204 { 2205 retval = finalRes; 2206 } 2207 2208 return retval; 2209 } 2210 2211 2212 int main(int inArgc, char** inArgv) 2213 { 2214 int retval = 0; 2215 Options options; 2216 2217 retval = initOptions(&options, inArgc, inArgv); 2218 if(options.mHelp) 2219 { 2220 showHelp(&options); 2221 } 2222 else if(0 == retval) 2223 { 2224 if(options.mBatchMode) 2225 { 2226 retval = batchMode(&options); 2227 } 2228 else 2229 { 2230 retval = map2tsv(&options); 2231 } 2232 } 2233 2234 cleanOptions(&options); 2235 return retval; 2236 } 2237 2238