1 /** 2 * Copyright(c) 2011 Trusted Logic. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name Trusted Logic nor the names of its 15 * contributors may be used to endorse or promote products derived 16 * from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include "lib_manifest2.h" 31 #include <string.h> 32 33 #define CHAR_CR (uint8_t)0x0D 34 #define CHAR_LF (uint8_t)0x0A 35 #define CHAR_TAB (uint8_t)0x09 36 37 #ifdef LIB_TOOL_IMPLEMENTATION 38 #include "exos_trace.h" 39 #define LOG_ERROR(pContext, msg, ...) log_error("%s - line %d: " msg, pContext->pManifestName, pContext->nLine, __VA_ARGS__) 40 static void log_error(const char* msg, ...) 41 { 42 va_list arg_list; 43 va_start(arg_list, msg); 44 exosTraceVPrintf("LIB_MANIFEST2", EXOS_TRACE_ORG_APPLI, K_PRINT_ERROR_LOG, msg, &arg_list); 45 va_end(arg_list); 46 } 47 #else 48 /* No error messages on the target */ 49 #ifdef __SYMBIAN32__ 50 #define LOG_ERROR(pContext...) 51 #else 52 #define LOG_ERROR(...) 53 #endif 54 #endif 55 56 void libManifest2InitContext( 57 LIB_MANIFEST2_CONTEXT* pContext) 58 { 59 pContext->nOffset = 0; 60 pContext->nLine = 1; 61 pContext->nSectionStartOffset = 0; 62 } 63 64 65 #define CHARACTER_NAME_FIRST 1 66 #define CHARACTER_NAME_SUBSEQUENT 2 67 #define CHARACTER_SECTION_NAME 3 68 69 static bool static_checkCharacter(uint8_t x, uint32_t nType) 70 { 71 /* [A-Za-z0-9] is acceptable for everyone */ 72 if (x >= (uint8_t)'a' && x <= (uint8_t)'z') 73 { 74 return true; 75 } 76 if (x >=(uint8_t)'A' && x <= (uint8_t)'Z') 77 { 78 return true; 79 } 80 if (x >= (uint8_t)'0' && x <= (uint8_t)'9') 81 { 82 return true; 83 } 84 if (nType == CHARACTER_NAME_FIRST) 85 { 86 return false; 87 } 88 /* Subsequent property name or section name characters can be [_.-] */ 89 if (x == (uint8_t)'_' || x == (uint8_t)'.' || x == (uint8_t)'-') 90 { 91 return true; 92 } 93 if (nType == CHARACTER_NAME_SUBSEQUENT) 94 { 95 return false; 96 } 97 /* Space is also allowed in section names */ 98 if (x == (uint8_t)' ') 99 { 100 return true; 101 } 102 return false; 103 } 104 105 static bool static_sectionNameEqualCaseInsensitive( 106 uint8_t* pName1, 107 uint32_t nName1Length, 108 uint8_t* pName2, 109 uint32_t nName2Length) 110 { 111 uint32_t i; 112 if (nName1Length != nName2Length) 113 { 114 return false; 115 } 116 for (i = 0; i < nName1Length; i++) 117 { 118 uint8_t x1 = pName1[i]; 119 uint8_t x2 = pName2[i]; 120 121 /* This code assumes the characters have been checked before */ 122 123 if ((x1 & ~0x20) != (x2 & ~0x20)) 124 { 125 return false; 126 } 127 } 128 return true; 129 } 130 131 static S_RESULT static_libManifest2GetNextItemInternal( 132 LIB_MANIFEST2_CONTEXT* pContext, 133 OUT uint8_t** ppName, 134 OUT uint32_t* pNameLength, 135 OUT uint8_t** ppValue, 136 OUT uint32_t* pValueLength) 137 { 138 S_RESULT nResult = S_ERROR_BAD_FORMAT; 139 uint8_t* pCurrent = pContext->pManifestContent + pContext->nOffset; 140 uint8_t* pEnd = pContext->pManifestContent + pContext->nManifestLength; 141 uint8_t* pLastNonWhitespaceChar; 142 uint32_t nCurrentSequenceCount; 143 uint32_t nCurrentChar; 144 145 if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED) 146 { 147 /* Skip leading BOM if we're at the start */ 148 if (pCurrent == pContext->pManifestContent) 149 { 150 /* We're at the start. Skip leading BOM if present */ 151 /* Note that the UTF-8 encoding of the BOM marker is EF BB BF */ 152 if (pContext->nManifestLength >= 3 153 && pCurrent[0] == 0xEF 154 && pCurrent[1] == 0xBB 155 && pCurrent[2] == 0xBF) 156 { 157 pCurrent += 3; 158 } 159 } 160 /* Skip comments and newlines */ 161 while (pCurrent < pEnd) 162 { 163 if (*pCurrent == (uint8_t)'#') 164 { 165 /* This is the start of a comment. Skip until end of line or end of file */ 166 pCurrent++; 167 while (pCurrent < pEnd && *pCurrent != CHAR_LF && *pCurrent != CHAR_CR) 168 { 169 if (*pCurrent == 0) 170 { 171 LOG_ERROR(pContext, "NUL character forbidden"); 172 goto error; 173 } 174 pCurrent++; 175 } 176 } 177 else if (*pCurrent == CHAR_CR) 178 { 179 /* Check if a LF follows */ 180 pCurrent++; 181 if (pCurrent < pEnd && *pCurrent == CHAR_LF) 182 { 183 pCurrent++; 184 } 185 pContext->nLine++; 186 } 187 else if (*pCurrent == CHAR_LF) 188 { 189 pCurrent++; 190 pContext->nLine++; 191 } 192 else if (*pCurrent == ' ' || *pCurrent == '\t') 193 { 194 /* this is the start of a all-whitespace line */ 195 /* NOTE: this is not allowed by the current spec: spec update needed */ 196 pCurrent++; 197 while (pCurrent < pEnd) 198 { 199 if (*pCurrent == CHAR_LF || *pCurrent == CHAR_CR) 200 { 201 /* End-of-line reached */ 202 break; 203 } 204 if (! (*pCurrent == ' ' || *pCurrent == '\t')) 205 { 206 LOG_ERROR(pContext, "A line starting with whitespaces must contain only whitespaces. Illegal character: 0x%02X", *pCurrent); 207 goto error; 208 } 209 pCurrent++; 210 } 211 } 212 else 213 { 214 break; 215 } 216 } 217 } 218 219 if (pCurrent >= pEnd) 220 { 221 /* No more properties */ 222 nResult = S_ERROR_ITEM_NOT_FOUND; 223 goto error; 224 } 225 226 if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS) 227 { 228 if (*pCurrent == '[') 229 { 230 /* This is a section descriptor */ 231 pCurrent++; 232 *ppName = pCurrent; 233 *ppValue = NULL; 234 *pValueLength = 0; 235 while (true) 236 { 237 if (pCurrent >= pEnd) 238 { 239 LOG_ERROR(pContext, "EOF reached within a section name"); 240 goto error; 241 } 242 if (*pCurrent == ']') 243 { 244 /* End of section name */ 245 *pNameLength = pCurrent - *ppName; 246 pCurrent++; 247 248 /* Skip spaces and tabs. Note that this is a deviation from the current spec 249 (see SWIS). Spec must be updated */ 250 while (pCurrent < pEnd) 251 { 252 if (*pCurrent == ' ' || *pCurrent == '\t') 253 { 254 pCurrent++; 255 } 256 else if (*pCurrent == CHAR_CR || *pCurrent == CHAR_LF) 257 { 258 /* End of line */ 259 break; 260 } 261 else 262 { 263 LOG_ERROR(pContext, "Non-space character follows a sectino header: 0x02X", *pCurrent); 264 } 265 } 266 pContext->nOffset = pCurrent - pContext->pManifestContent; 267 pContext->nSectionStartOffset = pContext->nOffset; 268 return S_SUCCESS; 269 } 270 /* Check section name character */ 271 if (!static_checkCharacter(*pCurrent, CHARACTER_SECTION_NAME)) 272 { 273 LOG_ERROR(pContext, "Invalid character for a section name: 0x%02X", *pCurrent); 274 goto error; 275 } 276 pCurrent++; 277 } 278 } 279 280 if (pContext->nSectionStartOffset == 0) 281 { 282 /* No section has been found yet. This is a bad format */ 283 LOG_ERROR(pContext, "Property found outside any section"); 284 goto error; 285 } 286 } 287 288 *ppName = pCurrent; 289 290 /* Check first character of name is in [A-Za-z0-9] */ 291 if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_FIRST)) 292 { 293 LOG_ERROR(pContext, "Invalid first character for a property name: 0x%02X", *pCurrent); 294 goto error; 295 } 296 pCurrent++; 297 pLastNonWhitespaceChar = pCurrent; 298 while (true) 299 { 300 if (pCurrent == pEnd) 301 { 302 LOG_ERROR(pContext, "EOF reached within a property name"); 303 goto error; 304 } 305 if (*pCurrent == ':') 306 { 307 /* Colon reached */ 308 break; 309 } 310 if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED) 311 { 312 /* In source manifest, allow space characters before the colon. 313 This is a deviation from the spec. Spec must be updated */ 314 if (*pCurrent == ' ' || *pCurrent == '\t') 315 { 316 pCurrent++; 317 continue; 318 } 319 } 320 if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_SUBSEQUENT)) 321 { 322 LOG_ERROR(pContext, "Invalid character for a property name: 0x%02X", *pCurrent); 323 goto error; 324 } 325 if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED) 326 { 327 /* Even in a source manifest, property name cannot contain spaces! */ 328 if (pCurrent != pLastNonWhitespaceChar) 329 { 330 LOG_ERROR(pContext, "Property name cannot contain spaces"); 331 goto error; 332 } 333 } 334 pCurrent++; 335 pLastNonWhitespaceChar = pCurrent; 336 } 337 *pNameLength = pLastNonWhitespaceChar - *ppName; 338 pCurrent++; 339 /* Skip spaces and tabs on the right of the colon */ 340 while (pCurrent < pEnd && (*pCurrent == ' ' || *pCurrent == '\t')) 341 { 342 pCurrent++; 343 } 344 *ppValue = pCurrent; 345 pLastNonWhitespaceChar = pCurrent-1; 346 347 nCurrentSequenceCount = 0; 348 nCurrentChar = 0; 349 350 while (pCurrent < pEnd) 351 { 352 uint32_t x; 353 x = *pCurrent; 354 if ((x & 0x80) == 0) 355 { 356 if (nCurrentSequenceCount != 0) 357 { 358 /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */ 359 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 360 goto error; 361 } 362 else if (x == 0) 363 { 364 /* The null character is forbidden */ 365 LOG_ERROR(pContext, "NUL character forbidden"); 366 goto error; 367 } 368 /* We have a well-formed Unicode character */ 369 nCurrentChar = x; 370 } 371 else if ((x & 0xC0) == 0xC0) 372 { 373 /* Start of a sequence */ 374 if (nCurrentSequenceCount != 0) 375 { 376 /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */ 377 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 378 goto error; 379 } 380 else if ((x & 0xE0) == 0xC0) 381 { 382 /* 1 byte follows */ 383 nCurrentChar = x & 0x1F; 384 nCurrentSequenceCount = 1; 385 if ((x & 0x1E) == 0) 386 { 387 /* Illegal UTF-8: overlong encoding of character in the [0x00-0x7F] range 388 (must use 1-byte encoding, not a 2-byte encoding) */ 389 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 390 goto error; 391 } 392 } 393 else if ((x & 0xF0) == 0xE0) 394 { 395 /* 2 bytes follow */ 396 nCurrentChar = x & 0x0F; 397 nCurrentSequenceCount = 2; 398 } 399 else if ((x & 0xF8) == 0xF0) 400 { 401 /* 3 bytes follow */ 402 nCurrentChar = x & 0x07; 403 nCurrentSequenceCount = 3; 404 } 405 else 406 { 407 /* Illegal start of sequence */ 408 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 409 goto error; 410 } 411 } 412 else if ((x & 0xC0) == 0x80) 413 { 414 /* Continuation byte */ 415 if (nCurrentSequenceCount == 0) 416 { 417 /* We were expecting a sequence start, not a continuation byte */ 418 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 419 goto error; 420 } 421 else 422 { 423 if (nCurrentSequenceCount == 2) 424 { 425 /* We're in a 3-byte sequence, check that we're not using an overlong sequence */ 426 if (nCurrentChar == 0 && (x & 0x20) == 0) 427 { 428 /* The character starts with at least 5 zero bits, so has fewer than 11 bits. It should 429 have used a 2-byte sequence, not a 3-byte sequence */ 430 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 431 goto error; 432 } 433 } 434 else if (nCurrentSequenceCount == 3) 435 { 436 if (nCurrentChar == 0 && (x & 0x30) == 0) 437 { 438 /* The character starts with at least 5 zero bits, so has fewer than 16 bits. It should 439 have used a 3-byte sequence, not a 4-byte sequence */ 440 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 441 goto error; 442 } 443 } 444 nCurrentSequenceCount--; 445 nCurrentChar = (nCurrentChar << 6) | (x & 0x3F); 446 } 447 } 448 else 449 { 450 /* Illegal byte */ 451 LOG_ERROR(pContext, "Invalid UTF-8 sequence"); 452 goto error; 453 } 454 if (nCurrentSequenceCount == 0) 455 { 456 /* nCurrentChar contains the current Unicode character */ 457 /* check character */ 458 if ((nCurrentChar >= 0xD800 && nCurrentChar < 0xE000) || nCurrentChar >= 0x110000) 459 { 460 /* Illegal code point */ 461 LOG_ERROR(pContext, "Invalid UTF-8 code point 0x%X", nCurrentChar); 462 goto error; 463 } 464 465 if (*pCurrent == CHAR_CR) 466 { 467 if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED) 468 { 469 /* Check if a LF follows */ 470 pCurrent++; 471 if (pCurrent < pEnd && *pCurrent == CHAR_LF) 472 { 473 pCurrent++; 474 } 475 pContext->nLine++; 476 } 477 goto end; 478 } 479 else if (*pCurrent == CHAR_LF) 480 { 481 if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED) 482 { 483 pCurrent++; 484 pContext->nLine++; 485 } 486 goto end; 487 } 488 } 489 if (*pCurrent != ' ' && *pCurrent != CHAR_TAB) 490 { 491 /* It's a non-whitespace char */ 492 pLastNonWhitespaceChar = pCurrent; 493 } 494 pCurrent++; 495 } 496 497 /* Hit the end of the manifest; Check that we're not in the middle of a sequence */ 498 if (nCurrentSequenceCount != 0) 499 { 500 LOG_ERROR(pContext, "File ends in the middle of an UTF-8 sequence"); 501 goto error; 502 } 503 504 end: 505 506 *pValueLength = pLastNonWhitespaceChar - *ppValue + 1; 507 pContext->nOffset = pCurrent - pContext->pManifestContent; 508 509 return S_SUCCESS; 510 511 error: 512 *ppName = NULL; 513 *pNameLength = 0; 514 *ppValue = NULL; 515 *pValueLength = 0; 516 return nResult; 517 } 518 519 S_RESULT libManifest2GetNextItem( 520 LIB_MANIFEST2_CONTEXT* pContext, 521 OUT uint8_t** ppName, 522 OUT uint32_t* pNameLength, 523 OUT uint8_t** ppValue, 524 OUT uint32_t* pValueLength) 525 { 526 if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED) 527 { 528 /* Don't check for duplicates in binary manifests */ 529 return static_libManifest2GetNextItemInternal( 530 pContext, 531 ppName, 532 pNameLength, 533 ppValue, 534 pValueLength); 535 } 536 else 537 { 538 uint32_t nOriginalOffset = pContext->nOffset; 539 uint32_t nOffset; 540 uint32_t nLine; 541 uint32_t nSectionStartOffset; 542 S_RESULT nResult; 543 uint8_t* pDupName; 544 uint32_t nDupNameLength; 545 uint8_t* pDupValue; 546 uint32_t nDupValueLength; 547 548 /* First get the item */ 549 nResult = static_libManifest2GetNextItemInternal( 550 pContext, 551 ppName, 552 pNameLength, 553 ppValue, 554 pValueLength); 555 if (nResult != S_SUCCESS) 556 { 557 return nResult; 558 } 559 /* Save the state of the parser */ 560 nOffset = pContext->nOffset; 561 nLine = pContext->nLine; 562 nSectionStartOffset = pContext->nSectionStartOffset; 563 if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE) 564 { 565 pContext->nOffset = 0; 566 } 567 else if (*ppValue == NULL) 568 { 569 /* The item was a section header. Iterate on all section headers and 570 check for duplicates */ 571 pContext->nOffset = 0; 572 } 573 else 574 { 575 if (nSectionStartOffset == 0) 576 { 577 LOG_ERROR(pContext, "Property definition outside any section"); 578 goto bad_format; 579 } 580 /* Iterate only on the properties in the section */ 581 pContext->nOffset = nSectionStartOffset; 582 } 583 while (pContext->nOffset < nOriginalOffset) 584 { 585 static_libManifest2GetNextItemInternal( 586 pContext, 587 &pDupName, 588 &nDupNameLength, 589 &pDupValue, 590 &nDupValueLength); 591 if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS && *ppValue == NULL) 592 { 593 /* Check for duplicate section names */ 594 if (pDupValue == NULL 595 && 596 static_sectionNameEqualCaseInsensitive( 597 *ppName, 598 *pNameLength, 599 pDupName, 600 nDupNameLength)) 601 { 602 pContext->nOffset = nOffset; 603 pContext->nLine = nLine; 604 pContext->nSectionStartOffset = nSectionStartOffset; 605 LOG_ERROR(pContext, "Duplicate section %.*s", nDupNameLength, pDupName); 606 goto bad_format; 607 } 608 } 609 else 610 { 611 /* Check for duplicate property name */ 612 if (nDupNameLength == *pNameLength && 613 memcmp(pDupName, *ppName, nDupNameLength) == 0) 614 { 615 /* Duplicated property */ 616 pContext->nOffset = nOffset; 617 pContext->nLine = nLine; 618 pContext->nSectionStartOffset = nSectionStartOffset; 619 LOG_ERROR(pContext,"Duplicate property %.*s", nDupNameLength, pDupName); 620 goto bad_format; 621 } 622 } 623 } 624 /* Everything's fine. restore context and exit */ 625 /* Restore the context */ 626 pContext->nOffset = nOffset; 627 pContext->nLine = nLine; 628 pContext->nSectionStartOffset = nSectionStartOffset; 629 630 return S_SUCCESS; 631 bad_format: 632 *ppName = NULL; 633 *pNameLength = 0; 634 *ppValue = NULL; 635 *pValueLength = 0; 636 return S_ERROR_BAD_FORMAT; 637 } 638 } 639 640 641 S_RESULT libManifest2CheckFormat( 642 LIB_MANIFEST2_CONTEXT* pContext, 643 uint32_t* pnItemCount) 644 { 645 uint32_t nPropertyCount = 0; 646 uint8_t* pName; 647 uint32_t nNameLength; 648 uint8_t* pValue; 649 uint32_t nValueLength; 650 S_RESULT nResult; 651 652 pContext->nOffset = 0; 653 pContext->nLine = 1; 654 pContext->nSectionStartOffset = 0; 655 656 while (true) 657 { 658 nResult = libManifest2GetNextItem( 659 pContext, 660 &pName, 661 &nNameLength, 662 &pValue, 663 &nValueLength); 664 if (nResult == S_ERROR_ITEM_NOT_FOUND) 665 { 666 if (pnItemCount != NULL) 667 { 668 *pnItemCount = nPropertyCount; 669 } 670 return S_SUCCESS; 671 } 672 if (nResult != S_SUCCESS) 673 { 674 return nResult; 675 } 676 nPropertyCount++; 677 } 678 } 679