1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2003-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: July 10 2003 10 * Since: ICU 2.8 11 ********************************************************************** 12 */ 13 #include "tzfile.h" // from Olson tzcode archive, copied to this dir 14 15 #ifdef WIN32 16 17 #include <windows.h> 18 #undef min // windows.h/STL conflict 19 #undef max // windows.h/STL conflict 20 // "identifier was truncated to 'number' characters" warning 21 #pragma warning(disable: 4786) 22 23 #else 24 25 #include <unistd.h> 26 #include <stdio.h> 27 #include <dirent.h> 28 #include <string.h> 29 #include <sys/stat.h> 30 31 #endif 32 33 #include <algorithm> 34 #include <cassert> 35 #include <ctime> 36 #include <fstream> 37 #include <iomanip> 38 #include <iostream> 39 #include <iterator> 40 #include <limits> 41 #include <map> 42 #include <set> 43 #include <sstream> 44 #include <sstream> 45 #include <stdexcept> 46 #include <string> 47 #include <vector> 48 49 #include "tz2icu.h" 50 #include "unicode/uversion.h" 51 52 using namespace std; 53 54 bool ICU44PLUS = TRUE; 55 string TZ_RESOURCE_NAME = ICU_TZ_RESOURCE; 56 57 //-------------------------------------------------------------------- 58 // Time utilities 59 //-------------------------------------------------------------------- 60 61 const int64_t SECS_PER_YEAR = 31536000; // 365 days 62 const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days 63 const int64_t LOWEST_TIME32 = (int64_t)((int32_t)0x80000000); 64 const int64_t HIGHEST_TIME32 = (int64_t)((int32_t)0x7fffffff); 65 66 bool isLeap(int32_t y) { 67 return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian 68 } 69 70 int64_t secsPerYear(int32_t y) { 71 return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR; 72 } 73 74 /** 75 * Given a calendar year, return the GMT epoch seconds for midnight 76 * GMT of January 1 of that year. yearToSeconds(1970) == 0. 77 */ 78 int64_t yearToSeconds(int32_t year) { 79 // inefficient but foolproof 80 int64_t s = 0; 81 int32_t y = 1970; 82 while (y < year) { 83 s += secsPerYear(y++); 84 } 85 while (y > year) { 86 s -= secsPerYear(--y); 87 } 88 return s; 89 } 90 91 /** 92 * Given 1970 GMT epoch seconds, return the calendar year containing 93 * that time. secondsToYear(0) == 1970. 94 */ 95 int32_t secondsToYear(int64_t seconds) { 96 // inefficient but foolproof 97 int32_t y = 1970; 98 int64_t s = 0; 99 if (seconds >= 0) { 100 for (;;) { 101 s += secsPerYear(y++); 102 if (s > seconds) break; 103 } 104 --y; 105 } else { 106 for (;;) { 107 s -= secsPerYear(--y); 108 if (s <= seconds) break; 109 } 110 } 111 return y; 112 } 113 114 //-------------------------------------------------------------------- 115 // Types 116 //-------------------------------------------------------------------- 117 118 struct FinalZone; 119 struct FinalRule; 120 struct SimplifiedZoneType; 121 122 // A transition from one ZoneType to another 123 // Minimal size = 5 bytes (4+1) 124 struct Transition { 125 int64_t time; // seconds, 1970 epoch 126 int32_t type; // index into 'ZoneInfo.types' 0..255 127 Transition(int64_t _time, int32_t _type) { 128 time = _time; 129 type = _type; 130 } 131 }; 132 133 // A behavior mode (what zic calls a 'type') of a time zone. 134 // Minimal size = 6 bytes (4+1+3bits) 135 // SEE: SimplifiedZoneType 136 struct ZoneType { 137 int64_t rawoffset; // raw seconds offset from GMT 138 int64_t dstoffset; // dst seconds offset from GMT 139 140 // We don't really need any of the following, but they are 141 // retained for possible future use. See SimplifiedZoneType. 142 int32_t abbr; // index into ZoneInfo.abbrs 0..n-1 143 bool isdst; 144 bool isstd; 145 bool isgmt; 146 147 ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList 148 149 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {} 150 151 // A restricted equality, of just the raw and dst offset 152 bool matches(const ZoneType& other) { 153 return rawoffset == other.rawoffset && 154 dstoffset == other.dstoffset; 155 } 156 }; 157 158 // A collection of transitions from one ZoneType to another, together 159 // with a list of the ZoneTypes. A ZoneInfo object may have a long 160 // list of transitions between a smaller list of ZoneTypes. 161 // 162 // This object represents the contents of a single zic-created 163 // zoneinfo file. 164 struct ZoneInfo { 165 vector<Transition> transitions; 166 vector<ZoneType> types; 167 vector<string> abbrs; 168 169 string finalRuleID; 170 int32_t finalOffset; 171 int32_t finalYear; // -1 if none 172 173 // If this is an alias, then all other fields are meaningless, and 174 // this field will point to the "real" zone 0..n-1. 175 int32_t aliasTo; // -1 if this is a "real" zone 176 177 // If there are aliases TO this zone, then the following set will 178 // contain their index numbers (each index >= 0). 179 set<int32_t> aliases; 180 181 ZoneInfo() : finalYear(-1), aliasTo(-1) {} 182 183 void mergeFinalData(const FinalZone& fz); 184 185 void optimizeTypeList(); 186 187 // Set this zone to be an alias TO another zone. 188 void setAliasTo(int32_t index); 189 190 // Clear the list of aliases OF this zone. 191 void clearAliases(); 192 193 // Add an alias to the list of aliases OF this zone. 194 void addAlias(int32_t index); 195 196 // Is this an alias to another zone? 197 bool isAlias() const { 198 return aliasTo >= 0; 199 } 200 201 // Retrieve alias list 202 const set<int32_t>& getAliases() const { 203 return aliases; 204 } 205 206 void print(ostream& os, const string& id) const; 207 }; 208 209 void ZoneInfo::clearAliases() { 210 assert(aliasTo < 0); 211 aliases.clear(); 212 } 213 214 void ZoneInfo::addAlias(int32_t index) { 215 assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end()); 216 aliases.insert(index); 217 } 218 219 void ZoneInfo::setAliasTo(int32_t index) { 220 assert(index >= 0); 221 assert(aliases.size() == 0); 222 aliasTo = index; 223 } 224 225 typedef map<string, ZoneInfo> ZoneMap; 226 227 typedef ZoneMap::const_iterator ZoneMapIter; 228 229 //-------------------------------------------------------------------- 230 // ZONEINFO 231 //-------------------------------------------------------------------- 232 233 // Global map holding all our ZoneInfo objects, indexed by id. 234 ZoneMap ZONEINFO; 235 236 //-------------------------------------------------------------------- 237 // zoneinfo file parsing 238 //-------------------------------------------------------------------- 239 240 // Read zic-coded 32-bit integer from file 241 int64_t readcoded(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 242 int64_t maxv=numeric_limits<int64_t>::max()) { 243 unsigned char buf[4]; // must be UNSIGNED 244 int64_t val=0; 245 file.read((char*)buf, 4); 246 for(int32_t i=0,shift=24;i<4;++i,shift-=8) { 247 val |= buf[i] << shift; 248 } 249 if (val < minv || val > maxv) { 250 ostringstream os; 251 os << "coded value out-of-range: " << val << ", expected [" 252 << minv << ", " << maxv << "]"; 253 throw out_of_range(os.str()); 254 } 255 return val; 256 } 257 258 // Read zic-coded 64-bit integer from file 259 int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 260 int64_t maxv=numeric_limits<int64_t>::max()) { 261 unsigned char buf[8]; // must be UNSIGNED 262 int64_t val=0; 263 file.read((char*)buf, 8); 264 for(int32_t i=0,shift=56;i<8;++i,shift-=8) { 265 val |= (int64_t)buf[i] << shift; 266 } 267 if (val < minv || val > maxv) { 268 ostringstream os; 269 os << "coded value out-of-range: " << val << ", expected [" 270 << minv << ", " << maxv << "]"; 271 throw out_of_range(os.str()); 272 } 273 return val; 274 } 275 276 // Read a boolean value 277 bool readbool(ifstream& file) { 278 char c; 279 file.read(&c, 1); 280 if (c!=0 && c!=1) { 281 ostringstream os; 282 os << "boolean value out-of-range: " << (int32_t)c; 283 throw out_of_range(os.str()); 284 } 285 return (c!=0); 286 } 287 288 /** 289 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo 290 * @param file an already-open file stream 291 */ 292 void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData) { 293 int32_t i; 294 295 // Check for TZ_ICU_MAGIC signature at file start. If we get a 296 // signature mismatch, it means we're trying to read a file which 297 // isn't a ICU-modified-zic-created zoneinfo file. Typically this 298 // means the user is passing in a "normal" zoneinfo directory, or 299 // a zoneinfo directory that is polluted with other files, or that 300 // the user passed in the wrong directory. 301 char buf[32]; 302 file.read(buf, 4); 303 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) { 304 throw invalid_argument("TZ_ICU_MAGIC signature missing"); 305 } 306 // skip additional Olson byte version 307 file.read(buf, 1); 308 // if '\0', we have just one copy of data, if '2' or '3', there is additional 309 // 64 bit version at the end. 310 if(buf[0]!=0 && buf[0]!='2' && buf[0]!='3') { 311 throw invalid_argument("Bad Olson version info"); 312 } 313 314 // Read reserved bytes. The first of these will be a version byte. 315 file.read(buf, 15); 316 if (*(ICUZoneinfoVersion*)&buf != TZ_ICU_VERSION) { 317 throw invalid_argument("File version mismatch"); 318 } 319 320 // Read array sizes 321 int64_t isgmtcnt = readcoded(file, 0); 322 int64_t isdstcnt = readcoded(file, 0); 323 int64_t leapcnt = readcoded(file, 0); 324 int64_t timecnt = readcoded(file, 0); 325 int64_t typecnt = readcoded(file, 0); 326 int64_t charcnt = readcoded(file, 0); 327 328 // Confirm sizes that we assume to be equal. These assumptions 329 // are drawn from a reading of the zic source (2003a), so they 330 // should hold unless the zic source changes. 331 if (isgmtcnt != typecnt || isdstcnt != typecnt) { 332 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt"); 333 } 334 335 // Used temporarily to store transition times and types. We need 336 // to do this because the times and types are stored in two 337 // separate arrays. 338 vector<int64_t> transitionTimes(timecnt, -1); // temporary 339 vector<int32_t> transitionTypes(timecnt, -1); // temporary 340 341 // Read transition times 342 for (i=0; i<timecnt; ++i) { 343 if (is64bitData) { 344 transitionTimes[i] = readcoded64(file); 345 } else { 346 transitionTimes[i] = readcoded(file); 347 } 348 } 349 350 // Read transition types 351 for (i=0; i<timecnt; ++i) { 352 unsigned char c; 353 file.read((char*) &c, 1); 354 int32_t t = (int32_t) c; 355 if (t < 0 || t >= typecnt) { 356 ostringstream os; 357 os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]"; 358 throw out_of_range(os.str()); 359 } 360 transitionTypes[i] = t; 361 } 362 363 // Build transitions vector out of corresponding times and types. 364 bool insertInitial = false; 365 if (is64bitData && !ICU44PLUS) { 366 if (timecnt > 0) { 367 int32_t minidx = -1; 368 for (i=0; i<timecnt; ++i) { 369 if (transitionTimes[i] < LOWEST_TIME32) { 370 if (minidx == -1 || transitionTimes[i] > transitionTimes[minidx]) { 371 // Preserve the latest transition before the 32bit minimum time 372 minidx = i; 373 } 374 } else if (transitionTimes[i] > HIGHEST_TIME32) { 375 // Skipping the rest of the transition data. We cannot put such 376 // transitions into zoneinfo.res, because data is limited to singed 377 // 32bit int by the ICU resource bundle. 378 break; 379 } else { 380 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); 381 } 382 } 383 384 if (minidx != -1) { 385 // If there are any transitions before the 32bit minimum time, 386 // put the type information with the 32bit minimum time 387 vector<Transition>::iterator itr = info.transitions.begin(); 388 info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx])); 389 } else { 390 // Otherwise, we need insert the initial type later 391 insertInitial = true; 392 } 393 } 394 } else { 395 for (i=0; i<timecnt; ++i) { 396 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); 397 } 398 } 399 400 // Read types (except for the isdst and isgmt flags, which come later (why??)) 401 for (i=0; i<typecnt; ++i) { 402 ZoneType type; 403 404 type.rawoffset = readcoded(file); 405 type.dstoffset = readcoded(file); 406 type.isdst = readbool(file); 407 408 unsigned char c; 409 file.read((char*) &c, 1); 410 type.abbr = (int32_t) c; 411 412 if (type.isdst != (type.dstoffset != 0)) { 413 throw invalid_argument("isdst does not reflect dstoffset"); 414 } 415 416 info.types.push_back(type); 417 } 418 419 assert(info.types.size() == (unsigned) typecnt); 420 421 if (insertInitial) { 422 assert(timecnt > 0); 423 assert(typecnt > 0); 424 425 int32_t initialTypeIdx = -1; 426 427 // Check if the first type is not dst 428 if (info.types.at(0).dstoffset != 0) { 429 // Initial type's rawoffset is same with the rawoffset after the 430 // first transition, but no DST is observed. 431 int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset; 432 // Look for matching type 433 for (i=0; i<(int32_t)info.types.size(); ++i) { 434 if (info.types.at(i).rawoffset == rawoffset0 435 && info.types.at(i).dstoffset == 0) { 436 initialTypeIdx = i; 437 break; 438 } 439 } 440 } else { 441 initialTypeIdx = 0; 442 } 443 assert(initialTypeIdx >= 0); 444 // Add the initial type associated with the lowest int32 time 445 vector<Transition>::iterator itr = info.transitions.begin(); 446 info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx)); 447 } 448 449 450 // Read the abbreviation string 451 if (charcnt) { 452 // All abbreviations are concatenated together, with a 0 at 453 // the end of each abbr. 454 char* str = new char[charcnt + 8]; 455 file.read(str, charcnt); 456 457 // Split abbreviations apart into individual strings. Record 458 // offset of each abbr in a vector. 459 vector<int32_t> abbroffset; 460 char *limit=str+charcnt; 461 for (char* p=str; p<limit; ++p) { 462 char* start = p; 463 while (*p != 0) ++p; 464 info.abbrs.push_back(string(start, p-start)); 465 abbroffset.push_back(start-str); 466 } 467 468 // Remap all the abbrs. Old value is offset into concatenated 469 // raw abbr strings. New value is index into vector of 470 // strings. E.g., 0,5,10,14 => 0,1,2,3. 471 472 // Keep track of which abbreviations get used. 473 vector<bool> abbrseen(abbroffset.size(), false); 474 475 for (vector<ZoneType>::iterator it=info.types.begin(); 476 it!=info.types.end(); 477 ++it) { 478 vector<int32_t>::const_iterator x= 479 find(abbroffset.begin(), abbroffset.end(), it->abbr); 480 if (x==abbroffset.end()) { 481 // TODO: Modify code to add a new string to the end of 482 // the abbr list when a middle offset is given, e.g., 483 // "abc*def*" where * == '\0', take offset of 1 and 484 // make the array "abc", "def", "bc", and translate 1 485 // => 2. NOT CRITICAL since we don't even use the 486 // abbr at this time. 487 #if 0 488 // TODO: Re-enable this warning if we start using 489 // the Olson abbr data, or if the above TODO is completed. 490 ostringstream os; 491 os << "Warning: unusual abbr offset " << it->abbr 492 << ", expected one of"; 493 for (vector<int32_t>::const_iterator y=abbroffset.begin(); 494 y!=abbroffset.end(); ++y) { 495 os << ' ' << *y; 496 } 497 cerr << os.str() << "; using 0" << endl; 498 #endif 499 it->abbr = 0; 500 } else { 501 int32_t index = x - abbroffset.begin(); 502 it->abbr = index; 503 abbrseen[index] = true; 504 } 505 } 506 507 for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) { 508 if (!abbrseen[ii]) { 509 cerr << "Warning: unused abbreviation: " << ii << endl; 510 } 511 } 512 } 513 514 // Read leap second info, if any. 515 // *** We discard leap second data. *** 516 for (i=0; i<leapcnt; ++i) { 517 readcoded(file); // transition time 518 readcoded(file); // total correction after above 519 } 520 521 // Read isstd flags 522 for (i=0; i<typecnt; ++i) info.types[i].isstd = readbool(file); 523 524 // Read isgmt flags 525 for (i=0; i<typecnt; ++i) info.types[i].isgmt = readbool(file); 526 } 527 528 //-------------------------------------------------------------------- 529 // Directory and file reading 530 //-------------------------------------------------------------------- 531 532 /** 533 * Process a single zoneinfo file, adding the data to ZONEINFO 534 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles" 535 * @param id the zone ID, e.g., "America/Los_Angeles" 536 */ 537 void handleFile(string path, string id) { 538 // Check for duplicate id 539 if (ZONEINFO.find(id) != ZONEINFO.end()) { 540 ostringstream os; 541 os << "duplicate zone ID: " << id; 542 throw invalid_argument(os.str()); 543 } 544 545 ifstream file(path.c_str(), ios::in | ios::binary); 546 if (!file) { 547 throw invalid_argument("can't open file"); 548 } 549 550 // eat 32bit data part 551 ZoneInfo info; 552 readzoneinfo(file, info, false); 553 554 // Check for errors 555 if (!file) { 556 throw invalid_argument("read error"); 557 } 558 559 // we only use 64bit part 560 ZoneInfo info64; 561 readzoneinfo(file, info64, true); 562 563 bool alldone = false; 564 int64_t eofPos = (int64_t) file.tellg(); 565 566 // '\n' + <envvar string> + '\n' after the 64bit version data 567 char ch = file.get(); 568 if (ch == 0x0a) { 569 bool invalidchar = false; 570 while (file.get(ch)) { 571 if (ch == 0x0a) { 572 break; 573 } 574 if (ch < 0x20) { 575 // must be printable ascii 576 invalidchar = true; 577 break; 578 } 579 } 580 if (!invalidchar) { 581 eofPos = (int64_t) file.tellg(); 582 file.seekg(0, ios::end); 583 eofPos = eofPos - (int64_t) file.tellg(); 584 if (eofPos == 0) { 585 alldone = true; 586 } 587 } 588 } 589 if (!alldone) { 590 ostringstream os; 591 os << (-eofPos) << " unprocessed bytes at end"; 592 throw invalid_argument(os.str()); 593 } 594 595 ZONEINFO[id] = info64; 596 } 597 598 /** 599 * Recursively scan the given directory, calling handleFile() for each 600 * file in the tree. The user should call with the root directory and 601 * a prefix of "". The function will call itself with non-empty 602 * prefix values. 603 */ 604 #ifdef WIN32 605 606 void scandir(string dirname, string prefix="") { 607 HANDLE hList; 608 WIN32_FIND_DATA FileData; 609 610 // Get the first file 611 hList = FindFirstFile((dirname + "\\*").c_str(), &FileData); 612 if (hList == INVALID_HANDLE_VALUE) { 613 cerr << "Error: Invalid directory: " << dirname << endl; 614 exit(1); 615 } 616 for (;;) { 617 string name(FileData.cFileName); 618 string path(dirname + "\\" + name); 619 if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { 620 if (name != "." && name != "..") { 621 scandir(path, prefix + name + "/"); 622 } 623 } else { 624 try { 625 string id = prefix + name; 626 handleFile(path, id); 627 } catch (const exception& e) { 628 cerr << "Error: While processing \"" << path << "\", " 629 << e.what() << endl; 630 exit(1); 631 } 632 } 633 634 if (!FindNextFile(hList, &FileData)) { 635 if (GetLastError() == ERROR_NO_MORE_FILES) { 636 break; 637 } // else...? 638 } 639 } 640 FindClose(hList); 641 } 642 643 #else 644 645 void scandir(string dir, string prefix="") { 646 DIR *dp; 647 struct dirent *dir_entry; 648 struct stat stat_info; 649 char pwd[512]; 650 vector<string> subdirs; 651 vector<string> subfiles; 652 653 if ((dp = opendir(dir.c_str())) == NULL) { 654 cerr << "Error: Invalid directory: " << dir << endl; 655 exit(1); 656 } 657 if (!getcwd(pwd, sizeof(pwd))) { 658 cerr << "Error: Directory name too long" << endl; 659 exit(1); 660 } 661 chdir(dir.c_str()); 662 while ((dir_entry = readdir(dp)) != NULL) { 663 string name = dir_entry->d_name; 664 string path = dir + "/" + name; 665 lstat(dir_entry->d_name,&stat_info); 666 if (S_ISDIR(stat_info.st_mode)) { 667 if (name != "." && name != "..") { 668 subdirs.push_back(path); 669 subdirs.push_back(prefix + name + "/"); 670 // scandir(path, prefix + name + "/"); 671 } 672 } else { 673 try { 674 string id = prefix + name; 675 subfiles.push_back(path); 676 subfiles.push_back(id); 677 // handleFile(path, id); 678 } catch (const exception& e) { 679 cerr << "Error: While processing \"" << path << "\", " 680 << e.what() << endl; 681 exit(1); 682 } 683 } 684 } 685 closedir(dp); 686 chdir(pwd); 687 688 for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) { 689 try { 690 handleFile(subfiles[i], subfiles[i+1]); 691 } catch (const exception& e) { 692 cerr << "Error: While processing \"" << subfiles[i] << "\", " 693 << e.what() << endl; 694 exit(1); 695 } 696 } 697 for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) { 698 scandir(subdirs[i], subdirs[i+1]); 699 } 700 } 701 702 #endif 703 704 //-------------------------------------------------------------------- 705 // Final zone and rule info 706 //-------------------------------------------------------------------- 707 708 /** 709 * Read and discard the current line. 710 */ 711 void consumeLine(istream& in) { 712 int32_t c; 713 do { 714 c = in.get(); 715 } while (c != EOF && c != '\n'); 716 } 717 718 enum { 719 DOM = 0, 720 DOWGEQ = 1, 721 DOWLEQ = 2 722 }; 723 724 const char* TIME_MODE[] = {"w", "s", "u"}; 725 726 // Allow 29 days in February because zic outputs February 29 727 // for rules like "last Sunday in February". 728 const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; 729 730 const int32_t HOUR = 3600; 731 732 struct FinalZone { 733 int32_t offset; // raw offset 734 int32_t year; // takes effect for y >= year 735 string ruleid; 736 set<string> aliases; 737 FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) : 738 offset(_offset), year(_year), ruleid(_ruleid) { 739 if (offset <= -16*HOUR || offset >= 16*HOUR) { 740 ostringstream os; 741 os << "Invalid input offset " << offset 742 << " for year " << year 743 << " and rule ID " << ruleid; 744 throw invalid_argument(os.str()); 745 } 746 if (year < 1900) { 747 ostringstream os; 748 os << "Invalid input year " << year 749 << " with offset " << offset 750 << " and rule ID " << ruleid; 751 throw invalid_argument(os.str()); 752 } 753 } 754 FinalZone() : offset(-1), year(-1) {} 755 void addLink(const string& alias) { 756 if (aliases.find(alias) != aliases.end()) { 757 ostringstream os; 758 os << "Duplicate alias " << alias; 759 throw invalid_argument(os.str()); 760 } 761 aliases.insert(alias); 762 } 763 }; 764 765 struct FinalRulePart { 766 int32_t mode; 767 int32_t month; 768 int32_t dom; 769 int32_t dow; 770 int32_t time; 771 int32_t offset; // dst offset, usually either 0 or 1:00 772 773 // Isstd and isgmt only have 3 valid states, corresponding to local 774 // wall time, local standard time, and GMT standard time. 775 // Here is how the isstd & isgmt flags are set by zic: 776 //| case 's': /* Standard */ 777 //| rp->r_todisstd = TRUE; 778 //| rp->r_todisgmt = FALSE; 779 //| case 'w': /* Wall */ 780 //| rp->r_todisstd = FALSE; 781 //| rp->r_todisgmt = FALSE; 782 //| case 'g': /* Greenwich */ 783 //| case 'u': /* Universal */ 784 //| case 'z': /* Zulu */ 785 //| rp->r_todisstd = TRUE; 786 //| rp->r_todisgmt = TRUE; 787 bool isstd; 788 bool isgmt; 789 790 bool isset; // used during building; later ignored 791 792 FinalRulePart() : isset(false) {} 793 void set(const string& id, 794 const string& _mode, 795 int32_t _month, 796 int32_t _dom, 797 int32_t _dow, 798 int32_t _time, 799 bool _isstd, 800 bool _isgmt, 801 int32_t _offset) { 802 if (isset) { 803 throw invalid_argument("FinalRulePart set twice"); 804 } 805 isset = true; 806 if (_mode == "DOWLEQ") { 807 mode = DOWLEQ; 808 } else if (_mode == "DOWGEQ") { 809 mode = DOWGEQ; 810 } else if (_mode == "DOM") { 811 mode = DOM; 812 } else { 813 throw invalid_argument("Unrecognized FinalRulePart mode"); 814 } 815 month = _month; 816 dom = _dom; 817 dow = _dow; 818 time = _time; 819 isstd = _isstd; 820 isgmt = _isgmt; 821 offset = _offset; 822 823 ostringstream os; 824 if (month < 0 || month >= 12) { 825 os << "Invalid input month " << month; 826 } 827 if (dom < 1 || dom > MONTH_LEN[month]) { 828 os << "Invalid input day of month " << dom; 829 } 830 if (mode != DOM && (dow < 0 || dow >= 7)) { 831 os << "Invalid input day of week " << dow; 832 } 833 if (offset < 0 || offset > (2 * HOUR)) { 834 os << "Invalid input offset " << offset; 835 } 836 if (isgmt && !isstd) { 837 os << "Invalid input isgmt && !isstd"; 838 } 839 if (!os.str().empty()) { 840 os << " for rule " 841 << id 842 << _mode 843 << month << dom << dow << time 844 << isstd << isgmt 845 << offset; 846 throw invalid_argument(os.str()); 847 } 848 } 849 850 /** 851 * Return the time mode as an ICU SimpleTimeZone int from 0..2; 852 * see simpletz.h. 853 */ 854 int32_t timemode() const { 855 if (isgmt) { 856 assert(isstd); 857 return 2; // gmt standard 858 } 859 if (isstd) { 860 return 1; // local standard 861 } 862 return 0; // local wall 863 } 864 865 // The SimpleTimeZone encoding method for rules is as follows: 866 // stz_dowim stz_dow 867 // DOM: dom 0 868 // DOWGEQ: dom -(dow+1) 869 // DOWLEQ: -dom -(dow+1) 870 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2 871 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2 872 // to encode 7, use stz_dowim=7, stz_dow=0 873 // Note that for this program and for SimpleTimeZone, 0==Jan, 874 // but for this program 0==Sun while for SimpleTimeZone 1==Sun. 875 876 /** 877 * Return a "dowim" param suitable for SimpleTimeZone. 878 */ 879 int32_t stz_dowim() const { 880 return (mode == DOWLEQ) ? -dom : dom; 881 } 882 883 /** 884 * Return a "dow" param suitable for SimpleTimeZone. 885 */ 886 int32_t stz_dow() const { 887 return (mode == DOM) ? 0 : -(dow+1); 888 } 889 }; 890 891 struct FinalRule { 892 FinalRulePart part[2]; 893 894 bool isset() const { 895 return part[0].isset && part[1].isset; 896 } 897 898 void print(ostream& os) const; 899 }; 900 901 map<string,FinalZone> finalZones; 902 map<string,FinalRule> finalRules; 903 904 map<string, set<string> > links; 905 map<string, string> reverseLinks; 906 map<string, string> linkSource; // id => "Olson link" or "ICU alias" 907 908 /** 909 * Predicate used to find FinalRule objects that do not have both 910 * sub-parts set (indicating an error in the input file). 911 */ 912 bool isNotSet(const pair<const string,FinalRule>& p) { 913 return !p.second.isset(); 914 } 915 916 /** 917 * Predicate used to find FinalZone objects that do not map to a known 918 * rule (indicating an error in the input file). 919 */ 920 bool mapsToUnknownRule(const pair<const string,FinalZone>& p) { 921 return finalRules.find(p.second.ruleid) == finalRules.end(); 922 } 923 924 /** 925 * This set is used to make sure each rule in finalRules is used at 926 * least once. First we populate it with all the rules from 927 * finalRules; then we remove all the rules referred to in 928 * finaleZones. 929 */ 930 set<string> ruleIDset; 931 932 void insertRuleID(const pair<string,FinalRule>& p) { 933 ruleIDset.insert(p.first); 934 } 935 936 void eraseRuleID(const pair<string,FinalZone>& p) { 937 ruleIDset.erase(p.second.ruleid); 938 } 939 940 /** 941 * Populate finalZones and finalRules from the given istream. 942 */ 943 void readFinalZonesAndRules(istream& in) { 944 945 for (;;) { 946 string token; 947 in >> token; 948 if (in.eof() || !in) { 949 break; 950 } else if (token == "zone") { 951 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0) 952 string id, ruleid; 953 int32_t offset, year; 954 in >> id >> offset >> year >> ruleid; 955 consumeLine(in); 956 finalZones[id] = FinalZone(offset, year, ruleid); 957 } else if (token == "rule") { 958 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600 959 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0 960 string id, mode; 961 int32_t month, dom, dow, time, offset; 962 bool isstd, isgmt; 963 in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset; 964 consumeLine(in); 965 FinalRule& fr = finalRules[id]; 966 int32_t p = fr.part[0].isset ? 1 : 0; 967 fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset); 968 } else if (token == "link") { 969 string fromid, toid; // fromid == "real" zone, toid == alias 970 in >> fromid >> toid; 971 // DO NOT consumeLine(in); 972 if (finalZones.find(toid) != finalZones.end()) { 973 throw invalid_argument("Bad link: `to' id is a \"real\" zone"); 974 } 975 976 links[fromid].insert(toid); 977 reverseLinks[toid] = fromid; 978 979 linkSource[fromid] = "Olson link"; 980 linkSource[toid] = "Olson link"; 981 } else if (token.length() > 0 && token[0] == '#') { 982 consumeLine(in); 983 } else { 984 throw invalid_argument("Unrecognized keyword"); 985 } 986 } 987 988 if (!in.eof() && !in) { 989 throw invalid_argument("Parse failure"); 990 } 991 992 // Perform validity check: Each rule should have data for 2 parts. 993 if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) { 994 throw invalid_argument("One or more incomplete rule pairs"); 995 } 996 997 // Perform validity check: Each zone should map to a known rule. 998 if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) { 999 throw invalid_argument("One or more zones refers to an unknown rule"); 1000 } 1001 1002 // Perform validity check: Each rule should be referred to by a zone. 1003 ruleIDset.clear(); 1004 for_each(finalRules.begin(), finalRules.end(), insertRuleID); 1005 for_each(finalZones.begin(), finalZones.end(), eraseRuleID); 1006 if (ruleIDset.size() != 0) { 1007 throw invalid_argument("Unused rules"); 1008 } 1009 } 1010 1011 //-------------------------------------------------------------------- 1012 // Resource bundle output 1013 //-------------------------------------------------------------------- 1014 1015 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT 1016 1017 void ZoneInfo::print(ostream& os, const string& id) const { 1018 // Implement compressed format #2: 1019 os << " /* " << id << " */ "; 1020 1021 if (aliasTo >= 0) { 1022 assert(aliases.size() == 0); 1023 os << ":int { " << aliasTo << " } "; // No endl - save room for comment. 1024 return; 1025 } 1026 1027 if (ICU44PLUS) { 1028 os << ":table {" << endl; 1029 } else { 1030 os << ":array {" << endl; 1031 } 1032 1033 vector<Transition>::const_iterator trn; 1034 vector<ZoneType>::const_iterator typ; 1035 1036 bool first; 1037 1038 if (ICU44PLUS) { 1039 trn = transitions.begin(); 1040 1041 // pre 32bit transitions 1042 if (trn != transitions.end() && trn->time < LOWEST_TIME32) { 1043 os << " transPre32:intvector { "; 1044 for (first = true; trn != transitions.end() && trn->time < LOWEST_TIME32; ++trn) { 1045 if (!first) { 1046 os<< ", "; 1047 } 1048 first = false; 1049 os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff); 1050 } 1051 os << " }" << endl; 1052 } 1053 1054 // 32bit transtions 1055 if (trn != transitions.end() && trn->time < HIGHEST_TIME32) { 1056 os << " trans:intvector { "; 1057 for (first = true; trn != transitions.end() && trn->time < HIGHEST_TIME32; ++trn) { 1058 if (!first) { 1059 os << ", "; 1060 } 1061 first = false; 1062 os << trn->time; 1063 } 1064 os << " }" << endl; 1065 } 1066 1067 // post 32bit transitons 1068 if (trn != transitions.end()) { 1069 os << " transPost32:intvector { "; 1070 for (first = true; trn != transitions.end(); ++trn) { 1071 if (!first) { 1072 os<< ", "; 1073 } 1074 first = false; 1075 os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff); 1076 } 1077 os << " }" << endl; 1078 } 1079 } else { 1080 os << " :intvector { "; 1081 for (trn = transitions.begin(), first = true; trn != transitions.end(); ++trn) { 1082 if (!first) os << ", "; 1083 first = false; 1084 os << trn->time; 1085 } 1086 os << " }" << endl; 1087 } 1088 1089 1090 first=true; 1091 if (ICU44PLUS) { 1092 os << " typeOffsets:intvector { "; 1093 } else { 1094 os << " :intvector { "; 1095 } 1096 for (typ = types.begin(); typ != types.end(); ++typ) { 1097 if (!first) os << ", "; 1098 first = false; 1099 os << typ->rawoffset << ", " << typ->dstoffset; 1100 } 1101 os << " }" << endl; 1102 1103 if (ICU44PLUS) { 1104 if (transitions.size() != 0) { 1105 os << " typeMap:bin { \"" << hex << setfill('0'); 1106 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1107 os << setw(2) << trn->type; 1108 } 1109 os << dec << "\" }" << endl; 1110 } 1111 } else { 1112 os << " :bin { \"" << hex << setfill('0'); 1113 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1114 os << setw(2) << trn->type; 1115 } 1116 os << dec << "\" }" << endl; 1117 } 1118 1119 // Final zone info, if any 1120 if (finalYear != -1) { 1121 if (ICU44PLUS) { 1122 os << " finalRule { \"" << finalRuleID << "\" }" << endl; 1123 os << " finalRaw:int { " << finalOffset << " }" << endl; 1124 os << " finalYear:int { " << finalYear << " }" << endl; 1125 } else { 1126 os << " \"" << finalRuleID << "\"" << endl; 1127 os << " :intvector { " << finalOffset << ", " 1128 << finalYear << " }" << endl; 1129 } 1130 } 1131 1132 // Alias list, if any 1133 if (aliases.size() != 0) { 1134 first = true; 1135 if (ICU44PLUS) { 1136 os << " links:intvector { "; 1137 } else { 1138 os << " :intvector { "; 1139 } 1140 for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { 1141 if (!first) os << ", "; 1142 first = false; 1143 os << *i; 1144 } 1145 os << " }" << endl; 1146 } 1147 1148 os << " } "; // no trailing 'endl', so comments can be placed. 1149 } 1150 1151 inline ostream& 1152 operator<<(ostream& os, const ZoneMap& zoneinfo) { 1153 int32_t c = 0; 1154 for (ZoneMapIter it = zoneinfo.begin(); 1155 it != zoneinfo.end(); 1156 ++it) { 1157 if(c && !ICU44PLUS) os << ","; 1158 it->second.print(os, it->first); 1159 os << "//Z#" << c++ << endl; 1160 } 1161 return os; 1162 } 1163 1164 // print the string list 1165 ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { 1166 int32_t n = 0; // count 1167 int32_t col = 0; // column 1168 os << " Names {" << endl 1169 << " "; 1170 for (ZoneMapIter it = zoneinfo.begin(); 1171 it != zoneinfo.end(); 1172 ++it) { 1173 if(n) { 1174 os << ","; 1175 col ++; 1176 } 1177 const string& id = it->first; 1178 os << "\"" << id << "\""; 1179 col += id.length() + 2; 1180 if(col >= 50) { 1181 os << " // " << n << endl 1182 << " "; 1183 col = 0; 1184 } 1185 n++; 1186 } 1187 os << " // " << (n-1) << endl 1188 << " }" << endl; 1189 1190 return os; 1191 } 1192 1193 //-------------------------------------------------------------------- 1194 // main 1195 //-------------------------------------------------------------------- 1196 1197 // Unary predicate for finding transitions after a given time 1198 bool isAfter(const Transition t, int64_t thresh) { 1199 return t.time >= thresh; 1200 } 1201 1202 /** 1203 * A zone type that contains only the raw and dst offset. Used by the 1204 * optimizeTypeList() method. 1205 */ 1206 struct SimplifiedZoneType { 1207 int64_t rawoffset; 1208 int64_t dstoffset; 1209 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {} 1210 SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset), 1211 dstoffset(t.dstoffset) {} 1212 bool operator<(const SimplifiedZoneType& t) const { 1213 return rawoffset < t.rawoffset || 1214 (rawoffset == t.rawoffset && 1215 dstoffset < t.dstoffset); 1216 } 1217 }; 1218 1219 /** 1220 * Construct a ZoneType from a SimplifiedZoneType. Note that this 1221 * discards information; the new ZoneType will have meaningless 1222 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate, 1223 * since ignoring these is how we do optimization (we have no use for 1224 * these in historical transitions). 1225 */ 1226 ZoneType::ZoneType(const SimplifiedZoneType& t) : 1227 rawoffset(t.rawoffset), dstoffset(t.dstoffset), 1228 abbr(-1), isdst(false), isstd(false), isgmt(false) {} 1229 1230 /** 1231 * Optimize the type list to remove excess entries. The type list may 1232 * contain entries that are distinct only in terms of their dst, std, 1233 * or gmt flags. Since we don't care about those flags, we can reduce 1234 * the type list to a set of unique raw/dst offset pairs, and remap 1235 * the type indices in the transition list, which stores, for each 1236 * transition, a transition time and a type index. 1237 */ 1238 void ZoneInfo::optimizeTypeList() { 1239 // Assemble set of unique types; only those in the `transitions' 1240 // list, since there may be unused types in the `types' list 1241 // corresponding to transitions that have been trimmed (during 1242 // merging of final data). 1243 1244 if (aliasTo >= 0) return; // Nothing to do for aliases 1245 1246 if (!ICU44PLUS) { 1247 // This is the old logic which has a bug, which occasionally removes 1248 // the type before the first transition. The problem was fixed 1249 // by inserting the dummy transition indirectly. 1250 1251 // If there are zero transitions and one type, then leave that as-is. 1252 if (transitions.size() == 0) { 1253 if (types.size() != 1) { 1254 cerr << "Error: transition count = 0, type count = " << types.size() << endl; 1255 } 1256 return; 1257 } 1258 1259 set<SimplifiedZoneType> simpleset; 1260 for (vector<Transition>::const_iterator i=transitions.begin(); 1261 i!=transitions.end(); ++i) { 1262 assert(i->type < (int32_t)types.size()); 1263 simpleset.insert(types[i->type]); 1264 } 1265 1266 // Map types to integer indices 1267 map<SimplifiedZoneType,int32_t> simplemap; 1268 int32_t n=0; 1269 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); 1270 i!=simpleset.end(); ++i) { 1271 simplemap[*i] = n++; 1272 } 1273 1274 // Remap transitions 1275 for (vector<Transition>::iterator i=transitions.begin(); 1276 i!=transitions.end(); ++i) { 1277 assert(i->type < (int32_t)types.size()); 1278 ZoneType oldtype = types[i->type]; 1279 SimplifiedZoneType newtype(oldtype); 1280 assert(simplemap.find(newtype) != simplemap.end()); 1281 i->type = simplemap[newtype]; 1282 } 1283 1284 // Replace type list 1285 types.clear(); 1286 copy(simpleset.begin(), simpleset.end(), back_inserter(types)); 1287 1288 } else { 1289 if (types.size() > 1) { 1290 // Note: localtime uses the very first non-dst type as initial offsets. 1291 // If all types are DSTs, the very first type is treated as the initial offsets. 1292 1293 // Decide a type used as the initial offsets. ICU put the type at index 0. 1294 ZoneType initialType = types[0]; 1295 for (vector<ZoneType>::const_iterator i=types.begin(); i!=types.end(); ++i) { 1296 if (i->dstoffset == 0) { 1297 initialType = *i; 1298 break; 1299 } 1300 } 1301 1302 SimplifiedZoneType initialSimplifiedType(initialType); 1303 1304 // create a set of unique types, but ignoring fields which we're not interested in 1305 set<SimplifiedZoneType> simpleset; 1306 simpleset.insert(initialSimplifiedType); 1307 for (vector<Transition>::const_iterator i=transitions.begin(); i!=transitions.end(); ++i) { 1308 assert(i->type < (int32_t)types.size()); 1309 simpleset.insert(types[i->type]); 1310 } 1311 1312 // Map types to integer indices, however, keeping the first type at offset 0 1313 map<SimplifiedZoneType,int32_t> simplemap; 1314 simplemap[initialSimplifiedType] = 0; 1315 int32_t n = 1; 1316 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { 1317 if (*i < initialSimplifiedType || initialSimplifiedType < *i) { 1318 simplemap[*i] = n++; 1319 } 1320 } 1321 1322 // Remap transitions 1323 for (vector<Transition>::iterator i=transitions.begin(); 1324 i!=transitions.end(); ++i) { 1325 assert(i->type < (int32_t)types.size()); 1326 ZoneType oldtype = types[i->type]; 1327 SimplifiedZoneType newtype(oldtype); 1328 assert(simplemap.find(newtype) != simplemap.end()); 1329 i->type = simplemap[newtype]; 1330 } 1331 1332 // Replace type list 1333 types.clear(); 1334 types.push_back(initialSimplifiedType); 1335 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i!=simpleset.end(); ++i) { 1336 if (*i < initialSimplifiedType || initialSimplifiedType < *i) { 1337 types.push_back(*i); 1338 } 1339 } 1340 1341 // Reiterating transitions to remove any transitions which 1342 // do not actually change the raw/dst offsets 1343 int32_t prevTypeIdx = 0; 1344 for (vector<Transition>::iterator i=transitions.begin(); i!=transitions.end();) { 1345 if (i->type == prevTypeIdx) { 1346 // this is not a time transition, probably just name change 1347 // e.g. America/Resolute after 2006 in 2010b 1348 transitions.erase(i); 1349 } else { 1350 prevTypeIdx = i->type; 1351 i++; 1352 } 1353 } 1354 } 1355 } 1356 1357 } 1358 1359 /** 1360 * Merge final zone data into this zone. 1361 */ 1362 void ZoneInfo::mergeFinalData(const FinalZone& fz) { 1363 int32_t year = fz.year; 1364 int64_t seconds = yearToSeconds(year); 1365 1366 if (!ICU44PLUS) { 1367 if (seconds > HIGHEST_TIME32) { 1368 // Avoid transitions beyond signed 32bit max second. 1369 // This may result incorrect offset computation around 1370 // HIGHEST_TIME32. This is a limitation of ICU 1371 // before 4.4. 1372 seconds = HIGHEST_TIME32; 1373 } 1374 } 1375 1376 vector<Transition>::iterator it = 1377 find_if(transitions.begin(), transitions.end(), 1378 bind2nd(ptr_fun(isAfter), seconds)); 1379 transitions.erase(it, transitions.end()); 1380 1381 if (finalYear != -1) { 1382 throw invalid_argument("Final zone already merged in"); 1383 } 1384 finalYear = fz.year; 1385 finalOffset = fz.offset; 1386 finalRuleID = fz.ruleid; 1387 } 1388 1389 /** 1390 * Merge the data from the given final zone into the core zone data by 1391 * calling the ZoneInfo member function mergeFinalData. 1392 */ 1393 void mergeOne(const string& zoneid, const FinalZone& fz) { 1394 if (ZONEINFO.find(zoneid) == ZONEINFO.end()) { 1395 throw invalid_argument("Unrecognized final zone ID"); 1396 } 1397 ZONEINFO[zoneid].mergeFinalData(fz); 1398 } 1399 1400 /** 1401 * Visitor function that merges the final zone data into the main zone 1402 * data structures. It calls mergeOne for each final zone and its 1403 * list of aliases. 1404 */ 1405 void mergeFinalZone(const pair<string,FinalZone>& p) { 1406 const string& id = p.first; 1407 const FinalZone& fz = p.second; 1408 1409 mergeOne(id, fz); 1410 } 1411 1412 /** 1413 * Print this rule in resource bundle format to os. ID and enclosing 1414 * braces handled elsewhere. 1415 */ 1416 void FinalRule::print(ostream& os) const { 1417 // First print the rule part that enters DST; then the rule part 1418 // that exits it. 1419 int32_t whichpart = (part[0].offset != 0) ? 0 : 1; 1420 assert(part[whichpart].offset != 0); 1421 assert(part[1-whichpart].offset == 0); 1422 1423 os << " "; 1424 for (int32_t i=0; i<2; ++i) { 1425 const FinalRulePart& p = part[whichpart]; 1426 whichpart = 1-whichpart; 1427 os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", " 1428 << p.time << ", " << p.timemode() << ", "; 1429 } 1430 os << part[whichpart].offset << endl; 1431 } 1432 1433 int main(int argc, char *argv[]) { 1434 string rootpath, zonetab, version; 1435 bool validArgs = FALSE; 1436 1437 if (argc == 4 || argc == 5) { 1438 validArgs = TRUE; 1439 rootpath = argv[1]; 1440 zonetab = argv[2]; 1441 version = argv[3]; 1442 if (argc == 5) { 1443 if (strcmp(argv[4], "--old") == 0) { 1444 ICU44PLUS = FALSE; 1445 TZ_RESOURCE_NAME = ICU_TZ_RESOURCE_OLD; 1446 } else { 1447 validArgs = FALSE; 1448 } 1449 } 1450 } 1451 if (!validArgs) { 1452 cout << "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl 1453 << " <dir> path to zoneinfo file tree generated by" << endl 1454 << " ICU-patched version of zic" << endl 1455 << " <cmap> country map, from tzdata archive," << endl 1456 << " typically named \"zone.tab\"" << endl 1457 << " <tzver> version string, such as \"2003e\"" << endl 1458 << " --old generating resource format before ICU4.4" << endl; 1459 exit(1); 1460 } 1461 1462 cout << "Olson data version: " << version << endl; 1463 cout << "ICU 4.4+ format: " << (ICU44PLUS ? "Yes" : "No") << endl; 1464 1465 try { 1466 ifstream finals(ICU_ZONE_FILE); 1467 if (finals) { 1468 readFinalZonesAndRules(finals); 1469 1470 cout << "Finished reading " << finalZones.size() 1471 << " final zones and " << finalRules.size() 1472 << " final rules from " ICU_ZONE_FILE << endl; 1473 } else { 1474 cerr << "Error: Unable to open " ICU_ZONE_FILE << endl; 1475 return 1; 1476 } 1477 } catch (const exception& error) { 1478 cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << endl; 1479 return 1; 1480 } 1481 1482 try { 1483 // Recursively scan all files below the given path, accumulating 1484 // their data into ZONEINFO. All files must be TZif files. Any 1485 // failure along the way will result in a call to exit(1). 1486 scandir(rootpath); 1487 } catch (const exception& error) { 1488 cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl; 1489 return 1; 1490 } 1491 1492 cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files [" 1493 << (ZONEINFO.begin())->first << ".." 1494 << (--ZONEINFO.end())->first << "]" << endl; 1495 1496 try { 1497 for_each(finalZones.begin(), finalZones.end(), mergeFinalZone); 1498 } catch (const exception& error) { 1499 cerr << "Error: While merging final zone data: " << error.what() << endl; 1500 return 1; 1501 } 1502 1503 // Process links (including ICU aliases). For each link set we have 1504 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more 1505 // aliases (e.g., PST, PST8PDT, ...). 1506 1507 // 1. Add all aliases as zone objects in ZONEINFO 1508 for (map<string,set<string> >::const_iterator i = links.begin(); 1509 i!=links.end(); ++i) { 1510 const string& olson = i->first; 1511 const set<string>& aliases = i->second; 1512 if (ZONEINFO.find(olson) == ZONEINFO.end()) { 1513 cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \"" 1514 << olson << "\"" << endl; 1515 return 1; 1516 } 1517 for (set<string>::const_iterator j=aliases.begin(); 1518 j!=aliases.end(); ++j) { 1519 ZONEINFO[*j] = ZoneInfo(); 1520 } 1521 } 1522 1523 // 2. Create a mapping from zones to index numbers 0..n-1. 1524 map<string,int32_t> zoneIDs; 1525 vector<string> zoneIDlist; 1526 int32_t z=0; 1527 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1528 zoneIDs[i->first] = z++; 1529 zoneIDlist.push_back(i->first); 1530 } 1531 assert(z == (int32_t) ZONEINFO.size()); 1532 1533 // 3. Merge aliases. Sometimes aliases link to other aliases; we 1534 // resolve these into simplest possible sets. 1535 map<string,set<string> > links2; 1536 map<string,string> reverse2; 1537 for (map<string,set<string> >::const_iterator i = links.begin(); 1538 i!=links.end(); ++i) { 1539 string olson = i->first; 1540 while (reverseLinks.find(olson) != reverseLinks.end()) { 1541 olson = reverseLinks[olson]; 1542 } 1543 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1544 links2[olson].insert(*j); 1545 reverse2[*j] = olson; 1546 } 1547 } 1548 links = links2; 1549 reverseLinks = reverse2; 1550 1551 if (false) { // Debugging: Emit link map 1552 for (map<string,set<string> >::const_iterator i = links.begin(); 1553 i!=links.end(); ++i) { 1554 cout << i->first << ": "; 1555 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1556 cout << *j << ", "; 1557 } 1558 cout << endl; 1559 } 1560 } 1561 1562 // 4. Update aliases 1563 for (map<string,set<string> >::const_iterator i = links.begin(); 1564 i!=links.end(); ++i) { 1565 const string& olson = i->first; 1566 const set<string>& aliases = i->second; 1567 ZONEINFO[olson].clearAliases(); 1568 ZONEINFO[olson].addAlias(zoneIDs[olson]); 1569 for (set<string>::const_iterator j=aliases.begin(); 1570 j!=aliases.end(); ++j) { 1571 assert(zoneIDs.find(olson) != zoneIDs.end()); 1572 assert(zoneIDs.find(*j) != zoneIDs.end()); 1573 assert(ZONEINFO.find(*j) != ZONEINFO.end()); 1574 ZONEINFO[*j].setAliasTo(zoneIDs[olson]); 1575 ZONEINFO[olson].addAlias(zoneIDs[*j]); 1576 } 1577 } 1578 1579 // Once merging of final data is complete, we can optimize the type list 1580 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1581 i->second.optimizeTypeList(); 1582 } 1583 1584 // Create the country map 1585 map<string, string> icuRegions; // ICU's custom zone -> country override 1586 map<string, set<string> > countryMap; // country -> set of zones 1587 map<string, string> reverseCountryMap; // zone -> country 1588 1589 try { 1590 // Read icuregions file to collect ICU's own zone-region mapping data. 1591 ifstream frg(ICU_REGIONS); 1592 if (frg) { 1593 string line; 1594 while (getline(frg, line)) { 1595 if (line[0] == '#') continue; 1596 1597 string zone, country; 1598 istringstream is(line); 1599 is >> zone >> country; 1600 if (zone.size() == 0) continue; 1601 if (country.size() < 2) { 1602 cerr << "Error: Can't parse " << line << " in " << ICU_REGIONS << endl; 1603 return 1; 1604 } 1605 icuRegions[zone] = country; 1606 } 1607 } else { 1608 cout << "No custom region map [icuregions]" << endl; 1609 } 1610 } catch (const exception& error) { 1611 cerr << "Error: While reading " << ICU_REGIONS << ": " << error.what() << endl; 1612 return 1; 1613 } 1614 1615 try { 1616 ifstream f(zonetab.c_str()); 1617 if (!f) { 1618 cerr << "Error: Unable to open " << zonetab << endl; 1619 return 1; 1620 } 1621 int32_t n = 0; 1622 string line; 1623 while (getline(f, line)) { 1624 string::size_type lb = line.find('#'); 1625 if (lb != string::npos) { 1626 line.resize(lb); // trim comments 1627 } 1628 string country, coord, zone; 1629 istringstream is(line); 1630 is >> country >> coord >> zone; 1631 if (country.size() == 0) continue; 1632 if (country.size() != 2 || zone.size() < 1) { 1633 cerr << "Error: Can't parse " << line << " in " << zonetab << endl; 1634 return 1; 1635 } 1636 if (ZONEINFO.find(zone) == ZONEINFO.end()) { 1637 cerr << "Error: Country maps to invalid zone " << zone 1638 << " in " << zonetab << endl; 1639 return 1; 1640 } 1641 if (icuRegions.find(zone) != icuRegions.end()) { 1642 // Custom override 1643 string customCountry = icuRegions[zone]; 1644 cout << "Region Mapping: custom override for " << zone 1645 << " " << country << " -> " << customCountry << endl; 1646 country = customCountry; 1647 } 1648 countryMap[country].insert(zone); 1649 reverseCountryMap[zone] = country; 1650 //cerr << (n+1) << ": " << country << " <=> " << zone << endl; 1651 ++n; 1652 } 1653 cout << "Finished reading " << n 1654 << " country entries from " << zonetab << endl; 1655 } catch (const exception& error) { 1656 cerr << "Error: While reading " << zonetab << ": " << error.what() << endl; 1657 return 1; 1658 } 1659 1660 // Merge ICU's own zone-region mapping data 1661 for (map<string,string>::const_iterator i = icuRegions.begin(); 1662 i != icuRegions.end(); ++i) { 1663 const string& zid(i->first); 1664 if (reverseCountryMap.find(zid) != reverseCountryMap.end()) { 1665 continue; 1666 } 1667 cout << "Region Mapping: custom data zone=" << zid 1668 << ", region=" << i->second << endl; 1669 1670 reverseCountryMap[zid] = i->second; 1671 countryMap[i->second].insert(zid); 1672 } 1673 1674 // Merge ICU aliases into country map. Don't merge any alias 1675 // that already has a country map, since that doesn't make sense. 1676 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we 1677 // should cross-map the countries between these two zones. 1678 for (map<string,set<string> >::const_iterator i = links.begin(); 1679 i!=links.end(); ++i) { 1680 const string& olson(i->first); 1681 if (reverseCountryMap.find(olson) == reverseCountryMap.end()) { 1682 continue; 1683 } 1684 string c = reverseCountryMap[olson]; 1685 const set<string>& aliases(i->second); 1686 for (set<string>::const_iterator j=aliases.begin(); 1687 j != aliases.end(); ++j) { 1688 if (reverseCountryMap.find(*j) == reverseCountryMap.end()) { 1689 countryMap[c].insert(*j); 1690 reverseCountryMap[*j] = c; 1691 //cerr << "Aliased country: " << c << " <=> " << *j << endl; 1692 } 1693 } 1694 } 1695 1696 // Create a pseudo-country containing all zones belonging to no country 1697 set<string> nocountry; 1698 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1699 if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) { 1700 nocountry.insert(i->first); 1701 } 1702 } 1703 countryMap[""] = nocountry; 1704 1705 // Get local time & year for below 1706 time_t sec; 1707 time(&sec); 1708 struct tm* now = localtime(&sec); 1709 int32_t thisYear = now->tm_year + 1900; 1710 1711 string filename = TZ_RESOURCE_NAME + ".txt"; 1712 // Write out a resource-bundle source file containing data for 1713 // all zones. 1714 ofstream file(filename.c_str()); 1715 if (file) { 1716 file << "//---------------------------------------------------------" << endl 1717 << "// Copyright (C) 2016 and later: Unicode, Inc. and others." << endl 1718 << "// License & terms of use: http://www.unicode.org/copyright.html#License" << endl 1719 << "//---------------------------------------------------------" << endl 1720 << "// Build tool: tz2icu" << endl 1721 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */ 1722 << "// tz database: ftp://ftp.iana.org/tz/" << endl 1723 << "// tz version: " << version << endl 1724 << "// ICU version: " << U_ICU_VERSION << endl 1725 << "//---------------------------------------------------------" << endl 1726 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl 1727 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl 1728 << "//---------------------------------------------------------" << endl 1729 << endl 1730 << TZ_RESOURCE_NAME << ":table(nofallback) {" << endl 1731 << " TZVersion { \"" << version << "\" }" << endl 1732 << " Zones:array { " << endl 1733 << ZONEINFO // Zones (the actual data) 1734 << " }" << endl; 1735 1736 // Names correspond to the Zones list, used for binary searching. 1737 printStringList ( file, ZONEINFO ); // print the Names list 1738 1739 // Final Rules are used if requested by the zone 1740 file << " Rules { " << endl; 1741 // Emit final rules 1742 int32_t frc = 0; 1743 for(map<string,FinalRule>::iterator i=finalRules.begin(); 1744 i!=finalRules.end(); ++i) { 1745 const string& id = i->first; 1746 const FinalRule& r = i->second; 1747 file << " " << id << ":intvector {" << endl; 1748 r.print(file); 1749 file << " } //_#" << frc++ << endl; 1750 } 1751 file << " }" << endl; 1752 1753 // Emit country (region) map. 1754 if (ICU44PLUS) { 1755 file << " Regions:array {" << endl; 1756 int32_t zn = 0; 1757 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1758 map<string, string>::iterator cit = reverseCountryMap.find(i->first); 1759 if (cit == reverseCountryMap.end()) { 1760 file << " \"001\","; 1761 } else { 1762 file << " \"" << cit->second << "\", "; 1763 } 1764 file << "//Z#" << zn++ << " " << i->first << endl; 1765 } 1766 file << " }" << endl; 1767 } else { 1768 file << " Regions { " << endl; 1769 int32_t rc = 0; 1770 for (map<string, set<string> >::const_iterator i=countryMap.begin(); 1771 i != countryMap.end(); ++i) { 1772 string country = i->first; 1773 const set<string>& zones(i->second); 1774 file << " "; 1775 if(country[0]==0) { 1776 file << "Default"; 1777 } 1778 file << country << ":intvector { "; 1779 bool first = true; 1780 for (set<string>::const_iterator j=zones.begin(); 1781 j != zones.end(); ++j) { 1782 if (!first) file << ", "; 1783 first = false; 1784 if (zoneIDs.find(*j) == zoneIDs.end()) { 1785 cerr << "Error: Nonexistent zone in country map: " << *j << endl; 1786 return 1; 1787 } 1788 file << zoneIDs[*j]; // emit the zone's index number 1789 } 1790 file << " } //R#" << rc++ << endl; 1791 } 1792 file << " }" << endl; 1793 } 1794 1795 file << "}" << endl; 1796 } 1797 1798 file.close(); 1799 1800 if (file) { // recheck error bit 1801 cout << "Finished writing " << TZ_RESOURCE_NAME << ".txt" << endl; 1802 } else { 1803 cerr << "Error: Unable to open/write to " << TZ_RESOURCE_NAME << ".txt" << endl; 1804 return 1; 1805 } 1806 } 1807 //eof 1808