1 2 /* 3 ********************************************************************** 4 * Copyright (c) 2003-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 ********************************************************************** 7 * Author: Alan Liu 8 * Created: July 10 2003 9 * Since: ICU 2.8 10 ********************************************************************** 11 */ 12 #include "tzfile.h" // from Olson tzcode archive, copied to this dir 13 14 #ifdef WIN32 15 16 #include <windows.h> 17 #undef min // windows.h/STL conflict 18 #undef max // windows.h/STL conflict 19 // "identifier was truncated to 'number' characters" warning 20 #pragma warning(disable: 4786) 21 22 #else 23 24 #include <unistd.h> 25 #include <stdio.h> 26 #include <dirent.h> 27 #include <string.h> 28 #include <sys/stat.h> 29 30 #endif 31 32 #include <algorithm> 33 #include <cassert> 34 #include <ctime> 35 #include <fstream> 36 #include <iomanip> 37 #include <iostream> 38 #include <iterator> 39 #include <limits> 40 #include <map> 41 #include <set> 42 #include <sstream> 43 #include <sstream> 44 #include <stdexcept> 45 #include <string> 46 #include <vector> 47 48 #include "tz2icu.h" 49 #include "unicode/uversion.h" 50 51 #define USE64BITDATA 52 53 using namespace std; 54 55 //-------------------------------------------------------------------- 56 // Time utilities 57 //-------------------------------------------------------------------- 58 59 const int64_t SECS_PER_YEAR = 31536000; // 365 days 60 const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days 61 const int64_t LOWEST_TIME32 = (int64_t)((int32_t)0x80000000); 62 const int64_t HIGHEST_TIME32 = (int64_t)((int32_t)0x7fffffff); 63 64 bool isLeap(int32_t y) { 65 return (y%4 == 0) && ((y%100 != 0) || (y%400 == 0)); // Gregorian 66 } 67 68 int64_t secsPerYear(int32_t y) { 69 return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR; 70 } 71 72 /** 73 * Given a calendar year, return the GMT epoch seconds for midnight 74 * GMT of January 1 of that year. yearToSeconds(1970) == 0. 75 */ 76 int64_t yearToSeconds(int32_t year) { 77 // inefficient but foolproof 78 int64_t s = 0; 79 int32_t y = 1970; 80 while (y < year) { 81 s += secsPerYear(y++); 82 } 83 while (y > year) { 84 s -= secsPerYear(--y); 85 } 86 return s; 87 } 88 89 /** 90 * Given 1970 GMT epoch seconds, return the calendar year containing 91 * that time. secondsToYear(0) == 1970. 92 */ 93 int32_t secondsToYear(int64_t seconds) { 94 // inefficient but foolproof 95 int32_t y = 1970; 96 int64_t s = 0; 97 if (seconds >= 0) { 98 for (;;) { 99 s += secsPerYear(y++); 100 if (s > seconds) break; 101 } 102 --y; 103 } else { 104 for (;;) { 105 s -= secsPerYear(--y); 106 if (s <= seconds) break; 107 } 108 } 109 return y; 110 } 111 112 //-------------------------------------------------------------------- 113 // Types 114 //-------------------------------------------------------------------- 115 116 struct FinalZone; 117 struct FinalRule; 118 struct SimplifiedZoneType; 119 120 // A transition from one ZoneType to another 121 // Minimal size = 5 bytes (4+1) 122 struct Transition { 123 int64_t time; // seconds, 1970 epoch 124 int32_t type; // index into 'ZoneInfo.types' 0..255 125 Transition(int64_t _time, int32_t _type) { 126 time = _time; 127 type = _type; 128 } 129 }; 130 131 // A behavior mode (what zic calls a 'type') of a time zone. 132 // Minimal size = 6 bytes (4+1+3bits) 133 // SEE: SimplifiedZoneType 134 struct ZoneType { 135 int64_t rawoffset; // raw seconds offset from GMT 136 int64_t dstoffset; // dst seconds offset from GMT 137 138 // We don't really need any of the following, but they are 139 // retained for possible future use. See SimplifiedZoneType. 140 int32_t abbr; // index into ZoneInfo.abbrs 0..n-1 141 bool isdst; 142 bool isstd; 143 bool isgmt; 144 145 ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList 146 147 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {} 148 149 // A restricted equality, of just the raw and dst offset 150 bool matches(const ZoneType& other) { 151 return rawoffset == other.rawoffset && 152 dstoffset == other.dstoffset; 153 } 154 }; 155 156 // A collection of transitions from one ZoneType to another, together 157 // with a list of the ZoneTypes. A ZoneInfo object may have a long 158 // list of transitions between a smaller list of ZoneTypes. 159 // 160 // This object represents the contents of a single zic-created 161 // zoneinfo file. 162 struct ZoneInfo { 163 vector<Transition> transitions; 164 vector<ZoneType> types; 165 vector<string> abbrs; 166 167 string finalRuleID; 168 int32_t finalOffset; 169 int32_t finalYear; // -1 if none 170 171 // If this is an alias, then all other fields are meaningless, and 172 // this field will point to the "real" zone 0..n-1. 173 int32_t aliasTo; // -1 if this is a "real" zone 174 175 // If there are aliases TO this zone, then the following set will 176 // contain their index numbers (each index >= 0). 177 set<int32_t> aliases; 178 179 ZoneInfo() : finalYear(-1), aliasTo(-1) {} 180 181 void mergeFinalData(const FinalZone& fz); 182 183 void optimizeTypeList(); 184 185 // Set this zone to be an alias TO another zone. 186 void setAliasTo(int32_t index); 187 188 // Clear the list of aliases OF this zone. 189 void clearAliases(); 190 191 // Add an alias to the list of aliases OF this zone. 192 void addAlias(int32_t index); 193 194 // Is this an alias to another zone? 195 bool isAlias() const { 196 return aliasTo >= 0; 197 } 198 199 // Retrieve alias list 200 const set<int32_t>& getAliases() const { 201 return aliases; 202 } 203 204 void print(ostream& os, const string& id) const; 205 }; 206 207 void ZoneInfo::clearAliases() { 208 assert(aliasTo < 0); 209 aliases.clear(); 210 } 211 212 void ZoneInfo::addAlias(int32_t index) { 213 assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end()); 214 aliases.insert(index); 215 } 216 217 void ZoneInfo::setAliasTo(int32_t index) { 218 assert(index >= 0); 219 assert(aliases.size() == 0); 220 aliasTo = index; 221 } 222 223 typedef map<string, ZoneInfo> ZoneMap; 224 225 typedef ZoneMap::const_iterator ZoneMapIter; 226 227 //-------------------------------------------------------------------- 228 // ZONEINFO 229 //-------------------------------------------------------------------- 230 231 // Global map holding all our ZoneInfo objects, indexed by id. 232 ZoneMap ZONEINFO; 233 234 //-------------------------------------------------------------------- 235 // zoneinfo file parsing 236 //-------------------------------------------------------------------- 237 238 // Read zic-coded 32-bit integer from file 239 int64_t readcoded(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 240 int64_t maxv=numeric_limits<int64_t>::max()) { 241 unsigned char buf[4]; // must be UNSIGNED 242 int64_t val=0; 243 file.read((char*)buf, 4); 244 for(int32_t i=0,shift=24;i<4;++i,shift-=8) { 245 val |= buf[i] << shift; 246 } 247 if (val < minv || val > maxv) { 248 ostringstream os; 249 os << "coded value out-of-range: " << val << ", expected [" 250 << minv << ", " << maxv << "]"; 251 throw out_of_range(os.str()); 252 } 253 return val; 254 } 255 256 // Read zic-coded 64-bit integer from file 257 int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(), 258 int64_t maxv=numeric_limits<int64_t>::max()) { 259 unsigned char buf[8]; // must be UNSIGNED 260 int64_t val=0; 261 file.read((char*)buf, 8); 262 for(int32_t i=0,shift=56;i<8;++i,shift-=8) { 263 val |= (int64_t)buf[i] << shift; 264 } 265 if (val < minv || val > maxv) { 266 ostringstream os; 267 os << "coded value out-of-range: " << val << ", expected [" 268 << minv << ", " << maxv << "]"; 269 throw out_of_range(os.str()); 270 } 271 return val; 272 } 273 274 // Read a boolean value 275 bool readbool(ifstream& file) { 276 char c; 277 file.read(&c, 1); 278 if (c!=0 && c!=1) { 279 ostringstream os; 280 os << "boolean value out-of-range: " << (int32_t)c; 281 throw out_of_range(os.str()); 282 } 283 return (c!=0); 284 } 285 286 /** 287 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo 288 * @param file an already-open file stream 289 */ 290 void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData=false) { 291 int32_t i; 292 293 // Check for TZ_ICU_MAGIC signature at file start. If we get a 294 // signature mismatch, it means we're trying to read a file which 295 // isn't a ICU-modified-zic-created zoneinfo file. Typically this 296 // means the user is passing in a "normal" zoneinfo directory, or 297 // a zoneinfo directory that is polluted with other files, or that 298 // the user passed in the wrong directory. 299 char buf[32]; 300 file.read(buf, 4); 301 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) { 302 throw invalid_argument("TZ_ICU_MAGIC signature missing"); 303 } 304 // skip additional Olson byte version 305 file.read(buf, 1); 306 // if '\0', we have just one copy of data, if '2', there is additional 307 // 64 bit version at the end. 308 if(buf[0]!=0 && buf[0]!='2') { 309 throw invalid_argument("Bad Olson version info"); 310 } 311 312 // Read reserved bytes. The first of these will be a version byte. 313 file.read(buf, 15); 314 if (*(ICUZoneinfoVersion*)&buf != TZ_ICU_VERSION) { 315 throw invalid_argument("File version mismatch"); 316 } 317 318 // Read array sizes 319 int64_t isgmtcnt = readcoded(file, 0); 320 int64_t isdstcnt = readcoded(file, 0); 321 int64_t leapcnt = readcoded(file, 0); 322 int64_t timecnt = readcoded(file, 0); 323 int64_t typecnt = readcoded(file, 0); 324 int64_t charcnt = readcoded(file, 0); 325 326 // Confirm sizes that we assume to be equal. These assumptions 327 // are drawn from a reading of the zic source (2003a), so they 328 // should hold unless the zic source changes. 329 if (isgmtcnt != typecnt || isdstcnt != typecnt) { 330 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisdstcnt, tth_typecnt"); 331 } 332 333 // Used temporarily to store transition times and types. We need 334 // to do this because the times and types are stored in two 335 // separate arrays. 336 vector<int64_t> transitionTimes(timecnt, -1); // temporary 337 vector<int32_t> transitionTypes(timecnt, -1); // temporary 338 339 // Read transition times 340 for (i=0; i<timecnt; ++i) { 341 if (is64bitData) { 342 transitionTimes[i] = readcoded64(file); 343 } else { 344 transitionTimes[i] = readcoded(file); 345 } 346 } 347 348 // Read transition types 349 for (i=0; i<timecnt; ++i) { 350 unsigned char c; 351 file.read((char*) &c, 1); 352 int32_t t = (int32_t) c; 353 if (t < 0 || t >= typecnt) { 354 ostringstream os; 355 os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << "]"; 356 throw out_of_range(os.str()); 357 } 358 transitionTypes[i] = t; 359 } 360 361 // Build transitions vector out of corresponding times and types. 362 bool insertInitial = false; 363 if (is64bitData) { 364 if (timecnt > 0) { 365 int32_t minidx = -1; 366 for (i=0; i<timecnt; ++i) { 367 if (transitionTimes[i] < LOWEST_TIME32) { 368 if (minidx == -1 || transitionTimes[i] > transitionTimes[minidx]) { 369 // Preserve the latest transition before the 32bit minimum time 370 minidx = i; 371 } 372 } else if (transitionTimes[i] > HIGHEST_TIME32) { 373 // Skipping the rest of the transition data. We cannot put such 374 // transitions into zoneinfo.res, because data is limited to singed 375 // 32bit int by the ICU resource bundle. 376 break; 377 } else { 378 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); 379 } 380 } 381 382 if (minidx != -1) { 383 // If there are any transitions before the 32bit minimum time, 384 // put the type information with the 32bit minimum time 385 vector<Transition>::iterator itr = info.transitions.begin(); 386 info.transitions.insert(itr, Transition(LOWEST_TIME32, transitionTypes[minidx])); 387 } else { 388 // Otherwise, we need insert the initial type later 389 insertInitial = true; 390 } 391 } 392 } else { 393 for (i=0; i<timecnt; ++i) { 394 info.transitions.push_back(Transition(transitionTimes[i], transitionTypes[i])); 395 } 396 } 397 398 // Read types (except for the isdst and isgmt flags, which come later (why??)) 399 for (i=0; i<typecnt; ++i) { 400 ZoneType type; 401 402 type.rawoffset = readcoded(file); 403 type.dstoffset = readcoded(file); 404 type.isdst = readbool(file); 405 406 unsigned char c; 407 file.read((char*) &c, 1); 408 type.abbr = (int32_t) c; 409 410 if (type.isdst != (type.dstoffset != 0)) { 411 throw invalid_argument("isdst does not reflect dstoffset"); 412 } 413 414 info.types.push_back(type); 415 } 416 417 assert(info.types.size() == (unsigned) typecnt); 418 419 if (insertInitial) { 420 assert(timecnt > 0); 421 assert(typecnt > 0); 422 423 int32_t initialTypeIdx = -1; 424 425 // Check if the first type is not dst 426 if (info.types.at(0).dstoffset != 0) { 427 // Initial type's rawoffset is same with the rawoffset after the 428 // first transition, but no DST is observed. 429 int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).rawoffset; 430 // Look for matching type 431 for (i=0; i<(int32_t)info.types.size(); ++i) { 432 if (info.types.at(i).rawoffset == rawoffset0 433 && info.types.at(i).dstoffset == 0) { 434 initialTypeIdx = i; 435 break; 436 } 437 } 438 } else { 439 initialTypeIdx = 0; 440 } 441 assert(initialTypeIdx >= 0); 442 // Add the initial type associated with the lowest int32 time 443 vector<Transition>::iterator itr = info.transitions.begin(); 444 info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx)); 445 } 446 447 448 // Read the abbreviation string 449 if (charcnt) { 450 // All abbreviations are concatenated together, with a 0 at 451 // the end of each abbr. 452 char* str = new char[charcnt + 8]; 453 file.read(str, charcnt); 454 455 // Split abbreviations apart into individual strings. Record 456 // offset of each abbr in a vector. 457 vector<int32_t> abbroffset; 458 char *limit=str+charcnt; 459 for (char* p=str; p<limit; ++p) { 460 char* start = p; 461 while (*p != 0) ++p; 462 info.abbrs.push_back(string(start, p-start)); 463 abbroffset.push_back(start-str); 464 } 465 466 // Remap all the abbrs. Old value is offset into concatenated 467 // raw abbr strings. New value is index into vector of 468 // strings. E.g., 0,5,10,14 => 0,1,2,3. 469 470 // Keep track of which abbreviations get used. 471 vector<bool> abbrseen(abbroffset.size(), false); 472 473 for (vector<ZoneType>::iterator it=info.types.begin(); 474 it!=info.types.end(); 475 ++it) { 476 vector<int32_t>::const_iterator x= 477 find(abbroffset.begin(), abbroffset.end(), it->abbr); 478 if (x==abbroffset.end()) { 479 // TODO: Modify code to add a new string to the end of 480 // the abbr list when a middle offset is given, e.g., 481 // "abc*def*" where * == '\0', take offset of 1 and 482 // make the array "abc", "def", "bc", and translate 1 483 // => 2. NOT CRITICAL since we don't even use the 484 // abbr at this time. 485 #if 0 486 // TODO: Re-enable this warning if we start using 487 // the Olson abbr data, or if the above TODO is completed. 488 ostringstream os; 489 os << "Warning: unusual abbr offset " << it->abbr 490 << ", expected one of"; 491 for (vector<int32_t>::const_iterator y=abbroffset.begin(); 492 y!=abbroffset.end(); ++y) { 493 os << ' ' << *y; 494 } 495 cerr << os.str() << "; using 0" << endl; 496 #endif 497 it->abbr = 0; 498 } else { 499 int32_t index = x - abbroffset.begin(); 500 it->abbr = index; 501 abbrseen[index] = true; 502 } 503 } 504 505 for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) { 506 if (!abbrseen[ii]) { 507 cerr << "Warning: unused abbreviation: " << ii << endl; 508 } 509 } 510 } 511 512 // Read leap second info, if any. 513 // *** We discard leap second data. *** 514 for (i=0; i<leapcnt; ++i) { 515 readcoded(file); // transition time 516 readcoded(file); // total correction after above 517 } 518 519 // Read isstd flags 520 for (i=0; i<typecnt; ++i) info.types[i].isstd = readbool(file); 521 522 // Read isgmt flags 523 for (i=0; i<typecnt; ++i) info.types[i].isgmt = readbool(file); 524 } 525 526 //-------------------------------------------------------------------- 527 // Directory and file reading 528 //-------------------------------------------------------------------- 529 530 /** 531 * Process a single zoneinfo file, adding the data to ZONEINFO 532 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles" 533 * @param id the zone ID, e.g., "America/Los_Angeles" 534 */ 535 void handleFile(string path, string id) { 536 // Check for duplicate id 537 if (ZONEINFO.find(id) != ZONEINFO.end()) { 538 ostringstream os; 539 os << "duplicate zone ID: " << id; 540 throw invalid_argument(os.str()); 541 } 542 543 ifstream file(path.c_str(), ios::in | ios::binary); 544 if (!file) { 545 throw invalid_argument("can't open file"); 546 } 547 548 ZoneInfo info; 549 readzoneinfo(file, info); 550 551 // Check for errors 552 if (!file) { 553 throw invalid_argument("read error"); 554 } 555 556 #ifdef USE64BITDATA 557 ZoneInfo info64; 558 readzoneinfo(file, info64, true); 559 560 bool alldone = false; 561 int64_t eofPos = (int64_t) file.tellg(); 562 563 // '\n' + <envvar string> + '\n' after the 64bit version data 564 char ch = file.get(); 565 if (ch == 0x0a) { 566 bool invalidchar = false; 567 while (file.get(ch)) { 568 if (ch == 0x0a) { 569 break; 570 } 571 if (ch < 0x20) { 572 // must be printable ascii 573 invalidchar = true; 574 break; 575 } 576 } 577 if (!invalidchar) { 578 eofPos = (int64_t) file.tellg(); 579 file.seekg(0, ios::end); 580 eofPos = eofPos - (int64_t) file.tellg(); 581 if (eofPos == 0) { 582 alldone = true; 583 } 584 } 585 } 586 if (!alldone) { 587 ostringstream os; 588 os << (-eofPos) << " unprocessed bytes at end"; 589 throw invalid_argument(os.str()); 590 } 591 592 ZONEINFO[id] = info64; 593 594 #else 595 // Check eof-relative pos (there may be a cleaner way to do this) 596 int64_t eofPos = (int64_t) file.tellg(); 597 char buf[32]; 598 file.read(buf, 4); 599 file.seekg(0, ios::end); 600 eofPos = eofPos - (int64_t) file.tellg(); 601 if (eofPos) { 602 // 2006c merged 32 and 64 bit versions in a fat binary 603 // 64 version starts at the end of 32 bit version. 604 // Therefore, if the file is *not* consumed, check 605 // if it is maybe being restarted. 606 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) { 607 ostringstream os; 608 os << (-eofPos) << " unprocessed bytes at end"; 609 throw invalid_argument(os.str()); 610 } 611 } 612 ZONEINFO[id] = info; 613 #endif 614 } 615 616 /** 617 * Recursively scan the given directory, calling handleFile() for each 618 * file in the tree. The user should call with the root directory and 619 * a prefix of "". The function will call itself with non-empty 620 * prefix values. 621 */ 622 #ifdef WIN32 623 624 void scandir(string dirname, string prefix="") { 625 HANDLE hList; 626 WIN32_FIND_DATA FileData; 627 628 // Get the first file 629 hList = FindFirstFile((dirname + "\\*").c_str(), &FileData); 630 if (hList == INVALID_HANDLE_VALUE) { 631 cerr << "Error: Invalid directory: " << dirname << endl; 632 exit(1); 633 } 634 for (;;) { 635 string name(FileData.cFileName); 636 string path(dirname + "\\" + name); 637 if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { 638 if (name != "." && name != "..") { 639 scandir(path, prefix + name + "/"); 640 } 641 } else { 642 try { 643 string id = prefix + name; 644 handleFile(path, id); 645 } catch (const exception& e) { 646 cerr << "Error: While processing \"" << path << "\", " 647 << e.what() << endl; 648 exit(1); 649 } 650 } 651 652 if (!FindNextFile(hList, &FileData)) { 653 if (GetLastError() == ERROR_NO_MORE_FILES) { 654 break; 655 } // else...? 656 } 657 } 658 FindClose(hList); 659 } 660 661 #else 662 663 void scandir(string dir, string prefix="") { 664 DIR *dp; 665 struct dirent *dir_entry; 666 struct stat stat_info; 667 char pwd[512]; 668 vector<string> subdirs; 669 vector<string> subfiles; 670 671 if ((dp = opendir(dir.c_str())) == NULL) { 672 cerr << "Error: Invalid directory: " << dir << endl; 673 exit(1); 674 } 675 if (!getcwd(pwd, sizeof(pwd))) { 676 cerr << "Error: Directory name too long" << endl; 677 exit(1); 678 } 679 chdir(dir.c_str()); 680 while ((dir_entry = readdir(dp)) != NULL) { 681 string name = dir_entry->d_name; 682 string path = dir + "/" + name; 683 lstat(dir_entry->d_name,&stat_info); 684 if (S_ISDIR(stat_info.st_mode)) { 685 if (name != "." && name != "..") { 686 subdirs.push_back(path); 687 subdirs.push_back(prefix + name + "/"); 688 // scandir(path, prefix + name + "/"); 689 } 690 } else { 691 try { 692 string id = prefix + name; 693 subfiles.push_back(path); 694 subfiles.push_back(id); 695 // handleFile(path, id); 696 } catch (const exception& e) { 697 cerr << "Error: While processing \"" << path << "\", " 698 << e.what() << endl; 699 exit(1); 700 } 701 } 702 } 703 closedir(dp); 704 chdir(pwd); 705 706 for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) { 707 try { 708 handleFile(subfiles[i], subfiles[i+1]); 709 } catch (const exception& e) { 710 cerr << "Error: While processing \"" << subfiles[i] << "\", " 711 << e.what() << endl; 712 exit(1); 713 } 714 } 715 for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) { 716 scandir(subdirs[i], subdirs[i+1]); 717 } 718 } 719 720 #endif 721 722 //-------------------------------------------------------------------- 723 // Final zone and rule info 724 //-------------------------------------------------------------------- 725 726 /** 727 * Read and discard the current line. 728 */ 729 void consumeLine(istream& in) { 730 int32_t c; 731 do { 732 c = in.get(); 733 } while (c != EOF && c != '\n'); 734 } 735 736 enum { 737 DOM = 0, 738 DOWGEQ = 1, 739 DOWLEQ = 2 740 }; 741 742 const char* TIME_MODE[] = {"w", "s", "u"}; 743 744 // Allow 29 days in February because zic outputs February 29 745 // for rules like "last Sunday in February". 746 const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31}; 747 748 const int32_t HOUR = 3600; 749 750 struct FinalZone { 751 int32_t offset; // raw offset 752 int32_t year; // takes effect for y >= year 753 string ruleid; 754 set<string> aliases; 755 FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) : 756 offset(_offset), year(_year), ruleid(_ruleid) { 757 if (offset <= -16*HOUR || offset >= 16*HOUR) { 758 ostringstream os; 759 os << "Invalid input offset " << offset 760 << " for year " << year 761 << " and rule ID " << ruleid; 762 throw invalid_argument(os.str()); 763 } 764 if (year < 1900 || year >= 2050) { 765 ostringstream os; 766 os << "Invalid input year " << year 767 << " with offset " << offset 768 << " and rule ID " << ruleid; 769 throw invalid_argument(os.str()); 770 } 771 } 772 FinalZone() : offset(-1), year(-1) {} 773 void addLink(const string& alias) { 774 if (aliases.find(alias) != aliases.end()) { 775 ostringstream os; 776 os << "Duplicate alias " << alias; 777 throw invalid_argument(os.str()); 778 } 779 aliases.insert(alias); 780 } 781 }; 782 783 struct FinalRulePart { 784 int32_t mode; 785 int32_t month; 786 int32_t dom; 787 int32_t dow; 788 int32_t time; 789 int32_t offset; // dst offset, usually either 0 or 1:00 790 791 // Isstd and isgmt only have 3 valid states, corresponding to local 792 // wall time, local standard time, and GMT standard time. 793 // Here is how the isstd & isgmt flags are set by zic: 794 //| case 's': /* Standard */ 795 //| rp->r_todisstd = TRUE; 796 //| rp->r_todisgmt = FALSE; 797 //| case 'w': /* Wall */ 798 //| rp->r_todisstd = FALSE; 799 //| rp->r_todisgmt = FALSE; 800 //| case 'g': /* Greenwich */ 801 //| case 'u': /* Universal */ 802 //| case 'z': /* Zulu */ 803 //| rp->r_todisstd = TRUE; 804 //| rp->r_todisgmt = TRUE; 805 bool isstd; 806 bool isgmt; 807 808 bool isset; // used during building; later ignored 809 810 FinalRulePart() : isset(false) {} 811 void set(const string& id, 812 const string& _mode, 813 int32_t _month, 814 int32_t _dom, 815 int32_t _dow, 816 int32_t _time, 817 bool _isstd, 818 bool _isgmt, 819 int32_t _offset) { 820 if (isset) { 821 throw invalid_argument("FinalRulePart set twice"); 822 } 823 isset = true; 824 if (_mode == "DOWLEQ") { 825 mode = DOWLEQ; 826 } else if (_mode == "DOWGEQ") { 827 mode = DOWGEQ; 828 } else if (_mode == "DOM") { 829 mode = DOM; 830 } else { 831 throw invalid_argument("Unrecognized FinalRulePart mode"); 832 } 833 month = _month; 834 dom = _dom; 835 dow = _dow; 836 time = _time; 837 isstd = _isstd; 838 isgmt = _isgmt; 839 offset = _offset; 840 841 ostringstream os; 842 if (month < 0 || month >= 12) { 843 os << "Invalid input month " << month; 844 } 845 if (dom < 1 || dom > MONTH_LEN[month]) { 846 os << "Invalid input day of month " << dom; 847 } 848 if (mode != DOM && (dow < 0 || dow >= 7)) { 849 os << "Invalid input day of week " << dow; 850 } 851 if (offset < 0 || offset > HOUR) { 852 os << "Invalid input offset " << offset; 853 } 854 if (isgmt && !isstd) { 855 os << "Invalid input isgmt && !isstd"; 856 } 857 if (!os.str().empty()) { 858 os << " for rule " 859 << id 860 << _mode 861 << month << dom << dow << time 862 << isstd << isgmt 863 << offset; 864 throw invalid_argument(os.str()); 865 } 866 } 867 868 /** 869 * Return the time mode as an ICU SimpleTimeZone int from 0..2; 870 * see simpletz.h. 871 */ 872 int32_t timemode() const { 873 if (isgmt) { 874 assert(isstd); 875 return 2; // gmt standard 876 } 877 if (isstd) { 878 return 1; // local standard 879 } 880 return 0; // local wall 881 } 882 883 // The SimpleTimeZone encoding method for rules is as follows: 884 // stz_dowim stz_dow 885 // DOM: dom 0 886 // DOWGEQ: dom -(dow+1) 887 // DOWLEQ: -dom -(dow+1) 888 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2 889 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2 890 // to encode 7, use stz_dowim=7, stz_dow=0 891 // Note that for this program and for SimpleTimeZone, 0==Jan, 892 // but for this program 0==Sun while for SimpleTimeZone 1==Sun. 893 894 /** 895 * Return a "dowim" param suitable for SimpleTimeZone. 896 */ 897 int32_t stz_dowim() const { 898 return (mode == DOWLEQ) ? -dom : dom; 899 } 900 901 /** 902 * Return a "dow" param suitable for SimpleTimeZone. 903 */ 904 int32_t stz_dow() const { 905 return (mode == DOM) ? 0 : -(dow+1); 906 } 907 }; 908 909 struct FinalRule { 910 FinalRulePart part[2]; 911 912 bool isset() const { 913 return part[0].isset && part[1].isset; 914 } 915 916 void print(ostream& os) const; 917 }; 918 919 map<string,FinalZone> finalZones; 920 map<string,FinalRule> finalRules; 921 922 map<string, set<string> > links; 923 map<string, string> reverseLinks; 924 map<string, string> linkSource; // id => "Olson link" or "ICU alias" 925 926 /** 927 * Predicate used to find FinalRule objects that do not have both 928 * sub-parts set (indicating an error in the input file). 929 */ 930 bool isNotSet(const pair<const string,FinalRule>& p) { 931 return !p.second.isset(); 932 } 933 934 /** 935 * Predicate used to find FinalZone objects that do not map to a known 936 * rule (indicating an error in the input file). 937 */ 938 bool mapsToUnknownRule(const pair<const string,FinalZone>& p) { 939 return finalRules.find(p.second.ruleid) == finalRules.end(); 940 } 941 942 /** 943 * This set is used to make sure each rule in finalRules is used at 944 * least once. First we populate it with all the rules from 945 * finalRules; then we remove all the rules referred to in 946 * finaleZones. 947 */ 948 set<string> ruleIDset; 949 950 void insertRuleID(const pair<string,FinalRule>& p) { 951 ruleIDset.insert(p.first); 952 } 953 954 void eraseRuleID(const pair<string,FinalZone>& p) { 955 ruleIDset.erase(p.second.ruleid); 956 } 957 958 /** 959 * Populate finalZones and finalRules from the given istream. 960 */ 961 void readFinalZonesAndRules(istream& in) { 962 963 for (;;) { 964 string token; 965 in >> token; 966 if (in.eof() || !in) { 967 break; 968 } else if (token == "zone") { 969 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 7200, year >= 1995, rule Egypt (0) 970 string id, ruleid; 971 int32_t offset, year; 972 in >> id >> offset >> year >> ruleid; 973 consumeLine(in); 974 finalZones[id] = FinalZone(offset, year, ruleid); 975 } else if (token == "rule") { 976 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northamerica, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, offset 3600 977 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica, line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, offset 0 978 string id, mode; 979 int32_t month, dom, dow, time, offset; 980 bool isstd, isgmt; 981 in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset; 982 consumeLine(in); 983 FinalRule& fr = finalRules[id]; 984 int32_t p = fr.part[0].isset ? 1 : 0; 985 fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset); 986 } else if (token == "link") { 987 string fromid, toid; // fromid == "real" zone, toid == alias 988 in >> fromid >> toid; 989 // DO NOT consumeLine(in); 990 if (finalZones.find(toid) != finalZones.end()) { 991 throw invalid_argument("Bad link: `to' id is a \"real\" zone"); 992 } 993 994 links[fromid].insert(toid); 995 reverseLinks[toid] = fromid; 996 997 linkSource[fromid] = "Olson link"; 998 linkSource[toid] = "Olson link"; 999 } else if (token.length() > 0 && token[0] == '#') { 1000 consumeLine(in); 1001 } else { 1002 throw invalid_argument("Unrecognized keyword"); 1003 } 1004 } 1005 1006 if (!in.eof() && !in) { 1007 throw invalid_argument("Parse failure"); 1008 } 1009 1010 // Perform validity check: Each rule should have data for 2 parts. 1011 if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) { 1012 throw invalid_argument("One or more incomplete rule pairs"); 1013 } 1014 1015 // Perform validity check: Each zone should map to a known rule. 1016 if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) { 1017 throw invalid_argument("One or more zones refers to an unknown rule"); 1018 } 1019 1020 // Perform validity check: Each rule should be referred to by a zone. 1021 ruleIDset.clear(); 1022 for_each(finalRules.begin(), finalRules.end(), insertRuleID); 1023 for_each(finalZones.begin(), finalZones.end(), eraseRuleID); 1024 if (ruleIDset.size() != 0) { 1025 throw invalid_argument("Unused rules"); 1026 } 1027 } 1028 1029 //-------------------------------------------------------------------- 1030 // Resource bundle output 1031 //-------------------------------------------------------------------- 1032 1033 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT 1034 1035 void ZoneInfo::print(ostream& os, const string& id) const { 1036 // Implement compressed format #2: 1037 1038 os << " /* " << id << " */ "; 1039 1040 if (aliasTo >= 0) { 1041 assert(aliases.size() == 0); 1042 os << ":int { " << aliasTo << " } "; // No endl - save room for comment. 1043 return; 1044 } 1045 1046 os << ":array {" << endl; 1047 1048 vector<Transition>::const_iterator trn; 1049 vector<ZoneType>::const_iterator typ; 1050 1051 bool first=true; 1052 os << " :intvector { "; 1053 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1054 if (!first) os << ", "; 1055 first = false; 1056 os << trn->time; 1057 } 1058 os << " }" << endl; 1059 1060 first=true; 1061 os << " :intvector { "; 1062 for (typ = types.begin(); typ != types.end(); ++typ) { 1063 if (!first) os << ", "; 1064 first = false; 1065 os << typ->rawoffset << ", " << typ->dstoffset; 1066 } 1067 os << " }" << endl; 1068 1069 os << " :bin { \"" << hex << setfill('0'); 1070 for (trn = transitions.begin(); trn != transitions.end(); ++trn) { 1071 os << setw(2) << trn->type; 1072 } 1073 os << dec << "\" }" << endl; 1074 1075 // Final zone info, if any 1076 if (finalYear != -1) { 1077 os << " \"" << finalRuleID << "\"" << endl; 1078 os << " :intvector { " << finalOffset << ", " 1079 << finalYear << " }" << endl; 1080 } 1081 1082 // Alias list, if any 1083 if (aliases.size() != 0) { 1084 first = true; 1085 os << " :intvector { "; 1086 for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); ++i) { 1087 if (!first) os << ", "; 1088 first = false; 1089 os << *i; 1090 } 1091 os << " }" << endl; 1092 } 1093 1094 os << " } "; // no trailing 'endl', so comments can be placed. 1095 } 1096 1097 inline ostream& 1098 operator<<(ostream& os, const ZoneMap& zoneinfo) { 1099 int32_t c = 0; 1100 for (ZoneMapIter it = zoneinfo.begin(); 1101 it != zoneinfo.end(); 1102 ++it) { 1103 if(c) os << ","; 1104 it->second.print(os, it->first); 1105 os << "//Z#" << c++ << endl; 1106 } 1107 return os; 1108 } 1109 1110 // print the string list 1111 ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) { 1112 int32_t n = 0; // count 1113 int32_t col = 0; // column 1114 os << " Names {" << endl 1115 << " "; 1116 for (ZoneMapIter it = zoneinfo.begin(); 1117 it != zoneinfo.end(); 1118 ++it) { 1119 if(n) { 1120 os << ","; 1121 col ++; 1122 } 1123 const string& id = it->first; 1124 os << "\"" << id << "\""; 1125 col += id.length() + 2; 1126 if(col >= 50) { 1127 os << " // " << n << endl 1128 << " "; 1129 col = 0; 1130 } 1131 n++; 1132 } 1133 os << " // " << (n-1) << endl 1134 << " }" << endl; 1135 1136 return os; 1137 } 1138 1139 //-------------------------------------------------------------------- 1140 // main 1141 //-------------------------------------------------------------------- 1142 1143 // Unary predicate for finding transitions after a given time 1144 bool isAfter(const Transition t, int64_t thresh) { 1145 return t.time >= thresh; 1146 } 1147 1148 /** 1149 * A zone type that contains only the raw and dst offset. Used by the 1150 * optimizeTypeList() method. 1151 */ 1152 struct SimplifiedZoneType { 1153 int64_t rawoffset; 1154 int64_t dstoffset; 1155 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {} 1156 SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset), 1157 dstoffset(t.dstoffset) {} 1158 bool operator<(const SimplifiedZoneType& t) const { 1159 return rawoffset < t.rawoffset || 1160 (rawoffset == t.rawoffset && 1161 dstoffset < t.dstoffset); 1162 } 1163 }; 1164 1165 /** 1166 * Construct a ZoneType from a SimplifiedZoneType. Note that this 1167 * discards information; the new ZoneType will have meaningless 1168 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate, 1169 * since ignoring these is how we do optimization (we have no use for 1170 * these in historical transitions). 1171 */ 1172 ZoneType::ZoneType(const SimplifiedZoneType& t) : 1173 rawoffset(t.rawoffset), dstoffset(t.dstoffset), 1174 abbr(-1), isdst(false), isstd(false), isgmt(false) {} 1175 1176 /** 1177 * Optimize the type list to remove excess entries. The type list may 1178 * contain entries that are distinct only in terms of their dst, std, 1179 * or gmt flags. Since we don't care about those flags, we can reduce 1180 * the type list to a set of unique raw/dst offset pairs, and remap 1181 * the type indices in the transition list, which stores, for each 1182 * transition, a transition time and a type index. 1183 */ 1184 void ZoneInfo::optimizeTypeList() { 1185 // Assemble set of unique types; only those in the `transitions' 1186 // list, since there may be unused types in the `types' list 1187 // corresponding to transitions that have been trimmed (during 1188 // merging of final data). 1189 1190 if (aliasTo >= 0) return; // Nothing to do for aliases 1191 1192 // If there are zero transitions and one type, then leave that as-is. 1193 if (transitions.size() == 0) { 1194 if (types.size() != 1) { 1195 cerr << "Error: transition count = 0, type count = " << types.size() << endl; 1196 } 1197 return; 1198 } 1199 1200 set<SimplifiedZoneType> simpleset; 1201 for (vector<Transition>::const_iterator i=transitions.begin(); 1202 i!=transitions.end(); ++i) { 1203 assert(i->type < (int32_t)types.size()); 1204 simpleset.insert(types[i->type]); 1205 } 1206 1207 // Map types to integer indices 1208 map<SimplifiedZoneType,int32_t> simplemap; 1209 int32_t n=0; 1210 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); 1211 i!=simpleset.end(); ++i) { 1212 simplemap[*i] = n++; 1213 } 1214 1215 // Remap transitions 1216 for (vector<Transition>::iterator i=transitions.begin(); 1217 i!=transitions.end(); ++i) { 1218 assert(i->type < (int32_t)types.size()); 1219 ZoneType oldtype = types[i->type]; 1220 SimplifiedZoneType newtype(oldtype); 1221 assert(simplemap.find(newtype) != simplemap.end()); 1222 i->type = simplemap[newtype]; 1223 } 1224 1225 // Replace type list 1226 types.clear(); 1227 copy(simpleset.begin(), simpleset.end(), back_inserter(types)); 1228 } 1229 1230 /** 1231 * Merge final zone data into this zone. 1232 */ 1233 void ZoneInfo::mergeFinalData(const FinalZone& fz) { 1234 int32_t year = fz.year; 1235 int64_t seconds = yearToSeconds(year); 1236 vector<Transition>::iterator it = 1237 find_if(transitions.begin(), transitions.end(), 1238 bind2nd(ptr_fun(isAfter), seconds)); 1239 transitions.erase(it, transitions.end()); 1240 1241 if (finalYear != -1) { 1242 throw invalid_argument("Final zone already merged in"); 1243 } 1244 finalYear = fz.year; 1245 finalOffset = fz.offset; 1246 finalRuleID = fz.ruleid; 1247 } 1248 1249 /** 1250 * Merge the data from the given final zone into the core zone data by 1251 * calling the ZoneInfo member function mergeFinalData. 1252 */ 1253 void mergeOne(const string& zoneid, const FinalZone& fz) { 1254 if (ZONEINFO.find(zoneid) == ZONEINFO.end()) { 1255 throw invalid_argument("Unrecognized final zone ID"); 1256 } 1257 ZONEINFO[zoneid].mergeFinalData(fz); 1258 } 1259 1260 /** 1261 * Visitor function that merges the final zone data into the main zone 1262 * data structures. It calls mergeOne for each final zone and its 1263 * list of aliases. 1264 */ 1265 void mergeFinalZone(const pair<string,FinalZone>& p) { 1266 const string& id = p.first; 1267 const FinalZone& fz = p.second; 1268 1269 mergeOne(id, fz); 1270 } 1271 1272 /** 1273 * Print this rule in resource bundle format to os. ID and enclosing 1274 * braces handled elsewhere. 1275 */ 1276 void FinalRule::print(ostream& os) const { 1277 // First print the rule part that enters DST; then the rule part 1278 // that exits it. 1279 int32_t whichpart = (part[0].offset != 0) ? 0 : 1; 1280 assert(part[whichpart].offset != 0); 1281 assert(part[1-whichpart].offset == 0); 1282 1283 os << " "; 1284 for (int32_t i=0; i<2; ++i) { 1285 const FinalRulePart& p = part[whichpart]; 1286 whichpart = 1-whichpart; 1287 os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", " 1288 << p.time << ", " << p.timemode() << ", "; 1289 } 1290 os << part[whichpart].offset << endl; 1291 } 1292 1293 int main(int argc, char *argv[]) { 1294 string rootpath, zonetab, version; 1295 1296 if (argc != 4) { 1297 cout << "Usage: tz2icu <dir> <cmap> <vers>" << endl 1298 << " <dir> path to zoneinfo file tree generated by" << endl 1299 << " ICU-patched version of zic" << endl 1300 << " <cmap> country map, from tzdata archive," << endl 1301 << " typically named \"zone.tab\"" << endl 1302 << " <vers> version string, such as \"2003e\"" << endl; 1303 exit(1); 1304 } else { 1305 rootpath = argv[1]; 1306 zonetab = argv[2]; 1307 version = argv[3]; 1308 } 1309 1310 cout << "Olson data version: " << version << endl; 1311 1312 try { 1313 ifstream finals(ICU_ZONE_FILE); 1314 if (finals) { 1315 readFinalZonesAndRules(finals); 1316 1317 cout << "Finished reading " << finalZones.size() 1318 << " final zones and " << finalRules.size() 1319 << " final rules from " ICU_ZONE_FILE << endl; 1320 } else { 1321 cerr << "Error: Unable to open " ICU_ZONE_FILE << endl; 1322 return 1; 1323 } 1324 } catch (const exception& error) { 1325 cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << endl; 1326 return 1; 1327 } 1328 1329 //############################################################################ 1330 //# Note: We no longer use tz.alias to define alias for legacy ICU time zones. 1331 //# The contents of tz.alias were migrated into zic source format and 1332 //# processed by zic as 'Link'. 1333 //############################################################################ 1334 #if 0 1335 // Read the legacy alias list and process it. Treat the legacy mappings 1336 // like links, but also record them in the "legacy" hash. 1337 try { 1338 ifstream aliases(ICU_TZ_ALIAS); 1339 if (!aliases) { 1340 cerr << "Error: Unable to open " ICU_TZ_ALIAS << endl; 1341 return 1; 1342 } 1343 int32_t n = 0; 1344 string line; 1345 while (getline(aliases, line)) { 1346 string::size_type lb = line.find('#'); 1347 if (lb != string::npos) { 1348 line.resize(lb); // trim comments 1349 } 1350 vector<string> a; 1351 istringstream is(line); 1352 copy(istream_iterator<string>(is),istream_iterator<string>(), 1353 back_inserter(a)); 1354 if (a.size() == 0) continue; // blank line 1355 if (a.size() != 2) { 1356 cerr << "Error: Can't parse \"" << line << "\" in " 1357 ICU_TZ_ALIAS << endl; 1358 exit(1); 1359 } 1360 ++n; 1361 1362 string alias(a[0]), olson(a[1]); 1363 if (links.find(alias) != links.end()) { 1364 cerr << "Error: Alias \"" << alias 1365 << "\" is an Olson zone in " 1366 ICU_TZ_ALIAS << endl; 1367 return 1; 1368 } 1369 if (reverseLinks.find(alias) != reverseLinks.end()) { 1370 cerr << "Error: Alias \"" << alias 1371 << "\" is an Olson link to \"" << reverseLinks[olson] 1372 << "\" in " << ICU_TZ_ALIAS << endl; 1373 return 1; 1374 } 1375 1376 // Record source for error reporting 1377 if (linkSource.find(olson) == linkSource.end()) { 1378 linkSource[olson] = "ICU alias"; 1379 } 1380 assert(linkSource.find(alias) == linkSource.end()); 1381 linkSource[alias] = "ICU alias"; 1382 1383 links[olson].insert(alias); 1384 reverseLinks[alias] = olson; 1385 } 1386 cout << "Finished reading " << n 1387 << " aliases from " ICU_TZ_ALIAS << endl; 1388 } catch (const exception& error) { 1389 cerr << "Error: While reading " ICU_TZ_ALIAS ": " << error.what() << endl; 1390 return 1; 1391 } 1392 #endif 1393 try { 1394 // Recursively scan all files below the given path, accumulating 1395 // their data into ZONEINFO. All files must be TZif files. Any 1396 // failure along the way will result in a call to exit(1). 1397 scandir(rootpath); 1398 } catch (const exception& error) { 1399 cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl; 1400 return 1; 1401 } 1402 1403 cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files [" 1404 << (ZONEINFO.begin())->first << ".." 1405 << (--ZONEINFO.end())->first << "]" << endl; 1406 1407 try { 1408 for_each(finalZones.begin(), finalZones.end(), mergeFinalZone); 1409 } catch (const exception& error) { 1410 cerr << "Error: While merging final zone data: " << error.what() << endl; 1411 return 1; 1412 } 1413 1414 // Process links (including ICU aliases). For each link set we have 1415 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more 1416 // aliases (e.g., PST, PST8PDT, ...). 1417 1418 // 1. Add all aliases as zone objects in ZONEINFO 1419 for (map<string,set<string> >::const_iterator i = links.begin(); 1420 i!=links.end(); ++i) { 1421 const string& olson = i->first; 1422 const set<string>& aliases = i->second; 1423 if (ZONEINFO.find(olson) == ZONEINFO.end()) { 1424 cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \"" 1425 << olson << "\"" << endl; 1426 return 1; 1427 } 1428 for (set<string>::const_iterator j=aliases.begin(); 1429 j!=aliases.end(); ++j) { 1430 ZONEINFO[*j] = ZoneInfo(); 1431 } 1432 } 1433 1434 // 2. Create a mapping from zones to index numbers 0..n-1. 1435 map<string,int32_t> zoneIDs; 1436 vector<string> zoneIDlist; 1437 int32_t z=0; 1438 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1439 zoneIDs[i->first] = z++; 1440 zoneIDlist.push_back(i->first); 1441 } 1442 assert(z == (int32_t) ZONEINFO.size()); 1443 1444 // 3. Merge aliases. Sometimes aliases link to other aliases; we 1445 // resolve these into simplest possible sets. 1446 map<string,set<string> > links2; 1447 map<string,string> reverse2; 1448 for (map<string,set<string> >::const_iterator i = links.begin(); 1449 i!=links.end(); ++i) { 1450 string olson = i->first; 1451 while (reverseLinks.find(olson) != reverseLinks.end()) { 1452 olson = reverseLinks[olson]; 1453 } 1454 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1455 links2[olson].insert(*j); 1456 reverse2[*j] = olson; 1457 } 1458 } 1459 links = links2; 1460 reverseLinks = reverse2; 1461 1462 if (false) { // Debugging: Emit link map 1463 for (map<string,set<string> >::const_iterator i = links.begin(); 1464 i!=links.end(); ++i) { 1465 cout << i->first << ": "; 1466 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) { 1467 cout << *j << ", "; 1468 } 1469 cout << endl; 1470 } 1471 } 1472 1473 // 4. Update aliases 1474 for (map<string,set<string> >::const_iterator i = links.begin(); 1475 i!=links.end(); ++i) { 1476 const string& olson = i->first; 1477 const set<string>& aliases = i->second; 1478 ZONEINFO[olson].clearAliases(); 1479 ZONEINFO[olson].addAlias(zoneIDs[olson]); 1480 for (set<string>::const_iterator j=aliases.begin(); 1481 j!=aliases.end(); ++j) { 1482 assert(zoneIDs.find(olson) != zoneIDs.end()); 1483 assert(zoneIDs.find(*j) != zoneIDs.end()); 1484 assert(ZONEINFO.find(*j) != ZONEINFO.end()); 1485 ZONEINFO[*j].setAliasTo(zoneIDs[olson]); 1486 ZONEINFO[olson].addAlias(zoneIDs[*j]); 1487 } 1488 } 1489 1490 // Once merging of final data is complete, we can optimize the type list 1491 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1492 i->second.optimizeTypeList(); 1493 } 1494 1495 // Create the country map 1496 map<string, set<string> > countryMap; // country -> set of zones 1497 map<string, string> reverseCountryMap; // zone -> country 1498 try { 1499 ifstream f(zonetab.c_str()); 1500 if (!f) { 1501 cerr << "Error: Unable to open " << zonetab << endl; 1502 return 1; 1503 } 1504 int32_t n = 0; 1505 string line; 1506 while (getline(f, line)) { 1507 string::size_type lb = line.find('#'); 1508 if (lb != string::npos) { 1509 line.resize(lb); // trim comments 1510 } 1511 string country, coord, zone; 1512 istringstream is(line); 1513 is >> country >> coord >> zone; 1514 if (country.size() == 0) continue; 1515 if (country.size() != 2 || zone.size() < 1) { 1516 cerr << "Error: Can't parse " << line << " in " << zonetab << endl; 1517 return 1; 1518 } 1519 if (ZONEINFO.find(zone) == ZONEINFO.end()) { 1520 cerr << "Error: Country maps to invalid zone " << zone 1521 << " in " << zonetab << endl; 1522 return 1; 1523 } 1524 countryMap[country].insert(zone); 1525 reverseCountryMap[zone] = country; 1526 //cerr << (n+1) << ": " << country << " <=> " << zone << endl; 1527 ++n; 1528 } 1529 cout << "Finished reading " << n 1530 << " country entries from " << zonetab << endl; 1531 } catch (const exception& error) { 1532 cerr << "Error: While reading " << zonetab << ": " << error.what() << endl; 1533 return 1; 1534 } 1535 1536 // Merge ICU aliases into country map. Don't merge any alias 1537 // that already has a country map, since that doesn't make sense. 1538 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we 1539 // should cross-map the countries between these two zones. 1540 for (map<string,set<string> >::const_iterator i = links.begin(); 1541 i!=links.end(); ++i) { 1542 const string& olson(i->first); 1543 if (reverseCountryMap.find(olson) == reverseCountryMap.end()) { 1544 continue; 1545 } 1546 string c = reverseCountryMap[olson]; 1547 const set<string>& aliases(i->second); 1548 for (set<string>::const_iterator j=aliases.begin(); 1549 j != aliases.end(); ++j) { 1550 if (reverseCountryMap.find(*j) == reverseCountryMap.end()) { 1551 countryMap[c].insert(*j); 1552 reverseCountryMap[*j] = c; 1553 //cerr << "Aliased country: " << c << " <=> " << *j << endl; 1554 } 1555 } 1556 } 1557 1558 // Create a pseudo-country containing all zones belonging to no country 1559 set<string> nocountry; 1560 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1561 if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) { 1562 nocountry.insert(i->first); 1563 } 1564 } 1565 countryMap[""] = nocountry; 1566 1567 // Get local time & year for below 1568 time_t sec; 1569 time(&sec); 1570 struct tm* now = localtime(&sec); 1571 int32_t thisYear = now->tm_year + 1900; 1572 1573 // Write out a resource-bundle source file containing data for 1574 // all zones. 1575 ofstream file(ICU_TZ_RESOURCE ".txt"); 1576 if (file) { 1577 file << "//---------------------------------------------------------" << endl 1578 << "// Copyright (C) 2003"; 1579 if (thisYear > 2003) { 1580 file << "-" << thisYear; 1581 } 1582 file << ", International Business Machines" << endl 1583 << "// Corporation and others. All Rights Reserved." << endl 1584 << "//---------------------------------------------------------" << endl 1585 << "// Build tool: tz2icu" << endl 1586 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */ 1587 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl 1588 << "// Olson version: " << version << endl 1589 << "// ICU version: " << U_ICU_VERSION << endl 1590 << "//---------------------------------------------------------" << endl 1591 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl 1592 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl 1593 << "//---------------------------------------------------------" << endl 1594 << endl 1595 << ICU_TZ_RESOURCE ":table(nofallback) {" << endl 1596 << " TZVersion { \"" << version << "\" }" << endl 1597 << " Zones:array { " << endl 1598 << ZONEINFO // Zones (the actual data) 1599 << " }" << endl; 1600 1601 // Names correspond to the Zones list, used for binary searching. 1602 printStringList ( file, ZONEINFO ); // print the Names list 1603 1604 // Final Rules are used if requested by the zone 1605 file << " Rules { " << endl; 1606 // Emit final rules 1607 int32_t frc = 0; 1608 for(map<string,FinalRule>::iterator i=finalRules.begin(); 1609 i!=finalRules.end(); ++i) { 1610 const string& id = i->first; 1611 const FinalRule& r = i->second; 1612 file << " " << id << ":intvector {" << endl; 1613 r.print(file); 1614 file << " } //_#" << frc++ << endl; 1615 } 1616 file << " }" << endl; 1617 1618 // Emit country (region) map. Emitting the string zone IDs results 1619 // in a 188 kb binary resource; emitting the zone index numbers 1620 // trims this to 171 kb. More work for the runtime code, but 1621 // a smaller data footprint. 1622 file << " Regions { " << endl; 1623 int32_t rc = 0; 1624 for (map<string, set<string> >::const_iterator i=countryMap.begin(); 1625 i != countryMap.end(); ++i) { 1626 string country = i->first; 1627 const set<string>& zones(i->second); 1628 file << " "; 1629 if(country[0]==0) { 1630 file << "Default"; 1631 } 1632 file << country << ":intvector { "; 1633 bool first = true; 1634 for (set<string>::const_iterator j=zones.begin(); 1635 j != zones.end(); ++j) { 1636 if (!first) file << ", "; 1637 first = false; 1638 if (zoneIDs.find(*j) == zoneIDs.end()) { 1639 cerr << "Error: Nonexistent zone in country map: " << *j << endl; 1640 return 1; 1641 } 1642 file << zoneIDs[*j]; // emit the zone's index number 1643 } 1644 file << " } //R#" << rc++ << endl; 1645 } 1646 file << " }" << endl; 1647 1648 file << "}" << endl; 1649 } 1650 1651 file.close(); 1652 1653 if (file) { // recheck error bit 1654 cout << "Finished writing " ICU_TZ_RESOURCE ".txt" << endl; 1655 } else { 1656 cerr << "Error: Unable to open/write to " ICU_TZ_RESOURCE ".txt" << endl; 1657 return 1; 1658 } 1659 1660 #define ICU4J_TZ_CLASS "ZoneMetaData" 1661 1662 // Write out a Java source file containing only a few pieces of 1663 // meta-data missing from the core JDK: the equivalency lists and 1664 // the country map. 1665 ofstream java(ICU4J_TZ_CLASS ".java"); 1666 if (java) { 1667 java << "//---------------------------------------------------------" << endl 1668 << "// Copyright (C) 2003"; 1669 if (thisYear > 2003) { 1670 java << "-" << thisYear; 1671 } 1672 java << ", International Business Machines" << endl 1673 << "// Corporation and others. All Rights Reserved." << endl 1674 << "//---------------------------------------------------------" << endl 1675 << "// Build tool: tz2icu" << endl 1676 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */ 1677 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl 1678 << "// Olson version: " << version << endl 1679 << "// ICU version: " << U_ICU_VERSION << endl 1680 << "//---------------------------------------------------------" << endl 1681 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl 1682 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl 1683 << "//---------------------------------------------------------" << endl 1684 << endl 1685 << "package com.ibm.icu.impl;" << endl 1686 << endl 1687 << "public final class " ICU4J_TZ_CLASS " {" << endl; 1688 1689 // Emit equivalency lists 1690 bool first1 = true; 1691 java << " public static final String VERSION = \"" + version + "\";" << endl; 1692 java << " public static final String[][] EQUIV = {" << endl; 1693 for (ZoneMap::const_iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) { 1694 if (i->second.isAlias() || i->second.getAliases().size() == 0) { 1695 continue; 1696 } 1697 if (!first1) java << "," << endl; 1698 first1 = false; 1699 // The ID of this zone (the canonical zone, to which the 1700 // aliases point) will be sorted into the list, so it 1701 // won't be at position 0. If we want to know which is 1702 // the canonical zone, we should move it to position 0. 1703 java << " { "; 1704 bool first2 = true; 1705 const set<int32_t>& s = i->second.getAliases(); 1706 for (set<int32_t>::const_iterator j=s.begin(); j!=s.end(); ++j) { 1707 if (!first2) java << ", "; 1708 java << '"' << zoneIDlist[*j] << '"'; 1709 first2 = false; 1710 } 1711 java << " }"; 1712 } 1713 java << endl 1714 << " };" << endl; 1715 1716 // Emit country map. 1717 first1 = true; 1718 java << " public static final String[][] COUNTRY = {" << endl; 1719 for (map<string, set<string> >::const_iterator i=countryMap.begin(); 1720 i != countryMap.end(); ++i) { 1721 if (!first1) java << "," << endl; 1722 first1 = false; 1723 string country = i->first; 1724 const set<string>& zones(i->second); 1725 java << " { \"" << country << '"'; 1726 for (set<string>::const_iterator j=zones.begin(); 1727 j != zones.end(); ++j) { 1728 java << ", \"" << *j << '"'; 1729 } 1730 java << " }"; 1731 } 1732 java << endl 1733 << " };" << endl; 1734 1735 java << "}" << endl; 1736 } 1737 1738 java.close(); 1739 1740 if (java) { // recheck error bit 1741 cout << "Finished writing " ICU4J_TZ_CLASS ".java" << endl; 1742 } else { 1743 cerr << "Error: Unable to open/write to " ICU4J_TZ_CLASS ".java" << endl; 1744 return 1; 1745 } 1746 1747 return 0; 1748 } 1749 1750 //eof 1751