1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * dayperiodrules.cpp 9 * 10 * created on: 2016-01-20 11 * created by: kazede 12 */ 13 14 #include "dayperiodrules.h" 15 16 #include "unicode/ures.h" 17 #include "charstr.h" 18 #include "cstring.h" 19 #include "ucln_in.h" 20 #include "uhash.h" 21 #include "umutex.h" 22 #include "uresimp.h" 23 24 25 U_NAMESPACE_BEGIN 26 27 namespace { 28 29 struct DayPeriodRulesData : public UMemory { 30 DayPeriodRulesData() : localeToRuleSetNumMap(NULL), rules(NULL), maxRuleSetNum(0) {} 31 32 UHashtable *localeToRuleSetNumMap; 33 DayPeriodRules *rules; 34 int32_t maxRuleSetNum; 35 } *data = NULL; 36 37 enum CutoffType { 38 CUTOFF_TYPE_UNKNOWN = -1, 39 CUTOFF_TYPE_BEFORE, 40 CUTOFF_TYPE_AFTER, // TODO: AFTER is deprecated in CLDR 29. Remove. 41 CUTOFF_TYPE_FROM, 42 CUTOFF_TYPE_AT 43 }; 44 45 } // namespace 46 47 struct DayPeriodRulesDataSink : public ResourceSink { 48 DayPeriodRulesDataSink() { 49 for (int32_t i = 0; i < UPRV_LENGTHOF(cutoffs); ++i) { cutoffs[i] = 0; } 50 } 51 virtual ~DayPeriodRulesDataSink(); 52 53 virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { 54 ResourceTable dayPeriodData = value.getTable(errorCode); 55 if (U_FAILURE(errorCode)) { return; } 56 57 for (int32_t i = 0; dayPeriodData.getKeyAndValue(i, key, value); ++i) { 58 if (uprv_strcmp(key, "locales") == 0) { 59 ResourceTable locales = value.getTable(errorCode); 60 if (U_FAILURE(errorCode)) { return; } 61 62 for (int32_t j = 0; locales.getKeyAndValue(j, key, value); ++j) { 63 UnicodeString setNum_str = value.getUnicodeString(errorCode); 64 int32_t setNum = parseSetNum(setNum_str, errorCode); 65 uhash_puti(data->localeToRuleSetNumMap, const_cast<char *>(key), setNum, &errorCode); 66 } 67 } else if (uprv_strcmp(key, "rules") == 0) { 68 // Allocate one more than needed to skip [0]. See comment in parseSetNum(). 69 data->rules = new DayPeriodRules[data->maxRuleSetNum + 1]; 70 if (data->rules == NULL) { 71 errorCode = U_MEMORY_ALLOCATION_ERROR; 72 return; 73 } 74 ResourceTable rules = value.getTable(errorCode); 75 processRules(rules, key, value, errorCode); 76 if (U_FAILURE(errorCode)) { return; } 77 } 78 } 79 } 80 81 void processRules(const ResourceTable &rules, const char *key, 82 ResourceValue &value, UErrorCode &errorCode) { 83 if (U_FAILURE(errorCode)) { return; } 84 85 for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { 86 ruleSetNum = parseSetNum(key, errorCode); 87 ResourceTable ruleSet = value.getTable(errorCode); 88 if (U_FAILURE(errorCode)) { return; } 89 90 for (int32_t j = 0; ruleSet.getKeyAndValue(j, key, value); ++j) { 91 period = DayPeriodRules::getDayPeriodFromString(key); 92 if (period == DayPeriodRules::DAYPERIOD_UNKNOWN) { 93 errorCode = U_INVALID_FORMAT_ERROR; 94 return; 95 } 96 ResourceTable periodDefinition = value.getTable(errorCode); 97 if (U_FAILURE(errorCode)) { return; } 98 99 for (int32_t k = 0; periodDefinition.getKeyAndValue(k, key, value); ++k) { 100 if (value.getType() == URES_STRING) { 101 // Key-value pairs (e.g. before{6:00}). 102 CutoffType type = getCutoffTypeFromString(key); 103 addCutoff(type, value.getUnicodeString(errorCode), errorCode); 104 if (U_FAILURE(errorCode)) { return; } 105 } else { 106 // Arrays (e.g. before{6:00, 24:00}). 107 cutoffType = getCutoffTypeFromString(key); 108 ResourceArray cutoffArray = value.getArray(errorCode); 109 if (U_FAILURE(errorCode)) { return; } 110 111 int32_t length = cutoffArray.getSize(); 112 for (int32_t l = 0; l < length; ++l) { 113 cutoffArray.getValue(l, value); 114 addCutoff(cutoffType, value.getUnicodeString(errorCode), errorCode); 115 if (U_FAILURE(errorCode)) { return; } 116 } 117 } 118 } 119 setDayPeriodForHoursFromCutoffs(errorCode); 120 for (int32_t k = 0; k < UPRV_LENGTHOF(cutoffs); ++k) { 121 cutoffs[k] = 0; 122 } 123 } 124 125 if (!data->rules[ruleSetNum].allHoursAreSet()) { 126 errorCode = U_INVALID_FORMAT_ERROR; 127 return; 128 } 129 } 130 } 131 132 // Members. 133 int32_t cutoffs[25]; // [0] thru [24]: 24 is allowed in "before 24". 134 135 // "Path" to data. 136 int32_t ruleSetNum; 137 DayPeriodRules::DayPeriod period; 138 CutoffType cutoffType; 139 140 // Helpers. 141 static int32_t parseSetNum(const UnicodeString &setNumStr, UErrorCode &errorCode) { 142 CharString cs; 143 cs.appendInvariantChars(setNumStr, errorCode); 144 return parseSetNum(cs.data(), errorCode); 145 } 146 147 static int32_t parseSetNum(const char *setNumStr, UErrorCode &errorCode) { 148 if (U_FAILURE(errorCode)) { return -1; } 149 150 if (uprv_strncmp(setNumStr, "set", 3) != 0) { 151 errorCode = U_INVALID_FORMAT_ERROR; 152 return -1; 153 } 154 155 int32_t i = 3; 156 int32_t setNum = 0; 157 while (setNumStr[i] != 0) { 158 int32_t digit = setNumStr[i] - '0'; 159 if (digit < 0 || 9 < digit) { 160 errorCode = U_INVALID_FORMAT_ERROR; 161 return -1; 162 } 163 setNum = 10 * setNum + digit; 164 ++i; 165 } 166 167 // Rule set number must not be zero. (0 is used to indicate "not found" by hashmap.) 168 // Currently ICU data conveniently starts numbering rule sets from 1. 169 if (setNum == 0) { 170 errorCode = U_INVALID_FORMAT_ERROR; 171 return -1; 172 } else { 173 return setNum; 174 } 175 } 176 177 void addCutoff(CutoffType type, const UnicodeString &hour_str, UErrorCode &errorCode) { 178 if (U_FAILURE(errorCode)) { return; } 179 180 if (type == CUTOFF_TYPE_UNKNOWN) { 181 errorCode = U_INVALID_FORMAT_ERROR; 182 return; 183 } 184 185 int32_t hour = parseHour(hour_str, errorCode); 186 if (U_FAILURE(errorCode)) { return; } 187 188 cutoffs[hour] |= 1 << type; 189 } 190 191 // Translate the cutoffs[] array to day period rules. 192 void setDayPeriodForHoursFromCutoffs(UErrorCode &errorCode) { 193 DayPeriodRules &rule = data->rules[ruleSetNum]; 194 195 for (int32_t startHour = 0; startHour <= 24; ++startHour) { 196 // AT cutoffs must be either midnight or noon. 197 if (cutoffs[startHour] & (1 << CUTOFF_TYPE_AT)) { 198 if (startHour == 0 && period == DayPeriodRules::DAYPERIOD_MIDNIGHT) { 199 rule.fHasMidnight = TRUE; 200 } else if (startHour == 12 && period == DayPeriodRules::DAYPERIOD_NOON) { 201 rule.fHasNoon = TRUE; 202 } else { 203 errorCode = U_INVALID_FORMAT_ERROR; // Bad data. 204 return; 205 } 206 } 207 208 // FROM/AFTER and BEFORE must come in a pair. 209 if (cutoffs[startHour] & (1 << CUTOFF_TYPE_FROM) || 210 cutoffs[startHour] & (1 << CUTOFF_TYPE_AFTER)) { 211 for (int32_t hour = startHour + 1;; ++hour) { 212 if (hour == startHour) { 213 // We've gone around the array once and can't find a BEFORE. 214 errorCode = U_INVALID_FORMAT_ERROR; 215 return; 216 } 217 if (hour == 25) { hour = 0; } 218 if (cutoffs[hour] & (1 << CUTOFF_TYPE_BEFORE)) { 219 rule.add(startHour, hour, period); 220 break; 221 } 222 } 223 } 224 } 225 } 226 227 // Translate "before" to CUTOFF_TYPE_BEFORE, for example. 228 static CutoffType getCutoffTypeFromString(const char *type_str) { 229 if (uprv_strcmp(type_str, "from") == 0) { 230 return CUTOFF_TYPE_FROM; 231 } else if (uprv_strcmp(type_str, "before") == 0) { 232 return CUTOFF_TYPE_BEFORE; 233 } else if (uprv_strcmp(type_str, "after") == 0) { 234 return CUTOFF_TYPE_AFTER; 235 } else if (uprv_strcmp(type_str, "at") == 0) { 236 return CUTOFF_TYPE_AT; 237 } else { 238 return CUTOFF_TYPE_UNKNOWN; 239 } 240 } 241 242 // Gets the numerical value of the hour from the Unicode string. 243 static int32_t parseHour(const UnicodeString &time, UErrorCode &errorCode) { 244 if (U_FAILURE(errorCode)) { 245 return 0; 246 } 247 248 int32_t hourLimit = time.length() - 3; 249 // `time` must look like "x:00" or "xx:00". 250 // If length is wrong or `time` doesn't end with ":00", error out. 251 if ((hourLimit != 1 && hourLimit != 2) || 252 time[hourLimit] != 0x3A || time[hourLimit + 1] != 0x30 || 253 time[hourLimit + 2] != 0x30) { 254 errorCode = U_INVALID_FORMAT_ERROR; 255 return 0; 256 } 257 258 // If `time` doesn't begin with a number in [0, 24], error out. 259 // Note: "24:00" is possible in "before 24:00". 260 int32_t hour = time[0] - 0x30; 261 if (hour < 0 || 9 < hour) { 262 errorCode = U_INVALID_FORMAT_ERROR; 263 return 0; 264 } 265 if (hourLimit == 2) { 266 int32_t hourDigit2 = time[1] - 0x30; 267 if (hourDigit2 < 0 || 9 < hourDigit2) { 268 errorCode = U_INVALID_FORMAT_ERROR; 269 return 0; 270 } 271 hour = hour * 10 + hourDigit2; 272 if (hour > 24) { 273 errorCode = U_INVALID_FORMAT_ERROR; 274 return 0; 275 } 276 } 277 278 return hour; 279 } 280 }; // struct DayPeriodRulesDataSink 281 282 struct DayPeriodRulesCountSink : public ResourceSink { 283 virtual ~DayPeriodRulesCountSink(); 284 285 virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { 286 ResourceTable rules = value.getTable(errorCode); 287 if (U_FAILURE(errorCode)) { return; } 288 289 for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { 290 int32_t setNum = DayPeriodRulesDataSink::parseSetNum(key, errorCode); 291 if (setNum > data->maxRuleSetNum) { 292 data->maxRuleSetNum = setNum; 293 } 294 } 295 } 296 }; 297 298 // Out-of-line virtual destructors. 299 DayPeriodRulesDataSink::~DayPeriodRulesDataSink() {} 300 DayPeriodRulesCountSink::~DayPeriodRulesCountSink() {} 301 302 namespace { 303 304 UInitOnce initOnce = U_INITONCE_INITIALIZER; 305 306 U_CFUNC UBool U_CALLCONV dayPeriodRulesCleanup() { 307 delete[] data->rules; 308 uhash_close(data->localeToRuleSetNumMap); 309 delete data; 310 data = NULL; 311 return TRUE; 312 } 313 314 } // namespace 315 316 void U_CALLCONV DayPeriodRules::load(UErrorCode &errorCode) { 317 if (U_FAILURE(errorCode)) { 318 return; 319 } 320 321 data = new DayPeriodRulesData(); 322 data->localeToRuleSetNumMap = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 323 LocalUResourceBundlePointer rb_dayPeriods(ures_openDirect(NULL, "dayPeriods", &errorCode)); 324 325 // Get the largest rule set number (so we allocate enough objects). 326 DayPeriodRulesCountSink countSink; 327 ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "rules", countSink, errorCode); 328 329 // Populate rules. 330 DayPeriodRulesDataSink sink; 331 ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "", sink, errorCode); 332 333 ucln_i18n_registerCleanup(UCLN_I18N_DAYPERIODRULES, dayPeriodRulesCleanup); 334 } 335 336 const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCode &errorCode) { 337 umtx_initOnce(initOnce, DayPeriodRules::load, errorCode); 338 339 // If the entire day period rules data doesn't conform to spec (even if the part we want 340 // does), return NULL. 341 if(U_FAILURE(errorCode)) { return NULL; } 342 343 const char *localeCode = locale.getBaseName(); 344 char name[ULOC_FULLNAME_CAPACITY]; 345 char parentName[ULOC_FULLNAME_CAPACITY]; 346 347 if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) { 348 uprv_strcpy(name, localeCode); 349 350 // Treat empty string as root. 351 if (*name == '\0') { 352 uprv_strcpy(name, "root"); 353 } 354 } else { 355 errorCode = U_BUFFER_OVERFLOW_ERROR; 356 return NULL; 357 } 358 359 int32_t ruleSetNum = 0; // NB there is no rule set 0 and 0 is returned upon lookup failure. 360 while (*name != '\0') { 361 ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name); 362 if (ruleSetNum == 0) { 363 // name and parentName can't be the same pointer, so fill in parent then copy to child. 364 uloc_getParent(name, parentName, ULOC_FULLNAME_CAPACITY, &errorCode); 365 if (*parentName == '\0') { 366 // Saves a lookup in the hash table. 367 break; 368 } 369 uprv_strcpy(name, parentName); 370 } else { 371 break; 372 } 373 } 374 375 if (ruleSetNum <= 0 || data->rules[ruleSetNum].getDayPeriodForHour(0) == DAYPERIOD_UNKNOWN) { 376 // If day period for hour 0 is UNKNOWN then day period for all hours are UNKNOWN. 377 // Data doesn't exist even with fallback. 378 return NULL; 379 } else { 380 return &data->rules[ruleSetNum]; 381 } 382 } 383 384 DayPeriodRules::DayPeriodRules() : fHasMidnight(FALSE), fHasNoon(FALSE) { 385 for (int32_t i = 0; i < 24; ++i) { 386 fDayPeriodForHour[i] = DayPeriodRules::DAYPERIOD_UNKNOWN; 387 } 388 } 389 390 double DayPeriodRules::getMidPointForDayPeriod( 391 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 392 if (U_FAILURE(errorCode)) { return -1; } 393 394 int32_t startHour = getStartHourForDayPeriod(dayPeriod, errorCode); 395 int32_t endHour = getEndHourForDayPeriod(dayPeriod, errorCode); 396 // Can't obtain startHour or endHour; bail out. 397 if (U_FAILURE(errorCode)) { return -1; } 398 399 double midPoint = (startHour + endHour) / 2.0; 400 401 if (startHour > endHour) { 402 // dayPeriod wraps around midnight. Shift midPoint by 12 hours, in the direction that 403 // lands it in [0, 24). 404 midPoint += 12; 405 if (midPoint >= 24) { 406 midPoint -= 24; 407 } 408 } 409 410 return midPoint; 411 } 412 413 int32_t DayPeriodRules::getStartHourForDayPeriod( 414 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 415 if (U_FAILURE(errorCode)) { return -1; } 416 417 if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } 418 if (dayPeriod == DAYPERIOD_NOON) { return 12; } 419 420 if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { 421 // dayPeriod wraps around midnight. Start hour is later than end hour. 422 for (int32_t i = 22; i >= 1; --i) { 423 if (fDayPeriodForHour[i] != dayPeriod) { 424 return (i + 1); 425 } 426 } 427 } else { 428 for (int32_t i = 0; i <= 23; ++i) { 429 if (fDayPeriodForHour[i] == dayPeriod) { 430 return i; 431 } 432 } 433 } 434 435 // dayPeriod doesn't exist in rule set; set error and exit. 436 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 437 return -1; 438 } 439 440 int32_t DayPeriodRules::getEndHourForDayPeriod( 441 DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { 442 if (U_FAILURE(errorCode)) { return -1; } 443 444 if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } 445 if (dayPeriod == DAYPERIOD_NOON) { return 12; } 446 447 if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { 448 // dayPeriod wraps around midnight. End hour is before start hour. 449 for (int32_t i = 1; i <= 22; ++i) { 450 if (fDayPeriodForHour[i] != dayPeriod) { 451 // i o'clock is when a new period starts, therefore when the old period ends. 452 return i; 453 } 454 } 455 } else { 456 for (int32_t i = 23; i >= 0; --i) { 457 if (fDayPeriodForHour[i] == dayPeriod) { 458 return (i + 1); 459 } 460 } 461 } 462 463 // dayPeriod doesn't exist in rule set; set error and exit. 464 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 465 return -1; 466 } 467 468 DayPeriodRules::DayPeriod DayPeriodRules::getDayPeriodFromString(const char *type_str) { 469 if (uprv_strcmp(type_str, "midnight") == 0) { 470 return DAYPERIOD_MIDNIGHT; 471 } else if (uprv_strcmp(type_str, "noon") == 0) { 472 return DAYPERIOD_NOON; 473 } else if (uprv_strcmp(type_str, "morning1") == 0) { 474 return DAYPERIOD_MORNING1; 475 } else if (uprv_strcmp(type_str, "afternoon1") == 0) { 476 return DAYPERIOD_AFTERNOON1; 477 } else if (uprv_strcmp(type_str, "evening1") == 0) { 478 return DAYPERIOD_EVENING1; 479 } else if (uprv_strcmp(type_str, "night1") == 0) { 480 return DAYPERIOD_NIGHT1; 481 } else if (uprv_strcmp(type_str, "morning2") == 0) { 482 return DAYPERIOD_MORNING2; 483 } else if (uprv_strcmp(type_str, "afternoon2") == 0) { 484 return DAYPERIOD_AFTERNOON2; 485 } else if (uprv_strcmp(type_str, "evening2") == 0) { 486 return DAYPERIOD_EVENING2; 487 } else if (uprv_strcmp(type_str, "night2") == 0) { 488 return DAYPERIOD_NIGHT2; 489 } else if (uprv_strcmp(type_str, "am") == 0) { 490 return DAYPERIOD_AM; 491 } else if (uprv_strcmp(type_str, "pm") == 0) { 492 return DAYPERIOD_PM; 493 } else { 494 return DAYPERIOD_UNKNOWN; 495 } 496 } 497 498 void DayPeriodRules::add(int32_t startHour, int32_t limitHour, DayPeriod period) { 499 for (int32_t i = startHour; i != limitHour; ++i) { 500 if (i == 24) { i = 0; } 501 fDayPeriodForHour[i] = period; 502 } 503 } 504 505 UBool DayPeriodRules::allHoursAreSet() { 506 for (int32_t i = 0; i < 24; ++i) { 507 if (fDayPeriodForHour[i] == DAYPERIOD_UNKNOWN) { return FALSE; } 508 } 509 510 return TRUE; 511 } 512 513 514 515 U_NAMESPACE_END 516