Home | History | Annotate | Download | only in fst
      1 // icu.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: roubert (at) google.com (Fredrik Roubert)
     17 
     18 // Wrapper class for UErrorCode, with conversion operators for direct use in
     19 // ICU C and C++ APIs.
     20 //
     21 // Features:
     22 // - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
     23 //   removing one common source of errors.
     24 // - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
     25 //   UErrorCode& (reference), via conversion operators.
     26 // - Automatic checking for success when it goes out of scope. On failure,
     27 //   the destructor will FSTERROR() an error message.
     28 //
     29 // Most of ICU will handle errors gracefully and provide sensible fallbacks.
     30 // Using IcuErrorCode, it is therefore possible to write very compact code
     31 // that does sensible things on failure and provides logging for debugging.
     32 //
     33 // Example:
     34 //
     35 // IcuErrorCode icuerrorcode;
     36 // return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
     37 
     38 #ifndef FST_LIB_ICU_H_
     39 #define FST_LIB_ICU_H_
     40 
     41 #include <unicode/errorcode.h>
     42 #include <unicode/unistr.h>
     43 #include <unicode/ustring.h>
     44 #include <unicode/utf8.h>
     45 
     46 class IcuErrorCode : public icu::ErrorCode {
     47  public:
     48   IcuErrorCode() {}
     49   virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }
     50 
     51   // Redefine 'errorName()' in order to be compatible with ICU version 4.2
     52   const char* errorName() const {
     53     return u_errorName(errorCode);
     54   }
     55 
     56  protected:
     57   virtual void handleFailure() const {
     58     FSTERROR() << errorName();
     59 }
     60 
     61  private:
     62   DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
     63 };
     64 
     65 namespace fst {
     66 
     67 template <class Label>
     68 bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
     69   const char *c_str = str.c_str();
     70   int32_t length = str.size();
     71   UChar32 c;
     72   for (int32_t i = 0; i < length; /* no update */) {
     73     U8_NEXT(c_str, i, length, c);
     74     if (c < 0) {
     75       LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
     76       return false;
     77     }
     78     labels->push_back(c);
     79   }
     80   return true;
     81 }
     82 
     83 template <class Label>
     84 bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
     85   icu::UnicodeString u_str;
     86   char c_str[5];
     87   for (size_t i = 0; i < labels.size(); ++i) {
     88     u_str.setTo(labels[i]);
     89     IcuErrorCode error;
     90     u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
     91     if (error.isFailure()) {
     92       LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
     93                  << error.errorName();
     94       return false;
     95     }
     96     *str += c_str;
     97   }
     98   return true;
     99 }
    100 
    101 }  // namespace fst
    102 
    103 #endif  // FST_LIB_ICU_H_
    104