#ifndef MARISA_ALPHA_BASE_H_
#define MARISA_ALPHA_BASE_H_

// Base definitions shared by the C and C++ interfaces of the library:
// fixed-width integer typedefs, limit/sentinel macros, status codes, build
// flags and (C++ only) the exception type and a few helper macros.

// Visual C++ does not provide stdint.h.
#ifndef _MSC_VER
#include <stdint.h>
#endif  // _MSC_VER

#ifdef __cplusplus
#include <cstddef>
#include <new>
#else  // __cplusplus
#include <stddef.h>
#endif  // __cplusplus

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Fixed-width unsigned integer types.
#ifdef _MSC_VER
typedef unsigned __int8  marisa_alpha_uint8;
typedef unsigned __int16 marisa_alpha_uint16;
typedef unsigned __int32 marisa_alpha_uint32;
typedef unsigned __int64 marisa_alpha_uint64;
#else  // _MSC_VER
typedef uint8_t  marisa_alpha_uint8;
typedef uint16_t marisa_alpha_uint16;
typedef uint32_t marisa_alpha_uint32;
typedef uint64_t marisa_alpha_uint64;
#endif  // _MSC_VER

// Maximum values: converting -1 to an unsigned type yields its largest value.
#define MARISA_ALPHA_UINT8_MAX  ((marisa_alpha_uint8)-1)
#define MARISA_ALPHA_UINT16_MAX ((marisa_alpha_uint16)-1)
#define MARISA_ALPHA_UINT32_MAX ((marisa_alpha_uint32)-1)
#define MARISA_ALPHA_UINT64_MAX ((marisa_alpha_uint64)-1)
#define MARISA_ALPHA_SIZE_MAX   ((size_t)-1)

// Sentinel values. All three share the largest 32-bit value.
#define MARISA_ALPHA_ZERO_TERMINATED MARISA_ALPHA_UINT32_MAX
#define MARISA_ALPHA_NOT_FOUND       MARISA_ALPHA_UINT32_MAX
#define MARISA_ALPHA_MISMATCH        MARISA_ALPHA_UINT32_MAX

// Upper limits. They stay one below the sentinel value defined above.
#define MARISA_ALPHA_MAX_LENGTH   (MARISA_ALPHA_UINT32_MAX - 1)
#define MARISA_ALPHA_MAX_NUM_KEYS (MARISA_ALPHA_UINT32_MAX - 1)

// marisa_alpha_status provides a list of error codes. Most functions in
// libmarisa throw or return an error code.
typedef enum marisa_alpha_status_ {
  // MARISA_ALPHA_OK means that a requested operation has succeeded.
  MARISA_ALPHA_OK = 0,

  // MARISA_ALPHA_HANDLE_ERROR means that a given handle is invalid.
  MARISA_ALPHA_HANDLE_ERROR = 1,

  // MARISA_ALPHA_STATE_ERROR means that an object is not ready for a
  // requested operation. For example, an operation to modify a fixed
  // container throws an exception with this error code.
  MARISA_ALPHA_STATE_ERROR = 2,

  // MARISA_ALPHA_PARAM_ERROR means that a given argument is invalid. For
  // example, some functions throw an exception with this error code when an
  // out-of-range value or a NULL pointer is given.
  MARISA_ALPHA_PARAM_ERROR = 3,

  // MARISA_ALPHA_SIZE_ERROR means that a size exceeds its limit. This error
  // code is used when a dictionary being built is too large or when
  // std::length_error is caught.
  MARISA_ALPHA_SIZE_ERROR = 4,

  // MARISA_ALPHA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_ALPHA_MEMORY_ERROR = 5,

  // MARISA_ALPHA_IO_ERROR means that an I/O operation has failed.
  MARISA_ALPHA_IO_ERROR = 6,

  // MARISA_ALPHA_UNEXPECTED_ERROR means that an unexpected error has
  // occurred.
  MARISA_ALPHA_UNEXPECTED_ERROR = 7
} marisa_alpha_status;

// marisa_alpha_strerror() returns a name of an error code.
const char *marisa_alpha_strerror(marisa_alpha_status status);

// Flags and masks for dictionary settings are defined as follows. Please note
// that unspecified values/flags will be replaced with default values/flags.
typedef enum marisa_alpha_flags_ {
  // A dictionary consists of 3 tries by default. If you want to change the
  // number of tries, please give it with other flags.
  MARISA_ALPHA_MIN_NUM_TRIES     = 0x00001,
  MARISA_ALPHA_MAX_NUM_TRIES     = 0x000FF,
  MARISA_ALPHA_DEFAULT_NUM_TRIES = 0x00003,

  // MARISA_ALPHA_PATRICIA_TRIE is usually a better choice.
  // MARISA_ALPHA_PREFIX_TRIE is provided for comparing prefix/patricia tries.
  MARISA_ALPHA_PATRICIA_TRIE = 0x00100,
  MARISA_ALPHA_PREFIX_TRIE   = 0x00200,
  MARISA_ALPHA_DEFAULT_TRIE  = MARISA_ALPHA_PATRICIA_TRIE,

  // There are 3 kinds of TAIL implementations.
  // - MARISA_ALPHA_WITHOUT_TAIL:
  //   builds a dictionary without a TAIL. Its last trie has only 1-byte
  //   labels.
  // - MARISA_ALPHA_BINARY_TAIL:
  //   builds a dictionary with a binary-mode TAIL. Its last labels are stored
  //   as binary data.
  // - MARISA_ALPHA_TEXT_TAIL:
  //   builds a dictionary with a text-mode TAIL if its last labels do not
  //   contain NULL characters. The last labels are stored as zero-terminated
  //   strings. Otherwise, a dictionary is built with a binary-mode TAIL.
  MARISA_ALPHA_WITHOUT_TAIL = 0x01000,
  MARISA_ALPHA_BINARY_TAIL  = 0x02000,
  MARISA_ALPHA_TEXT_TAIL    = 0x04000,
  MARISA_ALPHA_DEFAULT_TAIL = MARISA_ALPHA_TEXT_TAIL,

  // libmarisa arranges nodes in ascending order of their labels
  // (MARISA_ALPHA_LABEL_ORDER) or in descending order of their weights
  // (MARISA_ALPHA_WEIGHT_ORDER). MARISA_ALPHA_WEIGHT_ORDER is generally a
  // better choice because it enables faster lookups, but
  // MARISA_ALPHA_LABEL_ORDER is still useful if an application needs to
  // predict keys in label order.
  MARISA_ALPHA_LABEL_ORDER   = 0x10000,
  MARISA_ALPHA_WEIGHT_ORDER  = 0x20000,
  MARISA_ALPHA_DEFAULT_ORDER = MARISA_ALPHA_WEIGHT_ORDER,

  // The default settings. 0 is equivalent to MARISA_ALPHA_DEFAULT_FLAGS.
  MARISA_ALPHA_DEFAULT_FLAGS = MARISA_ALPHA_DEFAULT_NUM_TRIES
      | MARISA_ALPHA_DEFAULT_TRIE | MARISA_ALPHA_DEFAULT_TAIL
      | MARISA_ALPHA_DEFAULT_ORDER,

  // Masks that select each group of flags defined above.
  MARISA_ALPHA_NUM_TRIES_MASK = 0x000FF,
  MARISA_ALPHA_TRIE_MASK      = 0x00F00,
  MARISA_ALPHA_TAIL_MASK      = 0x0F000,
  MARISA_ALPHA_ORDER_MASK     = 0xF0000,
  MARISA_ALPHA_FLAGS_MASK     = 0xFFFFF
} marisa_alpha_flags;

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

#ifdef __cplusplus
namespace marisa_alpha {

typedef ::marisa_alpha_uint8  UInt8;
typedef ::marisa_alpha_uint16 UInt16;
typedef ::marisa_alpha_uint32 UInt32;
typedef ::marisa_alpha_uint64 UInt64;

typedef ::marisa_alpha_status Status;

// An exception object stores a filename, a line number and an error code.
// The filename pointer is stored as given; the string is not copied.
class Exception {
 public:
  Exception(const char *filename, int line, Status status)
      : filename_(filename), line_(line), status_(status) {}
  Exception(const Exception &ex)
      : filename_(ex.filename_), line_(ex.line_), status_(ex.status_) {}

  Exception &operator=(const Exception &rhs) {
    filename_ = rhs.filename_;
    line_ = rhs.line_;
    status_ = rhs.status_;
    return *this;
  }

  // Returns the filename passed to the constructor.
  const char *filename() const {
    return filename_;
  }
  // Returns the line number passed to the constructor.
  int line() const {
    return line_;
  }
  // Returns the error code passed to the constructor.
  Status status() const {
    return status_;
  }

  // Same as std::exception, what() returns an error message. The message is
  // whatever marisa_alpha_strerror() returns for the stored error code.
  const char *what() const {
    return ::marisa_alpha_strerror(status_);
  }

 private:
  const char *filename_;
  int line_;
  Status status_;
};

// MARISA_ALPHA_THROW adds a filename and a line number to an exception.
// The exception type is fully qualified so that the macro also works when it
// is expanded outside of namespace marisa_alpha.
#define MARISA_ALPHA_THROW(status) \
  (throw ::marisa_alpha::Exception(__FILE__, __LINE__, (status)))

// MARISA_ALPHA_THROW_IF throws an exception with `status' if `cond' is true.
// It is written as a single expression: the right-hand side of `||' (the
// throw) is evaluated only when `cond' holds.
#define MARISA_ALPHA_THROW_IF(cond, status) \
  (void)((!(cond)) || (MARISA_ALPHA_THROW(status), 0))

// MARISA_ALPHA_DEBUG_IF is used for debugging. For example,
// MARISA_ALPHA_DEBUG_IF is used to find out-of-range accesses in
// marisa::Vector, marisa::IntVector, etc. It expands to nothing unless
// _DEBUG is defined, so `cond' is not evaluated in that case.
#ifdef _DEBUG
#define MARISA_ALPHA_DEBUG_IF(cond, status) \
  MARISA_ALPHA_THROW_IF(cond, status)
#else
#define MARISA_ALPHA_DEBUG_IF(cond, status)
#endif

// Swap() exchanges the values pointed to by `lhs' and `rhs'. It is provided
// to not include <algorithm> only for std::swap().
// Throws an Exception with MARISA_ALPHA_PARAM_ERROR if either pointer is NULL.
template <typename T>
void Swap(T *lhs, T *rhs) {
  MARISA_ALPHA_THROW_IF((lhs == NULL) || (rhs == NULL),
      MARISA_ALPHA_PARAM_ERROR);
  T temp = *lhs;
  *lhs = *rhs;
  *rhs = temp;
}

}  // namespace marisa_alpha
#endif  // __cplusplus

#endif  // MARISA_ALPHA_BASE_H_