1 /* 2 ********************************************************************** 3 * Copyright (C) 2005-2006, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #ifndef __CSR2022_H 9 #define __CSR2022_H 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_CONVERSION 14 15 #include "csrecog.h" 16 17 U_NAMESPACE_BEGIN 18 19 /** 20 * class CharsetRecog_2022 part of the ICU charset detection imlementation. 21 * This is a superclass for the individual detectors for 22 * each of the detectable members of the ISO 2022 family 23 * of encodings. 24 * 25 * The separate classes are nested within this class. 26 * 27 * @internal 28 */ 29 class CharsetRecog_2022 : public CharsetRecognizer 30 { 31 32 public: 33 virtual ~CharsetRecog_2022() = 0; 34 35 protected: 36 37 /** 38 * Matching function shared among the 2022 detectors JP, CN and KR 39 * Counts up the number of legal an unrecognized escape sequences in 40 * the sample of text, and computes a score based on the total number & 41 * the proportion that fit the encoding. 42 * 43 * 44 * @param text the byte buffer containing text to analyse 45 * @param textLen the size of the text in the byte. 46 * @param escapeSequences the byte escape sequences to test for. 47 * @return match quality, in the range of 0-100. 48 */ 49 int32_t match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length); 50 51 }; 52 53 class CharsetRecog_2022JP :public CharsetRecog_2022 54 { 55 public: 56 virtual ~CharsetRecog_2022JP() {} 57 58 const char *getName() const; 59 60 int32_t match(InputText *textIn); 61 }; 62 63 class CharsetRecog_2022KR :public CharsetRecog_2022 { 64 public: 65 virtual ~CharsetRecog_2022KR() {} 66 67 const char *getName() const; 68 69 int32_t match(InputText *textIn); 70 71 }; 72 73 class CharsetRecog_2022CN :public CharsetRecog_2022 74 { 75 public: 76 virtual ~CharsetRecog_2022CN() {} 77 78 const char* getName() const; 79 80 int32_t match(InputText *textIn); 81 }; 82 83 U_NAMESPACE_END 84 85 #endif 86 #endif /* __CSR2022_H */ 87