Home | History | Annotate | Download | only in i18n
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2006, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 #ifndef __CSR2022_H
      9 #define __CSR2022_H
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_CONVERSION
     14 
     15 #include "csrecog.h"
     16 
     17 U_NAMESPACE_BEGIN
     18 
     19 /**
     20  *  class CharsetRecog_2022  part of the ICU charset detection imlementation.
     21  *                           This is a superclass for the individual detectors for
     22  *                           each of the detectable members of the ISO 2022 family
     23  *                           of encodings.
     24  *
     25  *                           The separate classes are nested within this class.
     26  *
     27  * @internal
     28  */
     29 class CharsetRecog_2022 : public CharsetRecognizer
     30 {
     31 
     32 public:
     33     virtual ~CharsetRecog_2022() = 0;
     34 
     35 protected:
     36 
     37     /**
     38      * Matching function shared among the 2022 detectors JP, CN and KR
     39      * Counts up the number of legal an unrecognized escape sequences in
     40      * the sample of text, and computes a score based on the total number &
     41      * the proportion that fit the encoding.
     42      *
     43      *
     44      * @param text the byte buffer containing text to analyse
     45      * @param textLen  the size of the text in the byte.
     46      * @param escapeSequences the byte escape sequences to test for.
     47      * @return match quality, in the range of 0-100.
     48      */
     49     int32_t match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length);
     50 
     51 };
     52 
     53 class CharsetRecog_2022JP :public CharsetRecog_2022
     54 {
     55 public:
     56     virtual ~CharsetRecog_2022JP() {}
     57 
     58     const char *getName() const;
     59 
     60     int32_t match(InputText *textIn);
     61 };
     62 
     63 class CharsetRecog_2022KR :public CharsetRecog_2022 {
     64 public:
     65     virtual ~CharsetRecog_2022KR() {}
     66 
     67     const char *getName() const;
     68 
     69     int32_t match(InputText *textIn);
     70 
     71 };
     72 
     73 class CharsetRecog_2022CN :public CharsetRecog_2022
     74 {
     75 public:
     76     virtual ~CharsetRecog_2022CN() {}
     77 
     78     const char* getName() const;
     79 
     80     int32_t match(InputText *textIn);
     81 };
     82 
     83 U_NAMESPACE_END
     84 
     85 #endif
     86 #endif /* __CSR2022_H */
     87