Home | History | Annotate | Download | only in i18n
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2015, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 #ifndef __CSR2022_H
      9 #define __CSR2022_H
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_CONVERSION
     14 
     15 #include "csrecog.h"
     16 
     17 U_NAMESPACE_BEGIN
     18 
     19 class CharsetMatch;
     20 
/**
 *  class CharsetRecog_2022  part of the ICU charset detection implementation.
 *                           This is a superclass for the individual detectors for
 *                           each of the detectable members of the ISO 2022 family
 *                           of encodings.
 *
 *                           The separate detector classes are declared after this class.
 *
 * @internal
 */
     31 class CharsetRecog_2022 : public CharsetRecognizer
     32 {
     33 
     34 public:
     35     virtual ~CharsetRecog_2022() = 0;
     36 
     37 protected:
     38 
     39     /**
     40      * Matching function shared among the 2022 detectors JP, CN and KR
     41      * Counts up the number of legal an unrecognized escape sequences in
     42      * the sample of text, and computes a score based on the total number &
     43      * the proportion that fit the encoding.
     44      *
     45      *
     46      * @param text the byte buffer containing text to analyse
     47      * @param textLen  the size of the text in the byte.
     48      * @param escapeSequences the byte escape sequences to test for.
     49      * @return match quality, in the range of 0-100.
     50      */
     51     int32_t match_2022(const uint8_t *text,
     52                        int32_t textLen,
     53                        const uint8_t escapeSequences[][5],
     54                        int32_t escapeSequences_length) const;
     55 
     56 };
     57 
     58 class CharsetRecog_2022JP :public CharsetRecog_2022
     59 {
     60 public:
     61     virtual ~CharsetRecog_2022JP();
     62 
     63     const char *getName() const;
     64 
     65     UBool match(InputText *textIn, CharsetMatch *results) const;
     66 };
     67 
     68 #if !UCONFIG_ONLY_HTML_CONVERSION
     69 class CharsetRecog_2022KR :public CharsetRecog_2022 {
     70 public:
     71     virtual ~CharsetRecog_2022KR();
     72 
     73     const char *getName() const;
     74 
     75     UBool match(InputText *textIn, CharsetMatch *results) const;
     76 
     77 };
     78 
     79 class CharsetRecog_2022CN :public CharsetRecog_2022
     80 {
     81 public:
     82     virtual ~CharsetRecog_2022CN();
     83 
     84     const char* getName() const;
     85 
     86     UBool match(InputText *textIn, CharsetMatch *results) const;
     87 };
     88 #endif
     89 
     90 U_NAMESPACE_END
     91 
     92 #endif
     93 #endif /* __CSR2022_H */
     94