Home | History | Annotate | Download | only in i18n
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  **********************************************************************
      5  *   Copyright (C) 2005-2016, International Business Machines
      6  *   Corporation and others.  All Rights Reserved.
      7  **********************************************************************
      8  */
      9 
     10 #ifndef __CSDETECT_H
     11 #define __CSDETECT_H
     12 
     13 #include "unicode/uobject.h"
     14 
     15 #if !UCONFIG_NO_CONVERSION
     16 
     17 #include "unicode/uenum.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 class InputText;
     22 class CharsetRecognizer;
     23 class CharsetMatch;
     24 
     25 class CharsetDetector : public UMemory
     26 {
     27 private:
     28     InputText *textIn;
     29     CharsetMatch **resultArray;
     30     int32_t resultCount;
     31     UBool fStripTags;   // If true, setText() will strip tags from input text.
     32     UBool fFreshTextSet;
     33     static void setRecognizers(UErrorCode &status);
     34 
     35     UBool *fEnabledRecognizers;  // If not null, active set of charset recognizers had
     36                                 // been changed from the default. The array index is
     37                                 // corresponding to fCSRecognizers. See setDetectableCharset().
     38 
     39 public:
     40     CharsetDetector(UErrorCode &status);
     41 
     42     ~CharsetDetector();
     43 
     44     void setText(const char *in, int32_t len);
     45 
     46     const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status);
     47 
     48     const CharsetMatch *detect(UErrorCode& status);
     49 
     50     void setDeclaredEncoding(const char *encoding, int32_t len) const;
     51 
     52     UBool setStripTagsFlag(UBool flag);
     53 
     54     UBool getStripTagsFlag() const;
     55 
     56 //    const char *getCharsetName(int32_t index, UErrorCode& status) const;
     57 
     58     static int32_t getDetectableCount();
     59 
     60 
     61     static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
     62     UEnumeration * getDetectableCharsets(UErrorCode &status) const;
     63     void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
     64 };
     65 
     66 U_NAMESPACE_END
     67 
     68 #endif
     69 #endif /* __CSDETECT_H */
     70