Home | History | Annotate | Download | only in i18n
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  ********************************************************************************
      5  *   Copyright (C) 2005-2016, International Business Machines
      6  *   Corporation and others.  All Rights Reserved.
      7  ********************************************************************************
      8  */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_CONVERSION
     13 #include "unicode/ucsdet.h"
     14 #include "csdetect.h"
     15 #include "csmatch.h"
     16 #include "csrsbcs.h"
     17 #include "csrmbcs.h"
     18 #include "csrutf8.h"
     19 #include "csrucode.h"
     20 #include "csr2022.h"
     21 
     22 #include "cmemory.h"
     23 
     24 U_NAMESPACE_USE
     25 
     26 #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
     27 #define DELETE_ARRAY(array) uprv_free((void *) (array))
     28 
     29 U_CDECL_BEGIN
     30 
     31 U_CAPI UCharsetDetector * U_EXPORT2
     32 ucsdet_open(UErrorCode   *status)
     33 {
     34     if(U_FAILURE(*status)) {
     35         return 0;
     36     }
     37 
     38     CharsetDetector* csd = new CharsetDetector(*status);
     39 
     40     if (U_FAILURE(*status)) {
     41         delete csd;
     42         csd = NULL;
     43     }
     44 
     45     return (UCharsetDetector *) csd;
     46 }
     47 
     48 U_CAPI void U_EXPORT2
     49 ucsdet_close(UCharsetDetector *ucsd)
     50 {
     51     CharsetDetector *csd = (CharsetDetector *) ucsd;
     52     delete csd;
     53 }
     54 
     55 U_CAPI void U_EXPORT2
     56 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
     57 {
     58     if(U_FAILURE(*status)) {
     59         return;
     60     }
     61 
     62     ((CharsetDetector *) ucsd)->setText(textIn, len);
     63 }
     64 
     65 U_CAPI const char * U_EXPORT2
     66 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
     67 {
     68     if(U_FAILURE(*status)) {
     69         return NULL;
     70     }
     71 
     72     return ((CharsetMatch *) ucsm)->getName();
     73 }
     74 
     75 U_CAPI int32_t U_EXPORT2
     76 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
     77 {
     78     if(U_FAILURE(*status)) {
     79         return 0;
     80     }
     81 
     82     return ((CharsetMatch *) ucsm)->getConfidence();
     83 }
     84 
     85 U_CAPI const char * U_EXPORT2
     86 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
     87 {
     88     if(U_FAILURE(*status)) {
     89         return NULL;
     90     }
     91 
     92     return ((CharsetMatch *) ucsm)->getLanguage();
     93 }
     94 
     95 U_CAPI const UCharsetMatch * U_EXPORT2
     96 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
     97 {
     98     if(U_FAILURE(*status)) {
     99         return NULL;
    100     }
    101 
    102     return (const UCharsetMatch *) ((CharsetDetector *) ucsd)->detect(*status);
    103 }
    104 
    105 U_CAPI void U_EXPORT2
    106 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
    107 {
    108     if(U_FAILURE(*status)) {
    109         return;
    110     }
    111 
    112     ((CharsetDetector *) ucsd)->setDeclaredEncoding(encoding,length);
    113 }
    114 
    115 U_CAPI const UCharsetMatch**
    116 ucsdet_detectAll(UCharsetDetector *ucsd,
    117                  int32_t *maxMatchesFound, UErrorCode *status)
    118 {
    119     if(U_FAILURE(*status)) {
    120         return NULL;
    121     }
    122 
    123     CharsetDetector *csd = (CharsetDetector *) ucsd;
    124 
    125     return (const UCharsetMatch**)csd->detectAll(*maxMatchesFound,*status);
    126 }
    127 
    128 // U_CAPI  const char * U_EXPORT2
    129 // ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
    130 // {
    131 //     if(U_FAILURE(*status)) {
    132 //         return 0;
    133 //     }
    134 //     return csd->getCharsetName(index,*status);
    135 // }
    136 
    137 // U_CAPI  int32_t U_EXPORT2
    138 // ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
    139 // {
    140 //     if(U_FAILURE(*status)) {
    141 //         return -1;
    142 //     }
    143 //     return UCharsetDetector::getDetectableCount();
    144 // }
    145 
    146 U_CAPI  UBool U_EXPORT2
    147 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
    148 {
    149     // todo: could use an error return...
    150     if (ucsd == NULL) {
    151         return FALSE;
    152     }
    153 
    154     return ((CharsetDetector *) ucsd)->getStripTagsFlag();
    155 }
    156 
    157 U_CAPI  UBool U_EXPORT2
    158 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
    159 {
    160     // todo: could use an error return...
    161     if (ucsd == NULL) {
    162         return FALSE;
    163     }
    164 
    165     CharsetDetector *csd = (CharsetDetector *) ucsd;
    166     UBool prev = csd->getStripTagsFlag();
    167 
    168     csd->setStripTagsFlag(filter);
    169 
    170     return prev;
    171 }
    172 
    173 U_CAPI  int32_t U_EXPORT2
    174 ucsdet_getUChars(const UCharsetMatch *ucsm,
    175                  UChar *buf, int32_t cap, UErrorCode *status)
    176 {
    177     if(U_FAILURE(*status)) {
    178         return 0;
    179     }
    180 
    181     return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
    182 }
    183 
    184 U_CAPI void U_EXPORT2
    185 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
    186 {
    187     ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
    188 }
    189 
    190 U_CAPI  UEnumeration * U_EXPORT2
    191 ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
    192 {
    193     return CharsetDetector::getAllDetectableCharsets(*status);
    194 }
    195 
    196 U_DRAFT UEnumeration * U_EXPORT2
    197 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
    198 {
    199     return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
    200 }
    201 
    202 U_CDECL_END
    203 
    204 
    205 #endif
    206