Home | History | Annotate | Download | only in csdet
      1 /*
      2 ********************************************************************************
      3 *   Copyright (C) 2016 and later: Unicode, Inc. and others.
      4 *   License & terms of use: http://www.unicode.org/copyright.html#License
      5 ********************************************************************************
      6 ********************************************************************************
      7  *   Copyright (C) 2005-2006, International Business Machines
      8  *   Corporation and others.  All Rights Reserved.
      9  *******************************************************************************
     10  */
     11 
     12 #include "unicode/utypes.h"
     13 #include "unicode/ucsdet.h"
     14 
     15 #include <string.h>
     16 #include <stdio.h>
     17 
     18 #define BUFFER_SIZE 8192
     19 
     20 int main(int argc, char *argv[])
     21 {
     22     static char buffer[BUFFER_SIZE];
     23     int32_t arg;
     24 
     25     if( argc <= 1 ) {
     26         printf("Usage: %s [filename]...\n", argv[0]);
     27         return -1;
     28     }
     29 
     30     for(arg = 1; arg < argc; arg += 1) {
     31         FILE *file;
     32         char *filename = argv[arg];
     33         int32_t inputLength, match, matchCount = 0;
     34         UCharsetDetector* csd;
     35         const UCharsetMatch **csm;
     36         UErrorCode status = U_ZERO_ERROR;
     37 
     38         if (arg > 1) {
     39             printf("\n");
     40         }
     41 
     42         file = fopen(filename, "rb");
     43 
     44         if (file == NULL) {
     45             printf("Cannot open file \"%s\"\n\n", filename);
     46             continue;
     47         }
     48 
     49         printf("%s:\n", filename);
     50 
     51         inputLength = (int32_t) fread(buffer, 1, BUFFER_SIZE, file);
     52 
     53         fclose(file);
     54 
     55         csd = ucsdet_open(&status);
     56         ucsdet_setText(csd, buffer, inputLength, &status);
     57 
     58         csm = ucsdet_detectAll(csd, &matchCount, &status);
     59 
     60         for(match = 0; match < matchCount; match += 1) {
     61             const char *name = ucsdet_getName(csm[match], &status);
     62             const char *lang = ucsdet_getLanguage(csm[match], &status);
     63             int32_t confidence = ucsdet_getConfidence(csm[match], &status);
     64 
     65             if (lang == NULL || strlen(lang) == 0) {
     66                 lang = "**";
     67             }
     68 
     69             printf("%s (%s) %d\n", name, lang, confidence);
     70         }
     71 
     72         ucsdet_close(csd);
     73     }
     74 
     75     return 0;
     76 }
     77 
     78