Home | History | Annotate | Download | only in csdet
      1 /*
      2  ********************************************************************************
      3  *   Copyright (C) 2005-2006, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  ********************************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 #include "unicode/ucsdet.h"
     10 
     11 #include <string.h>
     12 #include <stdio.h>
     13 
/* Size in bytes of the read buffer in main(); at most this many bytes from
 * the start of each input file are read and handed to the charset detector. */
#define BUFFER_SIZE 8192
     15 
     16 int main(int argc, char *argv[])
     17 {
     18     static char buffer[BUFFER_SIZE];
     19     int32_t arg;
     20 
     21     if( argc <= 1 ) {
     22         printf("Usage: %s [filename]...\n", argv[0]);
     23         return -1;
     24     }
     25 
     26     for(arg = 1; arg < argc; arg += 1) {
     27         FILE *file;
     28         char *filename = argv[arg];
     29         int32_t inputLength, match, matchCount = 0;
     30         UCharsetDetector* csd;
     31         const UCharsetMatch **csm;
     32         UErrorCode status = U_ZERO_ERROR;
     33 
     34         if (arg > 1) {
     35             printf("\n");
     36         }
     37 
     38         file = fopen(filename, "rb");
     39 
     40         if (file == NULL) {
     41             printf("Cannot open file \"%s\"\n\n", filename);
     42             continue;
     43         }
     44 
     45         printf("%s:\n", filename);
     46 
     47         inputLength = (int32_t) fread(buffer, 1, BUFFER_SIZE, file);
     48 
     49         fclose(file);
     50 
     51         csd = ucsdet_open(&status);
     52         ucsdet_setText(csd, buffer, inputLength, &status);
     53 
     54         csm = ucsdet_detectAll(csd, &matchCount, &status);
     55 
     56         for(match = 0; match < matchCount; match += 1) {
     57             const char *name = ucsdet_getName(csm[match], &status);
     58             const char *lang = ucsdet_getLanguage(csm[match], &status);
     59             int32_t confidence = ucsdet_getConfidence(csm[match], &status);
     60 
     61             if (lang == NULL || strlen(lang) == 0) {
     62                 lang = "**";
     63             }
     64 
     65             printf("%s (%s) %d\n", name, lang, confidence);
     66         }
     67 
     68         ucsdet_close(csd);
     69     }
     70 
     71     return 0;
     72 }
     73 
     74