Home | History | Annotate | Download | only in i18n
      1 /*
      2  ***************************************************************************
      3  * Copyright (C) 2008-2015, International Business Machines Corporation
      4  * and others. All Rights Reserved.
      5  ***************************************************************************
      6  *   file name:  uspoof_build.cpp
      7  *   encoding:   US-ASCII
      8  *   tab size:   8 (not used)
      9  *   indentation:4
     10  *
     11  *   created on: 2008 Dec 8
     12  *   created by: Andy Heninger
     13  *
     14  *   Unicode Spoof Detection Data Builder
     15  *   Builder-related functions are kept in separate files so that applications not needing
     16  *   the builder can more easily exclude them, typically by means of static linking.
     17  *
     18  *   There are three relatively independent sets of Spoof data:
     19  *      Confusables,
     20  *      Whole Script Confusables,
     21  *      ID character extensions.
     22  *
     23  *   The data tables for each are built separately, each from its own definitions
     24  */
     25 
     26 #include "unicode/utypes.h"
     27 #include "unicode/uspoof.h"
     28 #include "unicode/unorm.h"
     29 #include "unicode/uregex.h"
     30 #include "unicode/ustring.h"
     31 #include "cmemory.h"
     32 #include "uspoof_impl.h"
     33 #include "uhash.h"
     34 #include "uvector.h"
     35 #include "uassert.h"
     36 #include "uarrsort.h"
     37 #include "uspoof_conf.h"
     38 #include "uspoof_wsconf.h"
     39 
     40 #if !UCONFIG_NO_NORMALIZATION
     41 
     42 U_NAMESPACE_USE
     43 
     44 // Defined in uspoof.cpp, initializes file-static variables.
     45 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
     46 
     47 // The main data building function
     48 
     49 U_CAPI USpoofChecker * U_EXPORT2
     50 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
     51                       const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
     52                       int32_t *errorType, UParseError *pe, UErrorCode *status) {
     53     uspoof_internalInitStatics(status);
     54     if (U_FAILURE(*status)) {
     55         return NULL;
     56     }
     57 #if UCONFIG_NO_REGULAR_EXPRESSIONS
     58     *status = U_UNSUPPORTED_ERROR;
     59     return NULL;
     60 #else
     61     if (errorType!=NULL) {
     62         *errorType = 0;
     63     }
     64     if (pe != NULL) {
     65         pe->line = 0;
     66         pe->offset = 0;
     67         pe->preContext[0] = 0;
     68         pe->postContext[0] = 0;
     69     }
     70 
     71     // Set up a shell of a spoof detector, with empty data.
     72     SpoofData *newSpoofData = new SpoofData(*status);
     73     SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
     74 
     75     // Compile the binary data from the source (text) format.
     76     ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
     77     buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
     78 
     79     if (U_FAILURE(*status)) {
     80         delete This;
     81         This = NULL;
     82     }
     83     return (USpoofChecker *)This;
     84 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
     85 }
     86 
     87 #endif
     88