1 /* 2 *************************************************************************** 3 * Copyright (C) 2008-2009, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 *************************************************************************** 6 * file name: uspoof_build.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2008 Dec 8 12 * created by: Andy Heninger 13 * 14 * Unicode Spoof Detection Data Builder 15 * Builder-related functions are kept in separate files so that applications not needing 16 * the builder can more easily exclude them, typically by means of static linking. 17 * 18 * There are three relatively independent sets of Spoof data, 19 * Confusables, 20 * Whole Script Confusables 21 * ID character extensions. 22 * 23 * The data tables for each are built separately, each from its own definitions 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/uspoof.h" 28 #include "unicode/unorm.h" 29 #include "unicode/uregex.h" 30 #include "unicode/ustring.h" 31 #include "cmemory.h" 32 #include "uspoof_impl.h" 33 #include "uhash.h" 34 #include "uvector.h" 35 #include "uassert.h" 36 #include "uarrsort.h" 37 #include "uspoof_conf.h" 38 #include "uspoof_wsconf.h" 39 40 #if !UCONFIG_NO_NORMALIZATION 41 42 U_NAMESPACE_USE 43 44 45 // The main data building function 46 47 U_CAPI USpoofChecker * U_EXPORT2 48 uspoof_openFromSource(const char *confusables, int32_t confusablesLen, 49 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, 50 int32_t *errorType, UParseError *pe, UErrorCode *status) { 51 52 if (U_FAILURE(*status)) { 53 return NULL; 54 } 55 #if UCONFIG_NO_REGULAR_EXPRESSIONS 56 *status = U_UNSUPPORTED_ERROR; 57 return NULL; 58 #else 59 if (errorType!=NULL) { 60 *errorType = 0; 61 } 62 if (pe != NULL) { 63 pe->line = 0; 64 pe->offset = 0; 65 pe->preContext[0] = 0; 66 pe->postContext[0] = 0; 67 } 68 69 // Set up a shell of a spoof detector, with empty data. 70 SpoofData *newSpoofData = new SpoofData(*status); 71 SpoofImpl *This = new SpoofImpl(newSpoofData, *status); 72 73 // Compile the binary data from the source (text) format. 74 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); 75 buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); 76 77 if (U_FAILURE(*status)) { 78 delete This; 79 This = NULL; 80 } 81 return (USpoofChecker *)This; 82 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS 83 } 84 85 #endif 86