// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This command-line program converts an effective-TLD data file in UTF-8 from
// the format provided by Mozilla to the format expected by Chrome. This
// program generates an intermediate file which is then used by gperf to
// generate a perfect hash map. The benefit of this approach is that no time is
// spent on program initialization to generate the map of this data.
//
// Running this program finds "effective_tld_names.dat" in the expected location
// in the source checkout and generates "effective_tld_names.gperf" next to it.
//
// Any errors or warnings from this program are recorded in tld_cleanup.log.
//
// In particular, it
//  * Strips blank lines and comments, as well as notes for individual rules.
//  * Strips a single leading and/or trailing dot from each rule, if present.
//  * Logs a warning if a rule contains '!' or '*.' other than at the beginning
//    of the rule. (This also catches multiple ! or *. at the start of a rule.)
//  * Logs a warning if GURL reports a rule as invalid, but keeps the rule.
//  * Canonicalizes each rule's domain by converting it to a GURL and back.
//  * Adds explicit rules for true TLDs found in any rule.
//  * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS==="
//    and "// ===END PRIVATE DOMAINS===" as private.
26 27 #include "base/at_exit.h" 28 #include "base/command_line.h" 29 #include "base/files/file_path.h" 30 #include "base/files/file_util.h" 31 #include "base/i18n/icu_util.h" 32 #include "base/logging.h" 33 #include "base/path_service.h" 34 #include "base/process/memory.h" 35 #include "net/tools/tld_cleanup/tld_cleanup_util.h" 36 37 int main(int argc, const char* argv[]) { 38 base::EnableTerminationOnHeapCorruption(); 39 if (argc != 1) { 40 fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); 41 fprintf(stderr, "Usage: %s\n", argv[0]); 42 return 1; 43 } 44 45 // Manages the destruction of singletons. 46 base::AtExitManager exit_manager; 47 48 // Only use OutputDebugString in debug mode. 49 #ifdef NDEBUG 50 logging::LoggingDestination destination = logging::LOG_TO_FILE; 51 #else 52 logging::LoggingDestination destination = 53 logging::LOG_TO_ALL; 54 #endif 55 56 base::CommandLine::Init(argc, argv); 57 58 base::FilePath log_filename; 59 PathService::Get(base::DIR_EXE, &log_filename); 60 log_filename = log_filename.AppendASCII("tld_cleanup.log"); 61 logging::LoggingSettings settings; 62 settings.logging_dest = destination; 63 settings.log_file = log_filename.value().c_str(); 64 settings.delete_old = logging::DELETE_OLD_LOG_FILE; 65 logging::InitLogging(settings); 66 67 base::i18n::InitializeICU(); 68 69 base::FilePath input_file; 70 PathService::Get(base::DIR_SOURCE_ROOT, &input_file); 71 input_file = input_file.Append(FILE_PATH_LITERAL("net")) 72 .Append(FILE_PATH_LITERAL("base")) 73 .Append(FILE_PATH_LITERAL( 74 "registry_controlled_domains")) 75 .Append(FILE_PATH_LITERAL("effective_tld_names.dat")); 76 base::FilePath output_file; 77 PathService::Get(base::DIR_SOURCE_ROOT, &output_file); 78 output_file = output_file.Append(FILE_PATH_LITERAL("net")) 79 .Append(FILE_PATH_LITERAL("base")) 80 .Append(FILE_PATH_LITERAL( 81 "registry_controlled_domains")) 82 .Append(FILE_PATH_LITERAL( 83 "effective_tld_names.gperf")); 84 net::tld_cleanup::NormalizeResult 
result = 85 net::tld_cleanup::NormalizeFile(input_file, output_file); 86 if (result != net::tld_cleanup::kSuccess) { 87 fprintf(stderr, 88 "Errors or warnings processing file. See log in tld_cleanup.log."); 89 } 90 91 if (result == net::tld_cleanup::kError) 92 return 1; 93 return 0; 94 } 95