1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Unit test compact language detector 6 // 7 // Small version, covering these languages only: 8 // Arabic Bulgarian Catalan Chinese ChineseT Croatian Czech Danish Dutch 9 // English Estonian Finnish French German Greek Hebrew Hindi Hungarian 10 // Icelandic Indonesian Italian Japanese Korean Latvian Lithuanian Norwegian 11 // Polish Portuguese Romanian Russian Serbian Slovak Slovenian Spanish 12 // Swedish Tagalog Thai Turkish Ukrainian Vietnamese 13 14 // Additional single-language scripts recognized for free: 15 // Armenian Cherokee Dhivehi Georgian Gujarati Inuktitut Kannada Khmer 16 // Laothian Malayalam Oriya Punjabi Sinhalese Syriac Telugu Tamil 17 // 18 19 #include <string> 20 #include "testing/gtest/include/gtest/gtest.h" 21 #include "encodings/compact_lang_det/compact_lang_det.h" 22 #include "encodings/compact_lang_det/ext_lang_enc.h" 23 #include "encodings/compact_lang_det/unittest_data.h" 24 25 #include "encodings/compact_lang_det/win/cld_commandlineflags.h" 26 #include "encodings/compact_lang_det/win/cld_google.h" 27 28 DEFINE_bool(html, false, "Print language spans in HTML on stderr"); 29 DEFINE_bool(detail, false, "Print incoming text to stderr"); 30 DEFINE_bool(skipbig, false, "Skip BigInputTests"); 31 32 // Test strings. 33 // These are all included here to make the unit test self-contained. 34 const char* kTeststr_en = 35 "confiscation of goods is assigned as the penalty part most of the courts " 36 "consist of members and when it is necessary to bring public cases before a " 37 "jury of members two courts combine for the purpose the most important cases " 38 "of all are brought jurors or"; 39 40 41 // UTF8 constants. 
Use a UTF-8 aware editor for this file 42 const char* kTeststr_ks = 43 "\xe0\xa4\xa8\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x8f" 44 "\xe0\xa4\xb8\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\x82" 45 "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2" 46 "\xe0\xa5\x81\xe0\xa4\x95 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7" 47 "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xa0" 48 "\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xa1\xe0\xa5\x8c\xe0\xa4\x82 \xe0\xa4\xa8" 49 "\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x85\xe0\xa4\xa7" 50 "\xe0\xa4\xbf\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa5\x8d\xe0\xa4\xaf " 51 "\xe0\xa4\xaa\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\x97\xe0\xa5\x8d" 52 "\xe0\xa4\xb5\xe0\xa4\xbe\xe0\xa4\xaf \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d" 53 "\xe0\xa4\xb7\xe0\xa4\xbf\xe0\xa4\xa3 \xe0\xa4\x85\xe0\xa4\xae\xe0\xa5\x87" 54 "\xe0\xa4\xb0\xe0\xa4\xbf\xe0\xa4\x95\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9" 55 "\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa" 56 "\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95" 57 "\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0" 58 "\xe0\xa5\x87 \xe0\xa4\x8f\xe0\xa4\x95 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 " 59 "\xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf \xe0\xa4\xab" 60 "\xe0\xa4\xa3\xe0\xa5\x80\xe0\xa4\xb6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa4\xb0 " 61 "\xe0\xa4\xa8\xe0\xa4\xbe\xe0\xa4\xa5 \xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\xa3" 62 "\xe0\xa5\x81 \xe0\xa4\xab\xe0\xa4\xbf\xe0\xa4\x9c\xe0\xa5\x80 \xe0\xa4\x9b" 63 "\xe0\xa5\x81 \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa4\xbf" 64 "\xe0\xa4\xa3 \xe0\xa4\xaa\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa4\xb6\xe0\xa4\xbe" 65 "\xe0\xa4\xa8\xe0\xa5\x8d \xe0\xa4\xa4 \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe" 66 "\xe0\xa4\xb8\xe0\xa4\xbe\xe0\xa4\x97\xe0\xa4\xb0 \xe0\xa4\xae\xe0\xa4\x82" 67 "\xe0\xa4\x9c 
\xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 " 68 "\xe0\xa4\xac\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xb8 " 69 "\xe0\xa4\x9b\xe0\xa5\x81 \xe0\xa4\x95\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87" 70 "\xe0\xa4\xac\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xa8 \xe0\xa4\xae\xe0\xa4\x82" 71 "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2" 72 "\xe0\xa5\x81\xe0\xa4\x96 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7" 73 "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\xa8\xe0\xa4\xb8\xe0\xa5\x8c " 74 "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d" 75 "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf \xe0\xa4\xac" 76 "\xe0\xa5\x81\xe0\xa4\xb0\xe0\xa5\x81\xe0\xa4\x82\xe0\xa4\xa1\xe0\xa5\x80 " 77 "\xe0\xa4\x85\xe0\xa4\xab\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x80\xe0\xa4\x95" 78 "\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d" 79 "\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7" 80 "\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87" 81 "\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x87 \xe0\xa4\xa6\xe0\xa5\x87" 82 "\xe0\xa4\xb6 \xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf " 83 "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d" 84 "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf"; 85 86 // Test strings. This will be squeezed because of the repetitions. 
87 const char* kTeststr_kr_repetitions = 88 "<meta charset=\"utf-8\" />\n\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 89 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 90 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 91 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 92 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 93 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 94 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 95 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 96 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 97 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 98 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 99 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 100 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 101 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 102 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 103 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 104 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 105 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 106 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 107 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 108 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 109 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 110 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 111 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 112 
"\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 113 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 114 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 115 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 116 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 117 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 118 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 119 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 120 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 121 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 122 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 123 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 124 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 125 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 126 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 127 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 128 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 129 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 130 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 131 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 132 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 133 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 134 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 135 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 136 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 137 
"\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 138 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 139 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 140 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 141 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 142 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 143 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 144 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 145 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 146 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 147 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 148 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 149 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 150 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 151 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 152 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 153 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 154 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 155 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 156 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 157 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 158 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 159 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 160 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 161 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 162 
"\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 163 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 164 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 165 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 166 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 167 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 168 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 169 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 170 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 171 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 172 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 173 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 174 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 175 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 176 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 177 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 178 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 179 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 180 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 181 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 182 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 183 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 184 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 185 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 186 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 187 
"\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 188 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 189 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 190 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 191 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 192 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 193 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 194 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 195 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 196 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 197 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 198 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 199 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 200 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 201 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 202 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 203 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 204 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 205 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 206 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 207 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 208 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 209 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 210 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 211 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 212 
"\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 213 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 214 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 215 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 216 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 217 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 218 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 219 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 220 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 221 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 222 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 223 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 224 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 225 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 226 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 227 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 228 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 229 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 230 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 231 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 232 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 233 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 234 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 235 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 236 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 237 
"\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 238 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 239 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 240 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 241 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 242 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 243 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 244 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 245 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 246 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 247 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 248 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 249 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 250 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 251 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 252 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 253 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 254 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 255 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 256 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 257 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 258 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 259 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 260 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 261 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 262 
"\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 263 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 264 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 265 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 266 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 267 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 268 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 269 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 270 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 271 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 272 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 273 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 274 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 275 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 276 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 277 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 278 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 279 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 280 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 281 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 282 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 283 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 284 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 285 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 286 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 287 
"\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 288 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 289 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 290 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 291 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 292 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 293 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 294 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 295 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 296 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 297 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 298 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 299 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 300 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 301 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 302 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 303 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 304 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 305 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 306 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 307 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 308 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 309 "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 310 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 311 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 312 
"\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 313 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 314 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 315 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 316 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 317 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 318 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 319 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 320 "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" 321 "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" 322 "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" 323 "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" 324 "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" 325 "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" 326 "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" 327 "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" 328 "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" 329 "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" 330 "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" 331 "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" 332 "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" 333 "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" 334 "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" 335 "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" 336 "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" 337 
"\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" 338 "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" 339 "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" 340 "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" 341 "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" 342 "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" 343 "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" 344 "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" 345 "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" 346 "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" 347 "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" 348 "\x9b\x98\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 349 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 350 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 351 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 352 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 353 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 354 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 355 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 356 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 357 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 358 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 359 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 360 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 361 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 362 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 363 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 364 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 365 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 366 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 367 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 368 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 369 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 370 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 371 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 372 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 373 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 374 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 375 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 376 "aaaaaaaaaaaaa"; 377 378 379 // const char* kTeststr_ks = 380 // \u0928\u0947\u092A\u093E\u0932\u0020\u090F\u0938\u093F\u092F\u093E\u0020 381 // \u092E\u0902\u091C\u0020\u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0915 382 // \u0020\u0930\u093E\u091C\u0927\u093E\u0928\u0940\u0020\u0915\u093E\u0920 383 // \u092E\u093E\u0921\u094C\u0902\u0020\u0928\u0947\u092A\u093E\u0932\u0020 384 // \u0905\u0927\u093F\u0930\u093E\u091C\u094D\u092F\u0020\u092A\u0947\u0930 385 // \u0947\u0917\u094D\u0935\u093E\u092F\u0020 386 // \u0926\u0915\u094D\u0937\u093F\u0923\u0020\u0905\u092E\u0947\u0930\u093F 387 // \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947 388 // \u0020\u092E\u0927\u094D\u0020\u092F\u0915\u094D\u0937\u0947\u0924\u094D 389 // \u0930\u0947\u0020\u090F\u0915\u0020\u0926\u0947\u0936\u0020\u0905\u0938 390 // 
// \u094D\u0020\u0924\u093F\u0020\u092B\u0923\u0940\u0936\u094D\u0935\u0930
// \u0020\u0928\u093E\u0925\u0020\u0930\u0947\u0923\u0941\u0020
// \u092B\u093F\u091C\u0940\u0020\u091B\u0941\u0020\u0926\u0915\u094D\u0937
// \u093F\u0923\u0020\u092A\u094D\u0930\u0936\u093E\u0928\u094D\u0020\u0924
// \u0020\u092E\u0939\u093E\u0938\u093E\u0917\u0930\u0020\u092E\u0902\u091C
// \u0020\u0905\u0916\u0020\u0926\u0947\u0936\u0020\u092C\u0939\u093E\u092E
// \u093E\u0938\u0020\u091B\u0941\u0020\u0915\u0947\u0930\u0947\u092C\u093F
// \u092F\u0928\u0020\u092E\u0902\u091C\u0020
// \u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0916\u0020\u0930\u093E\u091C
// \u0927\u093E\u0928\u0940\u0020\u0928\u0938\u094C\u0020\u0938\u092E\u094D
// \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F\u0020\u092C
// \u0941\u0930\u0941\u0902\u0921\u0940\u0020\u0905\u092B\u094D\u0930\u0940
// \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947
// \u0020\u092E\u0927\u094D\u0020
// \u092F\u0915\u094D\u0937\u0947\u0924\u094D\u0930\u0947\u0020\u0926\u0947
// \u0936\u0020\u0905\u0938\u094D\u0020\u0924\u093F\u0020\u0938\u092E\u094D
// \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F


namespace {

// Test fixture for the compact language detector. It only supplies helper
// methods; each helper runs one detector entry point on a plain-text,
// NUL-terminated sample and returns the Language value the detector reports.
class CompactLangDetTest : public testing::Test {
 protected:
  // Helpers declared here can be used by every TEST_F built on this fixture.

  // Detect language of plaintext src.
  // Calls CompactLangDet::DetectLanguage with NULL as its first argument, the
  // bytes of src (length taken with strlen, so src must be NUL-terminated)
  // and is_plain_text == true. Returns the detected language.
  // The is_reliable output is received but never inspected here.
  Language TestCompactLangDetPlain(const char* src) {
    bool is_plain_text = true;
    bool is_reliable;

    Language lang = CompactLangDet::DetectLanguage(NULL, src, strlen(src),
                                                   is_plain_text,
                                                   &is_reliable);
    return lang;
  }


  // Detect extended language of plaintext src.
  // Same inputs as TestCompactLangDetPlain, but goes through
  // CompactLangDet::ExtDetectLanguageSummary. The language3/percent3 arrays,
  // text_bytes and is_reliable only exist to satisfy the call's output
  // parameters; this helper discards all of them and returns just the
  // Language value produced by the call.
  Language TestExtCompactLangDetPlain(const char* src) {
    bool is_plain_text = true;
    Language language3[3];
    int percent3[3];
    int text_bytes;
    bool is_reliable;

    Language lang = CompactLangDet::ExtDetectLanguageSummary(NULL,
                                                             src, strlen(src),
                                                             is_plain_text,
                                                             language3,
                                                             percent3,
                                                             &text_bytes,
                                                             &is_reliable);
    return lang;
  }
};  // end class CompactLangDetTest


// Quick check on two samples: one expected to be ENGLISH, one expected to be
// HINDI.
TEST_F(CompactLangDetTest, EasyTests) {
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en));
  EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
}


// Runs the plain detector over one sample per language/script pair and checks
// the reported language. Several expectations deliberately name a different
// language than the sample's tag (see the NOTE comments on those lines).
TEST_F(CompactLangDetTest, FullTests) {
  // Only the tests reflecting the currently used detection tables are enabled.
  // NOTE(review): lines starting with `////` look like expectations that are
  // switched off for the current tables, while `// EXPECT_EQ` lines follow a
  // "removed" note -- confirm this convention before re-enabling anything.

  // Do all the languages in all their scripts
  //// EXPECT_EQ(AFAR, TestCompactLangDetPlain(kTeststr_aa_Latn));
  //// EXPECT_EQ(ABKHAZIAN, TestCompactLangDetPlain(kTeststr_ab_Cyrl));
  EXPECT_EQ(AFRIKAANS, TestCompactLangDetPlain(kTeststr_af_Latn));
  //// EXPECT_EQ(AMHARIC, TestCompactLangDetPlain(kTeststr_am_Ethi));
  EXPECT_EQ(ARABIC, TestCompactLangDetPlain(kTeststr_ar_Arab));
  //// EXPECT_EQ(ASSAMESE, TestCompactLangDetPlain(kTeststr_as_Beng));
  //// EXPECT_EQ(AYMARA, TestCompactLangDetPlain(kTeststr_ay_Latn));
  // AZERBAIJANI Arab & Cyrl removed 2008.05.27. Just AZERBAIJANI Latn left
  // EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Arab));
  // Missing data: az-Cyrl
  //// EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Latn));

  //// EXPECT_EQ(BASHKIR, TestCompactLangDetPlain(kTeststr_ba_Cyrl));
  EXPECT_EQ(BELARUSIAN, TestCompactLangDetPlain(kTeststr_be_Cyrl));
  EXPECT_EQ(BULGARIAN, TestCompactLangDetPlain(kTeststr_bg_Cyrl));
  //// EXPECT_EQ(BIHARI, TestCompactLangDetPlain(kTeststr_bh_Deva));
  //// EXPECT_EQ(BISLAMA, TestCompactLangDetPlain(kTeststr_bi_Latn));
  //// EXPECT_EQ(BENGALI, TestCompactLangDetPlain(kTeststr_bn_Beng));

  //// EXPECT_EQ(TIBETAN, TestCompactLangDetPlain(kTeststr_bo_Tibt));
  //// EXPECT_EQ(BRETON, TestCompactLangDetPlain(kTeststr_br_Latn));
  EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_bs_Cyrl));  // NOTE: Not BOSNIAN
  //// EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_bs_Latn));  // NOTE: Not BOSNIAN

  EXPECT_EQ(CATALAN, TestCompactLangDetPlain(kTeststr_ca_Latn));
  EXPECT_EQ(CHEROKEE, TestCompactLangDetPlain(kTeststr_chr_Cher));
  //// EXPECT_EQ(CORSICAN, TestCompactLangDetPlain(kTeststr_co_Latn));
  // No CREOLES_AND_PIDGINS_ENGLISH_BASED
  // No CREOLES_AND_PIDGINS_FRENCH_BASED
  // No CREOLES_AND_PIDGINS_OTHER
  // No CREOLES_AND_PIDGINS_PORTUGUESE_BASED
  EXPECT_EQ(CZECH, TestCompactLangDetPlain(kTeststr_cs_Latn));
  EXPECT_EQ(WELSH, TestCompactLangDetPlain(kTeststr_cy_Latn));

  EXPECT_EQ(DANISH, TestCompactLangDetPlain(kTeststr_da_Latn));
  EXPECT_EQ(GERMAN, TestCompactLangDetPlain(kTeststr_de_Latn));
  EXPECT_EQ(DHIVEHI, TestCompactLangDetPlain(kTeststr_dv_Thaa));
  //// EXPECT_EQ(DZONGKHA, TestCompactLangDetPlain(kTeststr_dz_Tibt));

  EXPECT_EQ(GREEK, TestCompactLangDetPlain(kTeststr_el_Grek));
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en_Latn));
  //// EXPECT_EQ(ESPERANTO, TestCompactLangDetPlain(kTeststr_eo_Latn));
  EXPECT_EQ(SPANISH, TestCompactLangDetPlain(kTeststr_es_Latn));
  EXPECT_EQ(ESTONIAN, TestCompactLangDetPlain(kTeststr_et_Latn));
  //// EXPECT_EQ(BASQUE, TestCompactLangDetPlain(kTeststr_eu_Latn));

  EXPECT_EQ(PERSIAN, TestCompactLangDetPlain(kTeststr_fa_Arab));
  EXPECT_EQ(FINNISH, TestCompactLangDetPlain(kTeststr_fi_Latn));
  //// EXPECT_EQ(FIJIAN, TestCompactLangDetPlain(kTeststr_fj_Latn));
  //// EXPECT_EQ(FAROESE, TestCompactLangDetPlain(kTeststr_fo_Latn));
  EXPECT_EQ(FRENCH, TestCompactLangDetPlain(kTeststr_fr_Latn));
  //// EXPECT_EQ(FRISIAN, TestCompactLangDetPlain(kTeststr_fy_Latn));

  EXPECT_EQ(IRISH, TestCompactLangDetPlain(kTeststr_ga_Latn));
  //// EXPECT_EQ(SCOTS_GAELIC, TestCompactLangDetPlain(kTeststr_gd_Latn));
  //// EXPECT_EQ(GALICIAN, TestCompactLangDetPlain(kTeststr_gl_Latn));
  //// EXPECT_EQ(GUARANI, TestCompactLangDetPlain(kTeststr_gn_Latn));
  EXPECT_EQ(GUJARATI, TestCompactLangDetPlain(kTeststr_gu_Gujr));
  //// EXPECT_EQ(MANX, TestCompactLangDetPlain(kTeststr_gv_Latn));

  //// EXPECT_EQ(HAUSA, TestCompactLangDetPlain(kTeststr_ha_Latn));
  EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_hr_Latn));  // NOTE: now CROATIAN
  //// EXPECT_EQ(HAITIAN_CREOLE, TestCompactLangDetPlain(kTeststr_ht_Latn));
  EXPECT_EQ(HUNGARIAN, TestCompactLangDetPlain(kTeststr_hu_Latn));
  EXPECT_EQ(ARMENIAN, TestCompactLangDetPlain(kTeststr_hy_Armn));

  //// EXPECT_EQ(INTERLINGUA, TestCompactLangDetPlain(kTeststr_ia_Latn));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_id_Latn));
  //// EXPECT_EQ(INTERLINGUE, TestCompactLangDetPlain(kTeststr_ie_Latn));
  //// EXPECT_EQ(INUPIAK, TestCompactLangDetPlain(kTeststr_ik_Latn));
  EXPECT_EQ(ICELANDIC, TestCompactLangDetPlain(kTeststr_is_Latn));
  EXPECT_EQ(ITALIAN, TestCompactLangDetPlain(kTeststr_it_Latn));
  EXPECT_EQ(INUKTITUT, TestCompactLangDetPlain(kTeststr_iu_Cans));
  EXPECT_EQ(HEBREW, TestCompactLangDetPlain(kTeststr_iw_Hebr));

  EXPECT_EQ(JAPANESE, TestCompactLangDetPlain(kTeststr_ja_Hani));
  //// EXPECT_EQ(JAVANESE, TestCompactLangDetPlain(kTeststr_jw_Latn));

  EXPECT_EQ(GEORGIAN, TestCompactLangDetPlain(kTeststr_ka_Geor));
  //// EXPECT_EQ(KHASI, TestCompactLangDetPlain(kTeststr_kha_Latn));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Arab));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Cyrl));
  //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Latn));
  //// EXPECT_EQ(GREENLANDIC, TestCompactLangDetPlain(kTeststr_kl_Latn));
  EXPECT_EQ(KHMER, TestCompactLangDetPlain(kTeststr_km_Khmr));
  EXPECT_EQ(KANNADA, TestCompactLangDetPlain(kTeststr_kn_Knda));
  EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_ko_Hani));
  //// EXPECT_EQ(KASHMIRI, TestCompactLangDetPlain(kTeststr_ks_Deva));
  // KURDISH Latn removed 2008.05.27. Just KURDISH Arab left
  //// EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Arab));
  // EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Latn));
  //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Arab));
  //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Cyrl));

  //// EXPECT_EQ(LATIN, TestCompactLangDetPlain(kTeststr_la_Latn));
  //// EXPECT_EQ(LUXEMBOURGISH, TestCompactLangDetPlain(kTeststr_lb_Latn));
  //// EXPECT_EQ(GANDA, TestCompactLangDetPlain(kTeststr_lg_Latn));
  //// EXPECT_EQ(LINGALA, TestCompactLangDetPlain(kTeststr_ln_Latn));
  EXPECT_EQ(LAOTHIAN, TestCompactLangDetPlain(kTeststr_lo_Laoo));
  EXPECT_EQ(LITHUANIAN, TestCompactLangDetPlain(kTeststr_lt_Latn));
  EXPECT_EQ(LATVIAN, TestCompactLangDetPlain(kTeststr_lv_Latn));

  //// EXPECT_EQ(MALAGASY, TestCompactLangDetPlain(kTeststr_mg_Latn));
  //// EXPECT_EQ(MAORI, TestCompactLangDetPlain(kTeststr_mi_Latn));
  EXPECT_EQ(MACEDONIAN, TestCompactLangDetPlain(kTeststr_mk_Cyrl));
  EXPECT_EQ(MALAYALAM, TestCompactLangDetPlain(kTeststr_ml_Mlym));
  //// EXPECT_EQ(MONGOLIAN, TestCompactLangDetPlain(kTeststr_mn_Cyrl));
  //// EXPECT_EQ(MOLDAVIAN, TestCompactLangDetPlain(kTeststr_mo_Cyrl));
  //// EXPECT_EQ(MARATHI, TestCompactLangDetPlain(kTeststr_mr_Deva));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn));
  // EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn2));
  EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn3));
  //// EXPECT_EQ(MALTESE, TestCompactLangDetPlain(kTeststr_mt_Latn));
  //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Latn));
  //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Mymr));

  //// EXPECT_EQ(NAURU, TestCompactLangDetPlain(kTeststr_na_Latn));
  //// EXPECT_EQ(NEPALI, TestCompactLangDetPlain(kTeststr_ne_Deva));
  EXPECT_EQ(DUTCH, TestCompactLangDetPlain(kTeststr_nl_Latn));
  //// EXPECT_EQ(NORWEGIAN_N, TestCompactLangDetPlain(kTeststr_nn_Latn));
  EXPECT_EQ(NORWEGIAN, TestCompactLangDetPlain(kTeststr_no_Latn));

  //// EXPECT_EQ(OCCITAN, TestCompactLangDetPlain(kTeststr_oc_Latn));
  //// EXPECT_EQ(OROMO, TestCompactLangDetPlain(kTeststr_om_Latn));
  EXPECT_EQ(ORIYA, TestCompactLangDetPlain(kTeststr_or_Orya));

  EXPECT_EQ(PUNJABI, TestCompactLangDetPlain(kTeststr_pa_Guru));
  EXPECT_EQ(POLISH, TestCompactLangDetPlain(kTeststr_pl_Latn));
  //// EXPECT_EQ(PASHTO, TestCompactLangDetPlain(kTeststr_ps_Arab));
  EXPECT_EQ(PORTUGUESE, TestCompactLangDetPlain(kTeststr_pt_BR));  // NOTE: not PORTUGUESE_B
                                                                   // nor PORTUGUESE_P

  //// EXPECT_EQ(QUECHUA, TestCompactLangDetPlain(kTeststr_qu_Latn));

  //// EXPECT_EQ(RHAETO_ROMANCE, TestCompactLangDetPlain(kTeststr_rm_Latn));
  //// EXPECT_EQ(RUNDI, TestCompactLangDetPlain(kTeststr_rn_Latn));
  EXPECT_EQ(ROMANIAN, TestCompactLangDetPlain(kTeststr_ro_Latn));
  EXPECT_EQ(RUSSIAN, TestCompactLangDetPlain(kTeststr_ru_Cyrl));
  //// EXPECT_EQ(KINYARWANDA, TestCompactLangDetPlain(kTeststr_rw_Latn));

  //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Deva));
  //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Latn));
  //// EXPECT_EQ(SCOTS, TestCompactLangDetPlain(kTeststr_sco_Latn));
  //// EXPECT_EQ(SINDHI, TestCompactLangDetPlain(kTeststr_sd_Arab));
  //// EXPECT_EQ(SANGO, TestCompactLangDetPlain(kTeststr_sg_Latn));
  // No SERBO_CROATIAN (sh)
  EXPECT_EQ(SINHALESE, TestCompactLangDetPlain(kTeststr_si_Sinh));
  //// EXPECT_EQ(LIMBU, TestCompactLangDetPlain(kTeststr_sit_NP));
  EXPECT_EQ(SLOVAK, TestCompactLangDetPlain(kTeststr_sk_Latn));
  EXPECT_EQ(SLOVENIAN, TestCompactLangDetPlain(kTeststr_sl_Latn));
  //// EXPECT_EQ(SAMOAN, TestCompactLangDetPlain(kTeststr_sm_Latn));
  //// EXPECT_EQ(SHONA, TestCompactLangDetPlain(kTeststr_sn_Latn));
  //// EXPECT_EQ(SOMALI, TestCompactLangDetPlain(kTeststr_so_Latn));
  //// EXPECT_EQ(ALBANIAN, TestCompactLangDetPlain(kTeststr_sq_Latn));
  EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_sr_Cyrl));  // NOTE: now SERBIAN
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_Latn));  // NOTE: Not SERBIAN
  EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_ME_Latn));  // NOTE: not SERBIAN nor MONTENEGRIN
  //// EXPECT_EQ(SISWANT, TestCompactLangDetPlain(kTeststr_ss_Latn));
  //// EXPECT_EQ(SESOTHO, TestCompactLangDetPlain(kTeststr_st_Latn));
  //// EXPECT_EQ(SUNDANESE, TestCompactLangDetPlain(kTeststr_su_Latn));
  EXPECT_EQ(SWEDISH, TestCompactLangDetPlain(kTeststr_sv_Latn));
  EXPECT_EQ(SWAHILI, TestCompactLangDetPlain(kTeststr_sw_Latn));
  EXPECT_EQ(SYRIAC, TestCompactLangDetPlain(kTeststr_syr_Syrc));

  EXPECT_EQ(TAMIL, TestCompactLangDetPlain(kTeststr_ta_Taml));
  EXPECT_EQ(TELUGU, TestCompactLangDetPlain(kTeststr_te_Telu));
  // Tajik Arab removed 2008.05.27. Just Tajik Cyrl left
  // EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Arab));
  //// EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Cyrl));
  EXPECT_EQ(THAI, TestCompactLangDetPlain(kTeststr_th_Thai));
  //// EXPECT_EQ(TIGRINYA, TestCompactLangDetPlain(kTeststr_ti_Ethi));
  //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Cyrl));
  //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Latn));
  EXPECT_EQ(TAGALOG, TestCompactLangDetPlain(kTeststr_tl_Latn));
  //// EXPECT_EQ(TSWANA, TestCompactLangDetPlain(kTeststr_tn_Latn));
  //// EXPECT_EQ(TONGA, TestCompactLangDetPlain(kTeststr_to_Latn));
  EXPECT_EQ(TURKISH, TestCompactLangDetPlain(kTeststr_tr_Latn));
  //// EXPECT_EQ(TSONGA, TestCompactLangDetPlain(kTeststr_ts_Latn));
  //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Cyrl));
  //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Latn));
  //// EXPECT_EQ(TWI, TestCompactLangDetPlain(kTeststr_tw_Latn));

  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Arab));
  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Cyrl));
  //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Latn));
  EXPECT_EQ(UKRAINIAN, TestCompactLangDetPlain(kTeststr_uk_Cyrl));
  //// EXPECT_EQ(URDU, TestCompactLangDetPlain(kTeststr_ur_Arab));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Arab));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Cyrl));
  //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Latn));

  EXPECT_EQ(VIETNAMESE, TestCompactLangDetPlain(kTeststr_vi_Latn));
  //// EXPECT_EQ(VOLAPUK, TestCompactLangDetPlain(kTeststr_vo_Latn));

  //// EXPECT_EQ(WOLOF, TestCompactLangDetPlain(kTeststr_wo_Latn));

  //// EXPECT_EQ(XHOSA, TestCompactLangDetPlain(kTeststr_xh_Latn));

  EXPECT_EQ(YIDDISH, TestCompactLangDetPlain(kTeststr_yi_Hebr));
  //// EXPECT_EQ(YORUBA, TestCompactLangDetPlain(kTeststr_yo_Latn));

  // Zhuang Hani removed 2008.05.13. Just Zhuang Latn left
  // EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Hani));
  //// EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Latn));
  EXPECT_EQ(CHINESE, TestCompactLangDetPlain(kTeststr_zh_Hani));
  EXPECT_EQ(CHINESE_T, TestCompactLangDetPlain(kTeststr_zh_TW));
  //// EXPECT_EQ(ZULU, TestCompactLangDetPlain(kTeststr_zu_Latn));
  // No TG_UNKNOWN_LANGUAGE
  // No UNKNOWN_LANGUAGE

  // This test should be executed with ASAN.
  EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_kr_repetitions));
}


// Checks non-natural-language samples through both the plain helper and the
// extended helper. Only the zzb/zze samples are currently asserted; both
// helpers are expected to report ENGLISH for them.
TEST_F(CompactLangDetTest, ExtendedTests) {
  // Do the extended languages, with them not-allowed then allowed
  // These turn out to be extraordinarily sensitive forms of garbage bytes
  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_tlh_Latn));
  //// EXPECT_EQ(X_KLINGON, TestExtCompactLangDetPlain(kTeststr_tlh_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzp_Latn));
  //// EXPECT_EQ(X_PIG_LATIN, TestExtCompactLangDetPlain(kTeststr_zzp_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Bugi));
  //// EXPECT_EQ(X_BUGINESE, TestExtCompactLangDetPlain(kTeststr_xx_Bugi));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Goth));
  //// EXPECT_EQ(X_GOTHIC, TestExtCompactLangDetPlain(kTeststr_xx_Goth));

  // Next three now removed permanently from probability tables (May 2008)
  // (used to be X_BORK_BORK_BORK, X_ELMER_FUDD, X_HACKER).
  //
  // Small changes in probability tables may cause these non-texts to
  // change detection result. If that happens, cross-check that
  // the new result is not because of a bug, then change the expected values.
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzb_Latn));
  EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzb_Latn));

  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zze_Latn));
  EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zze_Latn));

  //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzh_Latn));
  //// EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzh_Latn));
}


}  // End namespace

// Stand-alone entry point, compiled only when CLD_WINDOWS is not defined.
// Sets FLAGS_logtostderr, hands the command line to InitGoogle, then returns
// the result of RUN_ALL_TESTS() as the process exit status.
#if !defined(CLD_WINDOWS)
int main(int argc, char** argv) {
  FLAGS_logtostderr = true;
  InitGoogle("Unit test for CLD small", &argc, &argv, false);
  return RUN_ALL_TESTS();
}
#endif