#! /usr/bin/env python

"""world -- Print mappings between country names and DNS country codes.

Contact: Barry Warsaw
Email:   barry@python.org
Version: %(__version__)s

This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address.  Addresses can be in any of the following forms:

    xx                -- just the country code or top-level domain identifier
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address

The top-level domain is matched without regard to case.

If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted.  This script will search the country names and
print a list of matching entries.  You can force reverse mappings with the
`-r' flag (see below).

For example:

    %% world tz us
    tz originated from Tanzania, United Republic of
    us originated from United States

    %% world united
    united matches 6 countries:
        ae: United Arab Emirates
        uk: United Kingdom (common practice)
        um: United States Minor Outlying Islands
        us: United States
        tz: Tanzania, United Republic of
        gb: United Kingdom

Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
authoritative source of country code mappings is:

    <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>

The latest known change to this information was:

    Friday, 5 April 2002, 12.00 CET 2002

This script also knows about non-geographic top-level domains, and the
additional ccTLDs reserved by IANA.

Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]

    --dump
    -d
        Print mapping of all top-level domains.

    --parse file
    -p file
        Parse an iso3166-countrycodes file extracting the two letter country
        code followed by the country name.  Note that the three letter country
        codes and numbers, which are also provided in the standard format
        file, are ignored.

    --outputdict
    -o
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization.  This makes it appropriate for cutting and
        pasting back into this file.  Output is always to standard out.

    --reverse
    -r
        Force reverse lookup.  In this mode the address can be any Python
        regular expression; this is matched against all country names and a
        list of matching mappings is printed.  In normal mode (e.g. without
        this flag), reverse lookup is performed on addresses if no matching
        country code is found.

    -h
    --help
        Print this message.
"""
__version__ = '$Revision$'


import sys
import getopt
import re

PROGRAM = sys.argv[0]

# NOTE: every print() call below passes exactly one pre-formatted string.
# That form produces identical output under the Python 2 print statement and
# the Python 3 print function, so no __future__ import is required.


def usage(code, msg=''):
    """Print the module help text, then `msg` if given, and exit.

    `code` is handed to sys.exit() as the process exit status.
    """
    print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)


def resolve(rawaddr):
    """Look up the top-level domain of `rawaddr` and print where it is from.

    The text after the last `.' (or the whole string when there is no dot) is
    looked up first in `nameorgs' and then in `countries'.

    Returns None when the address was resolved (a line has been printed), or
    `rawaddr' unchanged when it was not, so the caller can bounce it to the
    next step.
    """
    # DNS labels are case-insensitive, so fold the TLD before the lookup; the
    # address itself is echoed back exactly as the user typed it.
    addr = rawaddr.split('.')[-1].lower()
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    if addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    # Not resolved, bounce it to the next step
    return rawaddr


def reverse(regexp):
    """Print every code whose name matches `regexp' (case-insensitively).

    The pattern is searched for in each value of the combined `all' mapping.

    Returns None when at least one match was printed, or `regexp' unchanged
    when nothing matched.  A string that is not a valid regular expression
    cannot match anything and is likewise returned unchanged.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # Unresolved addresses end up here automatically, so arbitrary user
        # input must not be allowed to crash the script.
        return regexp
    matches = [code for code, country in all.items() if cre.search(country)]
    if not matches:
        # not resolved, bounce it to the next step
        return regexp
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None


def _normalize_country(country):
    """Return the upper-case `country' name with normalized capitalization."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # `THE' stays capitalized only as the second word, as in
            # "Congo, The Democratic Republic of the".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # e.g. D'IVOIRE -> D'Ivoire
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif '-' in w:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Parse an iso3166-countrycodes `file' and print its code/name pairs.

    Everything up to the first line starting with `-' is skipped.  After that
    each line is expected to hold a country name, a two letter code, a three
    letter code and a three digit number; parsing stops at the next line
    starting with `-' or at end of file.

    When `normalize' is true the output is a `countries = {...}' dictionary
    literal with lower-cased codes and normalized names; otherwise each line
    is printed as "CODE NAME".

    If the file cannot be opened, the error is printed and the function
    returns without producing any other output.
    """
    try:
        fp = open(file)
    except IOError as err:
        print('%s : %s' % (err.strerror, file))
        # Without this return the code below would hit an unbound `fp'.
        return

    cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    if normalize:
        print('countries = {')

    try:
        for line in fp:
            if not scanning:
                # Still in the header; the table starts after a `-' rule.
                if line[0] == '-':
                    scanning = 1
                continue
            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # closing rule: end of the table
                    break
                print('Could not parse line: %s' % line)
                continue
            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print('%s %s' % (code, country))
    finally:
        fp.close()

    if normalize:
        print(' }')


def main():
    """Command line entry point: parse sys.argv and run the chosen mode."""
    show_help = 0
    status = 0
    dump = 0
    parsefile = None
    normalize = 0
    forcerev = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = 1
        elif opt in ('-d', '--dump'):
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1

    if show_help:
        usage(status)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # Each pass is run to completion before the next one starts so that
        # all forward lookups are printed before any reverse lookups.  Both
        # functions return None for an address they handled and the address
        # itself otherwise; falsy results are dropped between passes.
        if not forcerev:
            args = [a for a in [resolve(arg) for arg in args] if a]
        args = [a for a in [reverse(arg) for arg in args] if a]
        for arg in args:
            print('Where in the world is %s?' % arg)


# The mappings
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }


countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }

# Combined table searched by reverse().  The name shadows the `all' builtin
# but is kept because it is part of this module's existing namespace.
all = nameorgs.copy()
all.update(countries)


if __name__ == '__main__':
    main()