Home | History | Annotate | Download | only in world
      1 #! /usr/bin/env python
      2 
      3 """world -- Print mappings between country names and DNS country codes.
      4 
      5 Contact: Barry Warsaw
      6 Email:   barry@python.org
      7 Version: %(__version__)s
      8 
      9 This script will take a list of Internet addresses and print out where in the
     10 world those addresses originate from, based on the top-level domain country
     11 code found in the address.  Addresses can be in any of the following forms:
     12 
     13     xx                -- just the country code or top-level domain identifier
     14     host.domain.xx    -- any Internet host or network name
     15     somebody@where.xx -- an Internet email address
     16 
     17 If no match is found, the address is interpreted as a regular expression and a
     18 reverse lookup is attempted.  This script will search the country names and
     19 print a list of matching entries.  You can force reverse mappings with the
     20 `-r' flag (see below).
     21 
     22 For example:
     23 
     24     %% world tz us
     25     tz originated from Tanzania, United Republic of
     26     us originated from United States
     27 
     28     %% world united
     29     united matches 6 countries:
     30         ae: United Arab Emirates
     31         uk: United Kingdom (common practice)
     32         um: United States Minor Outlying Islands
     33         us: United States
     34         tz: Tanzania, United Republic of
     35         gb: United Kingdom
     36 
     37 Country codes are maintained by the RIPE Network Coordination Centre,
     38 in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
     39 authoritative source of country code mappings is:
     40 
     41     <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
     42 
     43 The latest known change to this information was:
     44 
     45     Friday, 5 April 2002, 12.00 CET 2002
     46 
     47 This script also knows about non-geographic top-level domains, and the
     48 additional ccTLDs reserved by IANA.
     49 
     50 Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]
     51 
     52     --dump
     53     -d
     54         Print mapping of all top-level domains.
     55 
     56     --parse file
     57     -p file
     58         Parse an iso3166-countrycodes file extracting the two letter country
     59         code followed by the country name.  Note that the three letter country
     60         codes and numbers, which are also provided in the standard format
     61         file, are ignored.
     62 
     63     --outputdict
     64     -o
     65         When used in conjunction with the `-p' option, output is in the form
     66         of a Python dictionary, and country names are normalized
     67         w.r.t. capitalization.  This makes it appropriate for cutting and
     68         pasting back into this file.  Output is always to standard out.
     69 
     70     --reverse
     71     -r
     72         Force reverse lookup.  In this mode the address can be any Python
     73         regular expression; this is matched against all country names and a
     74         list of matching mappings is printed.  In normal mode (e.g. without
     75         this flag), reverse lookup is performed on addresses if no matching
     76         country code is found.
     77 
     78     -h
     79     --help
     80         Print this message.
     81 """
# Version string interpolated into the help text through the
# %(__version__)s placeholder of the module docstring.
# NOTE(review): '$Revision$' looks like a version-control keyword placeholder,
# presumably expanded at checkout -- confirm against the repository setup.
__version__ = '$Revision$'
     83 
     84 
     85 import sys
     86 import getopt
     87 import re
     88 
# Name the script was invoked as; interpolated into the usage line of the
# help text through the %(PROGRAM)s placeholder of the module docstring.
PROGRAM = sys.argv[0]
     90 
     91 
     92 
     94 def usage(code, msg=''):
     95     print __doc__ % globals()
     96     if msg:
     97         print msg
     98     sys.exit(code)
     99 
    100 
    101 
    103 def resolve(rawaddr):
    104     parts = rawaddr.split('.')
    105     if not len(parts):
    106         # no top level domain found, bounce it to the next step
    107         return rawaddr
    108     addr = parts[-1]
    109     if nameorgs.has_key(addr):
    110         print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
    111         return None
    112     elif countries.has_key(addr):
    113         print rawaddr, 'originated from', countries[addr]
    114         return None
    115     else:
    116         # Not resolved, bounce it to the next step
    117         return rawaddr
    118 
    119 
    120 
    122 def reverse(regexp):
    123     matches = []
    124     cre = re.compile(regexp, re.IGNORECASE)
    125     for code, country in all.items():
    126         mo = cre.search(country)
    127         if mo:
    128             matches.append(code)
    129     # print results
    130     if not matches:
    131         # not resolved, bounce it to the next step
    132         return regexp
    133     if len(matches) == 1:
    134         code = matches[0]
    135         print regexp, "matches code `%s', %s" % (code, all[code])
    136     else:
    137         print regexp, 'matches %d countries:' % len(matches)
    138         for code in matches:
    139             print "    %s: %s" % (code, all[code])
    140     return None
    141 
    142 
    143 
    145 def parse(file, normalize):
    146     try:
    147         fp = open(file)
    148     except IOError, (err, msg):
    149         print msg, ':', file
    150 
    151     cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    152     scanning = 0
    153 
    154     if normalize:
    155         print 'countries = {'
    156 
    157     while 1:
    158         line = fp.readline()
    159         if line == '':
    160             break                       # EOF
    161         if scanning:
    162             mo = cre.match(line)
    163             if not mo:
    164                 line = line.strip()
    165                 if not line:
    166                     continue
    167                 elif line[0] == '-':
    168                     break
    169                 else:
    170                     print 'Could not parse line:', line
    171                     continue
    172             country, code = mo.group(1, 2)
    173             if normalize:
    174                 words = country.split()
    175                 for i in range(len(words)):
    176                     w = words[i]
    177                     # XXX special cases
    178                     if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
    179                         words[i] = w.lower()
    180                     elif w == 'THE' and i <> 1:
    181                         words[i] = w.lower()
    182                     elif len(w) > 3 and w[1] == "'":
    183                         words[i] = w[0:3].upper() + w[3:].lower()
    184                     elif w in ('(U.S.)', 'U.S.'):
    185                         pass
    186                     elif w[0] == '(' and w <> '(local':
    187                         words[i] = '(' + w[1:].capitalize()
    188                     elif w.find('-') <> -1:
    189                         words[i] = '-'.join(
    190                             [s.capitalize() for s in w.split('-')])
    191                     else:
    192                         words[i] = w.capitalize()
    193                 code = code.lower()
    194                 country = ' '.join(words)
    195                 print '    "%s": "%s",' % (code, country)
    196             else:
    197                 print code, country
    198             
    199         elif line[0] == '-':
    200             scanning = 1
    201 
    202     if normalize:
    203         print '    }'
    204 
    205 
    207 def main():
    208     help = 0
    209     status = 0
    210     dump = 0
    211     parsefile = None
    212     normalize = 0
    213     forcerev = 0
    214 
    215     try:
    216         opts, args = getopt.getopt(
    217             sys.argv[1:],
    218             'p:rohd',
    219             ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    220     except getopt.error, msg:
    221         usage(1, msg)
    222 
    223     for opt, arg in opts:
    224         if opt in ('-h', '--help'):
    225             help = 1
    226         elif opt in ('-d', '--dump'):
    227             dump = 1
    228         elif opt in ('-p', '--parse'):
    229             parsefile = arg
    230         elif opt in ('-o', '--outputdict'):
    231             normalize = 1
    232         elif opt in ('-r', '--reverse'):
    233             forcerev = 1
    234 
    235     if help:
    236         usage(status)
    237 
    238     if dump:
    239         print 'Non-geographic domains:'
    240         codes = nameorgs.keys()
    241         codes.sort()
    242         for code in codes:
    243             print '    %4s:' % code, nameorgs[code]
    244 
    245         print '\nCountry coded domains:'
    246         codes = countries.keys()
    247         codes.sort()
    248         for code in codes:
    249             print '    %2s:' % code, countries[code]
    250     elif parsefile:
    251         parse(parsefile, normalize)
    252     else:
    253         if not forcerev:
    254             args = filter(None, map(resolve, args))
    255         args = filter(None, map(reverse, args))
    256         for arg in args:
    257             print 'Where in the world is %s?' % arg
    258 
    259 
    260 
    262 # The mappings
# Top-level domains that are not taken from the ISO 3166 country list, mapped
# to a short description.  resolve() consults this table before `countries',
# and --dump lists it under "Non-geographic domains".
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete, is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }
    307 
    308 
    309 
    311 countries = {
    312     "af": "Afghanistan",
    313     "al": "Albania",
    314     "dz": "Algeria",
    315     "as": "American Samoa",
    316     "ad": "Andorra",
    317     "ao": "Angola",
    318     "ai": "Anguilla",
    319     "aq": "Antarctica",
    320     "ag": "Antigua and Barbuda",
    321     "ar": "Argentina",
    322     "am": "Armenia",
    323     "aw": "Aruba",
    324     "au": "Australia",
    325     "at": "Austria",
    326     "az": "Azerbaijan",
    327     "bs": "Bahamas",
    328     "bh": "Bahrain",
    329     "bd": "Bangladesh",
    330     "bb": "Barbados",
    331     "by": "Belarus",
    332     "be": "Belgium",
    333     "bz": "Belize",
    334     "bj": "Benin",
    335     "bm": "Bermuda",
    336     "bt": "Bhutan",
    337     "bo": "Bolivia",
    338     "ba": "Bosnia and Herzegowina",
    339     "bw": "Botswana",
    340     "bv": "Bouvet Island",
    341     "br": "Brazil",
    342     "io": "British Indian Ocean Territory",
    343     "bn": "Brunei Darussalam",
    344     "bg": "Bulgaria",
    345     "bf": "Burkina Faso",
    346     "bi": "Burundi",
    347     "kh": "Cambodia",
    348     "cm": "Cameroon",
    349     "ca": "Canada",
    350     "cv": "Cape Verde",
    351     "ky": "Cayman Islands",
    352     "cf": "Central African Republic",
    353     "td": "Chad",
    354     "cl": "Chile",
    355     "cn": "China",
    356     "cx": "Christmas Island",
    357     "cc": "Cocos (Keeling) Islands",
    358     "co": "Colombia",
    359     "km": "Comoros",
    360     "cg": "Congo",
    361     "cd": "Congo, The Democratic Republic of the",
    362     "ck": "Cook Islands",
    363     "cr": "Costa Rica",
    364     "ci": "Cote D'Ivoire",
    365     "hr": "Croatia",
    366     "cu": "Cuba",
    367     "cy": "Cyprus",
    368     "cz": "Czech Republic",
    369     "dk": "Denmark",
    370     "dj": "Djibouti",
    371     "dm": "Dominica",
    372     "do": "Dominican Republic",
    373     "tp": "East Timor",
    374     "ec": "Ecuador",
    375     "eg": "Egypt",
    376     "sv": "El Salvador",
    377     "gq": "Equatorial Guinea",
    378     "er": "Eritrea",
    379     "ee": "Estonia",
    380     "et": "Ethiopia",
    381     "fk": "Falkland Islands (Malvinas)",
    382     "fo": "Faroe Islands",
    383     "fj": "Fiji",
    384     "fi": "Finland",
    385     "fr": "France",
    386     "gf": "French Guiana",
    387     "pf": "French Polynesia",
    388     "tf": "French Southern Territories",
    389     "ga": "Gabon",
    390     "gm": "Gambia",
    391     "ge": "Georgia",
    392     "de": "Germany",
    393     "gh": "Ghana",
    394     "gi": "Gibraltar",
    395     "gr": "Greece",
    396     "gl": "Greenland",
    397     "gd": "Grenada",
    398     "gp": "Guadeloupe",
    399     "gu": "Guam",
    400     "gt": "Guatemala",
    401     "gn": "Guinea",
    402     "gw": "Guinea-Bissau",
    403     "gy": "Guyana",
    404     "ht": "Haiti",
    405     "hm": "Heard Island and Mcdonald Islands",
    406     "va": "Holy See (Vatican City State)",
    407     "hn": "Honduras",
    408     "hk": "Hong Kong",
    409     "hu": "Hungary",
    410     "is": "Iceland",
    411     "in": "India",
    412     "id": "Indonesia",
    413     "ir": "Iran, Islamic Republic of",
    414     "iq": "Iraq",
    415     "ie": "Ireland",
    416     "il": "Israel",
    417     "it": "Italy",
    418     "jm": "Jamaica",
    419     "jp": "Japan",
    420     "jo": "Jordan",
    421     "kz": "Kazakstan",
    422     "ke": "Kenya",
    423     "ki": "Kiribati",
    424     "kp": "Korea, Democratic People's Republic of",
    425     "kr": "Korea, Republic of",
    426     "kw": "Kuwait",
    427     "kg": "Kyrgyzstan",
    428     "la": "Lao People's Democratic Republic",
    429     "lv": "Latvia",
    430     "lb": "Lebanon",
    431     "ls": "Lesotho",
    432     "lr": "Liberia",
    433     "ly": "Libyan Arab Jamahiriya",
    434     "li": "Liechtenstein",
    435     "lt": "Lithuania",
    436     "lu": "Luxembourg",
    437     "mo": "Macau",
    438     "mk": "Macedonia, The Former Yugoslav Republic of",
    439     "mg": "Madagascar",
    440     "mw": "Malawi",
    441     "my": "Malaysia",
    442     "mv": "Maldives",
    443     "ml": "Mali",
    444     "mt": "Malta",
    445     "mh": "Marshall Islands",
    446     "mq": "Martinique",
    447     "mr": "Mauritania",
    448     "mu": "Mauritius",
    449     "yt": "Mayotte",
    450     "mx": "Mexico",
    451     "fm": "Micronesia, Federated States of",
    452     "md": "Moldova, Republic of",
    453     "mc": "Monaco",
    454     "mn": "Mongolia",
    455     "ms": "Montserrat",
    456     "ma": "Morocco",
    457     "mz": "Mozambique",
    458     "mm": "Myanmar",
    459     "na": "Namibia",
    460     "nr": "Nauru",
    461     "np": "Nepal",
    462     "nl": "Netherlands",
    463     "an": "Netherlands Antilles",
    464     "nc": "New Caledonia",
    465     "nz": "New Zealand",
    466     "ni": "Nicaragua",
    467     "ne": "Niger",
    468     "ng": "Nigeria",
    469     "nu": "Niue",
    470     "nf": "Norfolk Island",
    471     "mp": "Northern Mariana Islands",
    472     "no": "Norway",
    473     "om": "Oman",
    474     "pk": "Pakistan",
    475     "pw": "Palau",
    476     "ps": "Palestinian Territory, Occupied",
    477     "pa": "Panama",
    478     "pg": "Papua New Guinea",
    479     "py": "Paraguay",
    480     "pe": "Peru",
    481     "ph": "Philippines",
    482     "pn": "Pitcairn",
    483     "pl": "Poland",
    484     "pt": "Portugal",
    485     "pr": "Puerto Rico",
    486     "qa": "Qatar",
    487     "re": "Reunion",
    488     "ro": "Romania",
    489     "ru": "Russian Federation",
    490     "rw": "Rwanda",
    491     "sh": "Saint Helena",
    492     "kn": "Saint Kitts and Nevis",
    493     "lc": "Saint Lucia",
    494     "pm": "Saint Pierre and Miquelon",
    495     "vc": "Saint Vincent and the Grenadines",
    496     "ws": "Samoa",
    497     "sm": "San Marino",
    498     "st": "Sao Tome and Principe",
    499     "sa": "Saudi Arabia",
    500     "sn": "Senegal",
    501     "sc": "Seychelles",
    502     "sl": "Sierra Leone",
    503     "sg": "Singapore",
    504     "sk": "Slovakia",
    505     "si": "Slovenia",
    506     "sb": "Solomon Islands",
    507     "so": "Somalia",
    508     "za": "South Africa",
    509     "gs": "South Georgia and the South Sandwich Islands",
    510     "es": "Spain",
    511     "lk": "Sri Lanka",
    512     "sd": "Sudan",
    513     "sr": "Suriname",
    514     "sj": "Svalbard and Jan Mayen",
    515     "sz": "Swaziland",
    516     "se": "Sweden",
    517     "ch": "Switzerland",
    518     "sy": "Syrian Arab Republic",
    519     "tw": "Taiwan, Province of China",
    520     "tj": "Tajikistan",
    521     "tz": "Tanzania, United Republic of",
    522     "th": "Thailand",
    523     "tg": "Togo",
    524     "tk": "Tokelau",
    525     "to": "Tonga",
    526     "tt": "Trinidad and Tobago",
    527     "tn": "Tunisia",
    528     "tr": "Turkey",
    529     "tm": "Turkmenistan",
    530     "tc": "Turks and Caicos Islands",
    531     "tv": "Tuvalu",
    532     "ug": "Uganda",
    533     "ua": "Ukraine",
    534     "ae": "United Arab Emirates",
    535     "gb": "United Kingdom",
    536     "us": "United States",
    537     "um": "United States Minor Outlying Islands",
    538     "uy": "Uruguay",
    539     "uz": "Uzbekistan",
    540     "vu": "Vanuatu",
    541     "ve": "Venezuela",
    542     "vn": "Viet Nam",
    543     "vg": "Virgin Islands, British",
    544     "vi": "Virgin Islands, U.S.",
    545     "wf": "Wallis and Futuna",
    546     "eh": "Western Sahara",
    547     "ye": "Yemen",
    548     "yu": "Yugoslavia",
    549     "zm": "Zambia",
    550     "zw": "Zimbabwe",
    551     }
    552 
    553 all = nameorgs.copy()
    554 all.update(countries)
    555 
    556 
    558 if __name__ == '__main__':
    559     main()
    560