From 97cf048fe2074595a940e75566204f339c21538c Mon Sep 17 00:00:00 2001 From: secynic Date: Thu, 12 Sep 2013 15:31:51 -0500 Subject: [PATCH 1/5] set_proxy() and RWS skeleton --- CHANGES.rst | 6 +++ README.rst | 18 ++++++--- ipwhois/__init__.py | 98 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 99 insertions(+), 23 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2a9d487..0384b68 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.1.5 () +------------------ + +- Added set_proxy() function for proxy support in Whois-RWS queries. +- Added IPWhois.lookup_rws() function for Whois-RWS queries. + 0.1.4 (2013-09-12) ------------------ diff --git a/README.rst b/README.rst index bde5e6b..221946b 100644 --- a/README.rst +++ b/README.rst @@ -44,6 +44,17 @@ Countries:: >>>> print(countries[results['nets'][0]['country']]) United States + +Installing +========== + +Latest version from PyPi:: + + pip install ipwhois + +Latest version from GitHub:: + + pip install -e git+https://github.com/secynic/ipwhois@master#egg=ipwhois Parsing ======= @@ -52,9 +63,4 @@ Parsing is currently limited to CIDR, country, description, name, and state fiel Some IPs have parent networks listed. The parser attempts to recognize this, and break the networks into individual dictionaries. If a single network has multiple CIDRs, they will be separated by ', '. -Sometimes, you will see whois information with multiple consecutive same name fields, e.g., Description: some text\\nDescription: more text. The parser will recognize this and the returned result will have these separated by '\\n'. - -Future Plans -============ - -IPWhois.httplookup() - Allow parsing of Whois data via RWS feeds from the various NICs. This feature is useful when outbound port 43 is not available. Only ARIN and RIPE have Whois-RWS services at the time of this writing. Although RIPE does have a feature that integrates the other NICs, they are limited on the types of information that is allowed to be displayed. Additionally, as part of this feature, proxy support will also be added. \ No newline at end of file +Sometimes, you will see whois information with multiple consecutive same name fields, e.g., Description: some text\\nDescription: more text. The parser will recognize this and the returned result will have these separated by '\\n'. \ No newline at end of file diff --git a/ipwhois/__init__.py b/ipwhois/__init__.py index caaacfa..8e08916 100644 --- a/ipwhois/__init__.py +++ b/ipwhois/__init__.py @@ -21,11 +21,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = '0.1.4' +__version__ = '0.1.5' -import ipaddress, socket, urllib.request, dns.resolver, re +import ipaddress, socket, dns.resolver, re from xml.dom.minidom import parseString from os import path +from urllib import request IETF_RFC_REFERENCES = { #IPv4 @@ -47,56 +48,58 @@ "RFC 4291, Section 2.5.7": "http://tools.ietf.org/html/rfc4291#section-2.5.7", "RFC 4193": "https://tools.ietf.org/html/rfc4193" } - -NIC_URLS = { - "arin": "http://whois.arin.net/rest/nets;q={0}?showDetails=true&showARIN=true", - "ripencc": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=ripe-grs", - "apnic": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=apnic-grs", - "lacnic": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=lacnic-grs", - "afrinic": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=afrinic-grs" - } NIC_WHOIS = { "arin": { "server": "whois.arin.net", + "url": "http://whois.arin.net/rest/nets;q={0}?showDetails=true&showARIN=true", "fields": { "name": "^(NetName):[^\S\n]+(.+)$", "description": "^(OrgName|CustName):[^\S\n]+(.+)$", "country": "^(Country):[^\S\n]+(.+)$", "state": "^(StateProv):[^\S\n]+(.+)$", "city": "^(City):[^\S\n]+(.+)$" - } + }, + "fields_rws": {} }, "ripencc": { "server": "whois.ripe.net", + "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=ripe-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - } + }, + "fields_rws": {} }, "apnic": { "server": "whois.apnic.net", + "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=apnic-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - } + }, + "fields_rws": {} }, "lacnic": { "server": "whois.lacnic.net", + "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=lacnic-grs", "fields": { "description": "^(owner):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - } + }, + "fields_rws": {} }, "afrinic": { "server": "whois.afrinic.net", + "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=afrinic-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - } + }, + "fields_rws": {} } } @@ -105,7 +108,47 @@ IPV4_DNS_ZONE = "{0}.origin.asn.cymru.com" IPV6_DNS_ZONE = "{0}.origin6.asn.cymru.com" - + +def set_proxy(host = None, port = "80", username = None, password = None): + """ + The function to set proxy settings for urllib.request.urlopen(). + + Args: + host: The proxy address. + port: The proxy port. + username: The username to authenticate against the proxy. + password: The password to authenticate against the proxy. + """ + + #Define the host URL from the schema, host, and port. + url = "http://" + host + ":" + port + "/" + + #Create the proxy handler. + handler = request.ProxyHandler({'http': url}) + + #If the proxy user and password are defined. + if username is not None and password is not None: + + #Create the proxy authentication handler. + auth_handler = request.ProxyBasicAuthHandler() + + #Add the user and password to the proxy authentication handler. + auth_handler.add_password(None, url, username, password) + + #If the proxy authentication handler is defined. + if auth_handler is not None: + + #Create the proxy opener with the authentication handler. + opener = request.build_opener(handler, auth_handler) + + else: + + #Create the proxy opener excluding an authentication handler. + opener = request.build_opener(handler) + + #Install the proxy opener. + request.install_opener(opener) + def get_countries(): """ The function to generate a dictionary containing ISO_3166-1 country codes to names. @@ -732,4 +775,25 @@ def lookup(self, inc_raw = False): #Add the networks to the return dictionary. results["nets"] = nets - return results \ No newline at end of file + return results + + def lookup_rws(self, inc_raw = False): + """ + The function for retrieving and parsing whois information for an IP address via HTTP (Whois-RWS). + + Args: + inc_raw: Boolean for whether to include the raw whois results in the returned dictionary. + + Returns: + Dictionary: A dictionary containing the following keys: + query (String) - The IP address. + asn (String) - The Autonomous System Number. + asn_date (String) - The ASN Allocation date. + asn_registry (String) - The assigned ASN registry. + asn_cidr (String) - The assigned ASN CIDR. + asn_country_code (String) - The assigned ASN country code. + nets (List) - Dictionaries containing network information which consists of the RWS fields + listed in the NIC_WHOIS dictionary. Certain IPs have more granular network listings, + hence the need for a list object. + raw (String) - Raw whois results if the inc_raw parameter is True. + """ \ No newline at end of file From d9111a1bb43eabe7588731dca7a130de9e8141dd Mon Sep 17 00:00:00 2001 From: secynic Date: Thu, 12 Sep 2013 15:34:19 -0500 Subject: [PATCH 2/5] Comments --- ipwhois/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipwhois/__init__.py b/ipwhois/__init__.py index 8e08916..83de057 100644 --- a/ipwhois/__init__.py +++ b/ipwhois/__init__.py @@ -120,13 +120,13 @@ def set_proxy(host = None, port = "80", username = None, password = None): password: The password to authenticate against the proxy. """ - #Define the host URL from the schema, host, and port. + #Define the host URL from the host and port. url = "http://" + host + ":" + port + "/" #Create the proxy handler. handler = request.ProxyHandler({'http': url}) - #If the proxy user and password are defined. + #If the proxy username and password are defined. if username is not None and password is not None: #Create the proxy authentication handler. From 928ab424e8b9741b5c81ee7a4bc9bf88dca26a75 Mon Sep 17 00:00:00 2001 From: secynic Date: Fri, 13 Sep 2013 14:06:24 -0500 Subject: [PATCH 3/5] REST support --- CHANGES.rst | 2 +- README.rst | 43 ++++++- ipwhois/__init__.py | 285 +++++++++++++++++++++++++++++++++++++++++--- setup.py | 8 ++ 4 files changed, 318 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0384b68..a47b91d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changelog ========= -0.1.5 () +0.1.5 (2013-09-13) ------------------ - Added set_proxy() function for proxy support in Whois-RWS queries. diff --git a/README.rst b/README.rst index 221946b..8d550f8 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,42 @@ Typical usage:: 'raw': None } +REST (HTTP):: + + >>>> import ipwhois + >>>> from pprint import pprint + + >>>> obj = ipwhois.IPWhois("74.125.225.229") + >>>> results = obj.lookup_rws(False) + >>>> pprint(results) + + { + 'asn': '15169', + 'asn_cidr': '74.125.225.0/24', + 'asn_country_code': 'US', + 'asn_date': '2007-03-13', + 'asn_registry': 'arin', + 'nets': [{'cidr': '74.0.0.0/8', + 'city': 'Chantilly', + 'country': 'US', + 'description': 'American Registry for Internet Numbers', + 'name': 'NET74', + 'state': 'VA'}, + {'cidr': '74.125.0.0/16', + 'city': 'Mountain View', + 'country': 'US', + 'description': 'Google Inc.', + 'name': 'GOOGLE', + 'state': 'CA'}], + 'query': '74.125.225.229', + 'raw': None + } + +Proxy (Optional before ipwhois.IPWhois.lookup_rws()):: + + >>>> import ipwhois + >>>> ipwhois.set_proxy("192.168.0.1", "80", "some_username", "some_password") + Countries:: >>>> import ipwhois @@ -63,4 +99,9 @@ Parsing is currently limited to CIDR, country, description, name, and state fiel Some IPs have parent networks listed. The parser attempts to recognize this, and break the networks into individual dictionaries. If a single network has multiple CIDRs, they will be separated by ', '. -Sometimes, you will see whois information with multiple consecutive same name fields, e.g., Description: some text\\nDescription: more text. The parser will recognize this and the returned result will have these separated by '\\n'. \ No newline at end of file +Sometimes, you will see whois information with multiple consecutive same name fields, e.g., Description: some text\\nDescription: more text. The parser will recognize this and the returned result will have these separated by '\\n'. + +REST (HTTP) +=========== + +IPWhois.lookup_rws() should be faster than IPWhois.lookup(), but may not be as reliable. APNIC, LACNIC, and AFRINIC do not have a Whois-RWS service yet. We have to rely on the Ripe RWS service, which does not contain all of the data we need. \ No newline at end of file diff --git a/ipwhois/__init__.py b/ipwhois/__init__.py index 83de057..4b70c2b 100644 --- a/ipwhois/__init__.py +++ b/ipwhois/__init__.py @@ -23,7 +23,7 @@ __version__ = '0.1.5' -import ipaddress, socket, dns.resolver, re +import ipaddress, socket, dns.resolver, re, json from xml.dom.minidom import parseString from os import path from urllib import request @@ -59,47 +59,42 @@ "country": "^(Country):[^\S\n]+(.+)$", "state": "^(StateProv):[^\S\n]+(.+)$", "city": "^(City):[^\S\n]+(.+)$" - }, - "fields_rws": {} + } }, "ripencc": { "server": "whois.ripe.net", - "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=ripe-grs", + "url": "http://apps.db.ripe.net/whois/grs-search?query-string={0}&source=ripe-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - }, - "fields_rws": {} + } }, "apnic": { "server": "whois.apnic.net", - "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=apnic-grs", + "url": "http://apps.db.ripe.net/whois/grs-search?query-string={0}&source=apnic-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - }, - "fields_rws": {} + } }, "lacnic": { "server": "whois.lacnic.net", - "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=lacnic-grs", + "url": "http://apps.db.ripe.net/whois/grs-search?query-string={0}&source=lacnic-grs", "fields": { "description": "^(owner):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - }, - "fields_rws": {} + } }, "afrinic": { "server": "whois.afrinic.net", - "url": "http://apps.db.ripe.net/whois/grs-search.xml?query-string={0}&source=afrinic-grs", + "url": "http://apps.db.ripe.net/whois/grs-search?query-string={0}&source=afrinic-grs", "fields": { "name": "^(netname):[^\S\n]+(.+)$", "description": "^(descr):[^\S\n]+(.+)$", "country": "^(country):[^\S\n]+(.+)$" - }, - "fields_rws": {} + } } } @@ -575,6 +570,41 @@ def get_whois(self, asn_registry = 'arin', retry_count = 3): return None + def get_rws(self, url = None, retry_count = 3): + """ + The function for retrieving Whois-RWS information for an IP address via HTTP (Whois-RWS). + + Args: + url: The URL to retrieve. + retry_count: The number of times to retry in case socket errors, timeouts, connection resets, etc. are encountered. + + Returns: + Dictionary: The whois data in Json format. + """ + + try: + + #Create the connection for the whois query. + conn = request.Request(url, headers = {"Accept":"application/json"}) + data = request.urlopen(conn, timeout=self.timeout) + d = json.loads(data.readall().decode()) + + return d + + except (socket.timeout, socket.error): + + if retry_count > 0: + + return self.get_rws(url, retry_count - 1) + + else: + + return None + + except: + + return None + def lookup(self, inc_raw = False): """ The function for retrieving and parsing whois information for an IP address via port 43 (WHOIS). @@ -781,6 +811,10 @@ def lookup_rws(self, inc_raw = False): """ The function for retrieving and parsing whois information for an IP address via HTTP (Whois-RWS). + NOTE: This should be faster than IPWhois.lookup(), but may not be as reliable. APNIC, LACNIC, and AFRINIC + do not have a Whois-RWS service yet. We have to rely on the Ripe RWS service, which does not contain all + of the data we need. + Args: inc_raw: Boolean for whether to include the raw whois results in the returned dictionary. @@ -792,8 +826,223 @@ def lookup_rws(self, inc_raw = False): asn_registry (String) - The assigned ASN registry. asn_cidr (String) - The assigned ASN CIDR. asn_country_code (String) - The assigned ASN country code. - nets (List) - Dictionaries containing network information which consists of the RWS fields + nets (List) - Dictionaries containing network information which consists of the fields listed in the NIC_WHOIS dictionary. Certain IPs have more granular network listings, hence the need for a list object. - raw (String) - Raw whois results if the inc_raw parameter is True. - """ \ No newline at end of file + raw (Dictionary) - Whois results in Json format if the inc_raw parameter is True. + """ + + #Attempt to resolve ASN info via Cymru. DNS is faster, so try that first. + asn_data = self.get_asn_dns() + + if asn_data is None: + + asn_data = self.get_asn_whois() + + if asn_data is None: + + raise ASNLookupError('ASN lookup failed for %r.' % self.address_str) + + #Create the return dictionary. + results = { + "query": self.address_str, + "nets": [], + "raw": None + } + + #Add the ASN information to the return dictionary. + results.update(asn_data) + + #Retrieve the whois data. + response = self.get_rws(NIC_WHOIS[results['asn_registry']]['url'].format(self.address_str)) + + #If the query failed, try the radb-grs source. + if not response: + + response = self.get_rws("http://apps.db.ripe.net/whois/grs-search?query-string={0}&source=radb-grs".format(self.address_str)) + + #If the inc_raw parameter is True, add the response to the return dictionary. + if inc_raw: + + results["raw"] = response + + #Create the network dictionary template. + base_net = { + "cidr": None, + "name": None, + "description": None, + "country": None, + "state": None, + "city": None + } + + nets = [] + + if results['asn_registry'] == "arin": + + try: + + for n in response['nets']['net']: + + addrs = [] + addrs.extend(ipaddress.summarize_address_range(ipaddress.ip_address(n['startAddress']['$'].strip()), ipaddress.ip_address(n['endAddress']['$'].strip()))) + + temp = [] + for i in ipaddress.collapse_addresses(addrs): + + temp.append(i.__str__()) + + cidr = ", ".join(temp) + + net = base_net.copy() + net["cidr"] = cidr + + if 'name' in n: + + net["name"] = n['name']['$'].strip() + + if 'customerRef' in n: + + net["description"] = n["customerRef"]["@name"].strip() + customer_url = n["customerRef"]["$"].strip() + + res = self.get_rws(customer_url) + + if res: + + if "city" in res["customer"]: + + net["city"] = res["customer"]["city"]["$"] + + if "iso3166-1" in res["customer"]: + + net["country"] = res["customer"]["iso3166-1"]["code2"]["$"] + + if "iso3166-2" in res["customer"]: + + net["state"] = res["customer"]["iso3166-2"]["$"] + + elif 'orgRef' in n: + + net["description"] = n["orgRef"]["@name"].strip() + org_url = n["orgRef"]["$"].strip() + + res = self.get_rws(org_url) + + if res: + + if "city" in res["org"]: + + net["city"] = res["org"]["city"]["$"] + + if "iso3166-1" in res["org"]: + + net["country"] = res["org"]["iso3166-1"]["code2"]["$"] + + if "iso3166-2" in res["org"]: + + net["state"] = res["org"]["iso3166-2"]["$"] + + nets.append(net) + + except: + + pass + + else: + + try: + + object_list = response['whois-resources']['objects']['object'] + + if not isinstance(object_list, list): + + object_list = [object_list] + + for n in object_list: + + if n["type"] in ("inetnum", "inet6num", "route", "route6"): + + net = base_net.copy() + + for a in n['attributes']['attribute']: + + if a['name'] in ("inetnum", "inet6num"): + + ipr = a['value'].strip() + ip_range = ipr.split(" - ") + + try: + + if len(ip_range) > 1: + + addrs = [] + addrs.extend(ipaddress.summarize_address_range(ipaddress.ip_address(ip_range[0]), ipaddress.ip_address(ip_range[1]))) + + temp = [] + for i in ipaddress.collapse_addresses(addrs): + + temp.append(i.__str__()) + + cidr = ", ".join(temp) + + else: + + cidr = ipaddress.ip_network(ip_range[0]).__str__() + + net["cidr"] = cidr + + except: + + pass + + elif a['name'] in ("route", "route6"): + + ipr = a['value'].strip() + ip_ranges = ipr.split(", ") + + try: + + temp = [] + for r in ip_ranges: + + temp.append(ipaddress.ip_network(r).__str__()) + + cidr = ", ".join(temp) + + net["cidr"] = cidr + + except: + + pass + + elif a['name'] == 'netname': + + net["name"] = a["value"].strip() + + elif a['name'] == 'descr': + + if net["description"]: + + net["description"] += "\n" + a["value"].strip() + + else: + + net["description"] = a["value"].strip() + + elif a['name'] == 'country': + + net["country"] = a["value"].strip() + + nets.append(net) + + break + + except: + + pass + + #Add the networks to the return dictionary. + results["nets"] = nets + + return results \ No newline at end of file diff --git a/setup.py b/setup.py index b9e8935..61913d1 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,13 @@ "IPv4", "IPv6", "IETF", + "REST", + "Arin", + "Ripe", + "Apnic", + "Lacnic", + "Afrinic", + "NIC" ] LONG_DESCRIPTION = '\n\n'.join([open('README.rst').read(), @@ -32,6 +39,7 @@ "Intended Audience :: Information Technology", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", + "Programming Language :: Python", "Programming Language :: Python :: 3.3", "Topic :: Internet", "Topic :: Software Development", From 7bd0fc6486aa05128c813625bfdc5abc994a9ef9 Mon Sep 17 00:00:00 2001 From: secynic Date: Fri, 13 Sep 2013 14:13:30 -0500 Subject: [PATCH 4/5] indentation --- README.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 8d550f8..a887dd6 100644 --- a/README.rst +++ b/README.rst @@ -18,8 +18,8 @@ Typical usage:: >>>> results = obj.lookup(False) >>>> pprint(results) - { - 'asn': '15169', + { + 'asn': '15169', 'asn_cidr': '74.125.225.0/24', 'asn_country_code': 'US', 'asn_date': '2007-03-13', @@ -37,12 +37,12 @@ Typical usage:: REST (HTTP):: >>>> import ipwhois - >>>> from pprint import pprint - - >>>> obj = ipwhois.IPWhois("74.125.225.229") - >>>> results = obj.lookup_rws(False) - >>>> pprint(results) - + >>>> from pprint import pprint + + >>>> obj = ipwhois.IPWhois("74.125.225.229") + >>>> results = obj.lookup_rws(False) + >>>> pprint(results) + { 'asn': '15169', 'asn_cidr': '74.125.225.0/24', From a2c4b25a0dd7b8d513c71fbaa73cd0fbb311b099 Mon Sep 17 00:00:00 2001 From: secynic Date: Fri, 13 Sep 2013 14:14:24 -0500 Subject: [PATCH 5/5] indentation --- README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index a887dd6..d26d7f7 100644 --- a/README.rst +++ b/README.rst @@ -11,13 +11,13 @@ Usage Examples Typical usage:: - >>>> import ipwhois - >>>> from pprint import pprint - - >>>> obj = ipwhois.IPWhois("74.125.225.229") - >>>> results = obj.lookup(False) - >>>> pprint(results) - + >>>> import ipwhois + >>>> from pprint import pprint + + >>>> obj = ipwhois.IPWhois("74.125.225.229") + >>>> results = obj.lookup(False) + >>>> pprint(results) + { 'asn': '15169', 'asn_cidr': '74.125.225.0/24',