"""Query & parse data from bgp.tools.
|
|
|
|
|
|
|
|
- See https://bgp.tools/credits for acknowledgements and licensing.
|
|
|
|
- See https://bgp.tools/kb/api for query documentation.
|
|
|
|
"""

# Standard Library
import re
import typing as t
import asyncio
from ipaddress import IPv4Address, IPv6Address, ip_address

# Project
from hyperglass.log import log
from hyperglass.state import use_state

DEFAULT_KEYS = ("asn", "ip", "prefix", "country", "rir", "allocated", "org")

CACHE_KEY = "hyperglass.external.bgptools"

TargetDetail = t.TypedDict(
    "TargetDetail",
    {"asn": str, "ip": str, "prefix": str, "country": str, "rir": str, "allocated": str, "org": str},
)

TargetData = t.Dict[str, TargetDetail]


def default_ip_targets(*targets: str) -> t.Tuple[TargetData, t.Tuple[str, ...]]:
    """Construct a mapping of default data and other data that should be queried.

    Targets in the mapping don't need to be queried and already have default values. Targets in
    the query tuple should be queried.
    """
    default_data = {}
    query = ()
    for target in targets:
        detail: TargetDetail = {k: "None" for k in DEFAULT_KEYS}
        try:
            valid: t.Union[IPv4Address, IPv6Address] = ip_address(target)

            checks = (
                (valid.version == 6 and valid.is_site_local, "Site Local Address"),
                (valid.is_loopback, "Loopback Address"),
                (valid.is_multicast, "Multicast Address"),
                (valid.is_link_local, "Link Local Address"),
                (valid.is_private, "Private Address"),
            )
            for exp, rir in checks:
                if exp is True:
                    detail["rir"] = rir
                    break

            should_query = any((valid.is_global, valid.is_unspecified, valid.is_reserved))

            if should_query:
                query += (str(target),)
            else:
                detail["ip"] = str(target)
                default_data[str(target)] = detail

        except ValueError:
            # Target is not a valid IP address (e.g. a hostname); skip it.
            pass

    return default_data, query
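
# Illustrative example (assumed values): 192.0.2.1 (TEST-NET-1) is reported by
# the ipaddress module as private/non-global, so it keeps default values, while
# 1.1.1.1 is global and lands in the query tuple:
#
#   default_ip_targets("192.0.2.1", "1.1.1.1")
#   # -> ({"192.0.2.1": {"asn": "None", "ip": "192.0.2.1", "prefix": "None",
#   #      "country": "None", "rir": "Private Address", "allocated": "None",
#   #      "org": "None"}}, ("1.1.1.1",))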


def parse_whois(output: str, targets: t.List[str]) -> TargetData:
    """Parse raw whois output from bgp.tools.

    Sample output:
    AS    | IP      | BGP Prefix | CC | Registry | Allocated  | AS Name
    13335 | 1.1.1.1 | 1.1.1.0/24 | US | ARIN     | 2010-07-14 | Cloudflare, Inc.
    """

    def lines(raw):
        """Generate clean string values for each column."""
        for r in (r for r in raw.split("\n") if r):
            fields = (re.sub(r"(\n|\r)", "", field).strip(" ") for field in r.split("|"))
            yield fields

    data = {}

    for line in lines(output):
        # Unpack each line's parsed values.
        asn, ip, prefix, country, rir, allocated, org = line

        # Match the line to the item in the list of resources to query.
        if ip in targets:
            i = targets.index(ip)
            data[targets[i]] = {
                "asn": asn,
                "ip": ip,
                "prefix": prefix,
                "country": country,
                "rir": rir,
                "allocated": allocated,
                "org": org,
            }

    log.debug("Parsed bgp.tools data: {}", data)
    return data
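
# Illustrative example, using the sample row from the docstring above:
#
#   parse_whois(
#       "13335 | 1.1.1.1 | 1.1.1.0/24 | US | ARIN | 2010-07-14 | Cloudflare, Inc.",
#       ["1.1.1.1"],
#   )
#   # -> {"1.1.1.1": {"asn": "13335", "ip": "1.1.1.1", "prefix": "1.1.1.0/24",
#   #      "country": "US", "rir": "ARIN", "allocated": "2010-07-14",
#   #      "org": "Cloudflare, Inc."}}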


async def run_whois(targets: t.List[str]) -> str:
    """Open raw socket to bgp.tools and execute query."""

    # Construct bulk query
    query = "\n".join(("begin", *targets, "end\n")).encode()

    # Open the socket to bgp.tools
    log.debug("Opening connection to bgp.tools")
    reader, writer = await asyncio.open_connection("bgp.tools", port=43)

    # Send the query
    writer.write(query)
    if writer.can_write_eof():
        writer.write_eof()
    await writer.drain()

    # Read the response
    response = b""
    while True:
        data = await reader.read(128)
        if data:
            response += data
        else:
            log.debug("Closing connection to bgp.tools")
            writer.close()
            break

    return response.decode()
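
# The join expression above wraps the targets in bgp.tools' begin/end bulk
# markers, so the payload sent over the socket looks like (targets illustrative):
#
#   begin
#   1.1.1.1
#   2606:4700:4700::1111
#   end
#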


async def network_info(*targets: str) -> TargetData:
    """Get ASN, containing prefix, and other info about an internet resource."""

    default_data, query_targets = default_ip_targets(*targets)

    cache = use_state("cache")

    # Set default data structure.
    query_data = {t: {k: "" for k in DEFAULT_KEYS} for t in query_targets}

    # Get all cached bgp.tools data.
    cached = cache.get_map(CACHE_KEY) or {}

    # Try to use cached data for each of the items in the list of resources.
    for target in (t for t in query_targets if t in cached):
        # Reassign the cached network info to the matching resource.
        query_data[target] = cached[target]
        log.debug("Using cached network info for {}", target)

    # Remove cached items from the resource list so they're not queried.
    targets = [t for t in query_targets if t not in cached]

    try:
        if targets:
            whoisdata = await run_whois(targets)

            if whoisdata:
                # If the response is not empty, parse it.
                query_data.update(parse_whois(whoisdata, targets))

                # Cache the response
                for target in targets:
                    cache.set_map_item(CACHE_KEY, target, query_data[target])
                    log.debug("Cached network info for {}", target)

    except Exception as err:
        log.error(str(err))

    return {**default_data, **query_data}
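
# Illustrative example combining both paths: a private address resolves locally
# with default values, while a global address is queried (or served from cache):
#
#   await network_info("10.0.0.1", "1.1.1.1")
#   # -> {"10.0.0.1": {..., "rir": "Private Address", ...},
#   #     "1.1.1.1": {"asn": "13335", "ip": "1.1.1.1", ...}}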


def network_info_sync(*targets: str) -> TargetData:
    """Get ASN, containing prefix, and other info about an internet resource (sync wrapper for network_info)."""
    return asyncio.run(network_info(*targets))
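
# Example usage from synchronous code (illustrative; assumes a configured
# hyperglass cache is available via use_state("cache")):
#
#   info = network_info_sync("1.1.1.1")
#   info["1.1.1.1"]["asn"]  # e.g. "13335"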