2019-10-24 14:41:10 +08:00
|
|
|
"""Package for interacting on the network at a high level."""
|
2018-10-14 22:32:27 +08:00
|
|
|
import asyncio
|
|
|
|
import logging
|
2019-08-03 13:36:19 +08:00
|
|
|
import pickle
|
2018-10-14 22:32:27 +08:00
|
|
|
|
2019-08-03 13:36:19 +08:00
|
|
|
from .crawling import NodeSpiderCrawl, ValueSpiderCrawl
|
|
|
|
from .kad_peerinfo import create_kad_peerinfo
|
2019-01-16 01:41:41 +08:00
|
|
|
from .protocol import KademliaProtocol
|
|
|
|
from .storage import ForgetfulStorage
|
2019-08-03 13:36:19 +08:00
|
|
|
from .utils import digest
|
2018-10-14 22:32:27 +08:00
|
|
|
|
2019-08-02 23:19:36 +08:00
|
|
|
log = logging.getLogger(__name__)
|
2018-10-14 22:32:27 +08:00
|
|
|
|
|
|
|
|
2019-04-21 05:35:05 +08:00
|
|
|
class KademliaServer:
    """High level view of a node instance.

    This is the object that should be created to start listening as an
    active node on the network.
    """

    protocol_class = KademliaProtocol

    def __init__(self, ksize=20, alpha=3, node_id=None, storage=None):
        """Create a server instance.

        The constructor only sets up local state; call :meth:`listen` to
        actually open the datagram endpoint.

        Args:
            ksize (int): The k parameter from the paper
            alpha (int): The alpha parameter from the paper
            node_id: The id for this node on the network.
            storage: An instance that implements
                :interface:`~kademlia.storage.IStorage`. A fresh
                ``ForgetfulStorage`` is used when ``None`` is given.
        """
        self.ksize = ksize
        self.alpha = alpha
        # Compare against None explicitly: a caller-supplied storage that
        # happens to be falsy (e.g. an empty container) must not be replaced.
        self.storage = storage if storage is not None else ForgetfulStorage()
        self.node = create_kad_peerinfo(node_id)
        self.transport = None
        self.protocol = None
        self.refresh_loop = None
        self.save_state_loop = None
        # (ip, port) pairs restored by load_state(); consumed by listen().
        self._pending_bootstrap = None

    def stop(self):
        """Close the transport and cancel the scheduled periodic callbacks."""
        if self.transport is not None:
            self.transport.close()

        if self.refresh_loop:
            self.refresh_loop.cancel()

        if self.save_state_loop:
            self.save_state_loop.cancel()

    def _create_protocol(self):
        """Build the protocol instance handed to the datagram endpoint."""
        return self.protocol_class(self.node, self.storage, self.ksize)

    async def listen(self, port, interface="0.0.0.0"):
        """Start listening on the given port.

        Provide interface="::" to accept ipv6 address

        Once the endpoint is up the periodic routing-table refresh is
        scheduled. If this server was created by :meth:`load_state` with
        cached neighbors, it is bootstrapped from them before returning.
        """
        loop = asyncio.get_event_loop()
        listen = loop.create_datagram_endpoint(
            self._create_protocol, local_addr=(interface, port)
        )
        log.info("Node %i listening on %s:%i", self.node.xor_id, interface, port)
        self.transport, self.protocol = await listen
        # finally, schedule refreshing table
        self.refresh_table()

        # Bootstrapping needs a live protocol, so neighbors restored by
        # load_state() can only be contacted now. Clear them first so a
        # later listen() call does not bootstrap again.
        pending, self._pending_bootstrap = self._pending_bootstrap, None
        if pending:
            await self.bootstrap(pending)

    def refresh_table(self):
        """Kick off a table refresh now and reschedule itself in an hour."""
        log.debug("Refreshing routing table")
        asyncio.ensure_future(self._refresh_table())
        loop = asyncio.get_event_loop()
        self.refresh_loop = loop.call_later(3600, self.refresh_table)

    async def _refresh_table(self):
        """Refresh buckets that haven't had any lookups in the last hour (per
        section 2.3 of the paper), then republish stored keys older than one
        hour."""
        results = []
        for node_id in self.protocol.get_refresh_ids():
            node = create_kad_peerinfo(node_id)
            nearest = self.protocol.router.find_neighbors(node, self.alpha)
            spider = NodeSpiderCrawl(
                self.protocol, node, nearest, self.ksize, self.alpha
            )
            results.append(spider.find())

        # do our crawling
        await asyncio.gather(*results)

        # now republish keys older than one hour
        for dkey, value in self.storage.iter_older_than(3600):
            await self.set_digest(dkey, value)

    def bootstrappable_neighbors(self):
        """Get a :class:`list` of (ip, port) :class:`tuple` pairs suitable for
        use as an argument to the bootstrap method.

        The server should have been bootstrapped
        already - this is just a utility for getting some neighbors and then
        storing them if this server is going down for a while.  When it comes
        back up, the list of nodes can be used to bootstrap.
        """
        neighbors = self.protocol.router.find_neighbors(self.node)
        # The last two fields of each neighbor are taken as its (ip, port).
        return [tuple(n)[-2:] for n in neighbors]

    async def bootstrap(self, addrs):
        """Bootstrap the server by connecting to other known nodes in the
        network.

        Args:
            addrs: A `list` of (ip, port) `tuple` pairs.  Note that only IP
                   addresses are acceptable - hostnames will cause an error.

        Returns:
            The result of the node crawl seeded with the contacts that
            answered the initial ping.
        """
        log.debug("Attempting to bootstrap node with %i initial contacts", len(addrs))
        cos = [self.bootstrap_node(addr) for addr in addrs]
        gathered = await asyncio.gather(*cos)
        # bootstrap_node yields None for contacts that did not answer.
        nodes = [node for node in gathered if node is not None]
        spider = NodeSpiderCrawl(
            self.protocol, self.node, nodes, self.ksize, self.alpha
        )
        return await spider.find()

    async def bootstrap_node(self, addr):
        """Ping ``addr`` and return peer info for it, or ``None`` when the
        ping result reports failure."""
        result = await self.protocol.ping(addr, self.node.peer_id_bytes)
        return create_kad_peerinfo(result[1], addr[0], addr[1]) if result[0] else None

    async def get(self, key):
        """Get a key if the network has it.

        Returns:
            :class:`None` if not found, the value otherwise.
        """
        log.info("Looking up key %s", key)
        dkey = digest(key)
        # if this node has it, return it (single lookup of local storage)
        local_value = self.storage.get(dkey)
        if local_value is not None:
            return local_value

        node = create_kad_peerinfo(dkey)
        nearest = self.protocol.router.find_neighbors(node)
        if not nearest:
            log.warning("There are no known neighbors to get key %s", key)
            return None
        spider = ValueSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha)
        return await spider.find()

    async def set(self, key, value):
        """Set the given string key to the given value in the network.

        Raises:
            TypeError: if ``value`` is not an int, float, bool, str or bytes.
        """
        if not check_dht_value_type(value):
            raise TypeError("Value must be of type int, float, bool, str, or bytes")
        log.info("setting '%s' = '%s' on network", key, value)
        dkey = digest(key)
        return await self.set_digest(dkey, value)

    async def provide(self, key):
        """publish to the network that it provides for a particular key.

        Returns the list of ``call_add_provider`` results, one per neighbor,
        obtained one after another.
        """
        # NOTE(review): neighbors are selected relative to this node's own
        # id rather than to ``key`` - confirm this is intended.
        neighbors = self.protocol.router.find_neighbors(self.node)
        return [
            await self.protocol.call_add_provider(n, key, self.node.peer_id_bytes)
            for n in neighbors
        ]

    async def get_providers(self, key):
        """get the list of providers for a key.

        Returns the list of ``call_get_providers`` results, one per neighbor
        of this node, obtained one after another.
        """
        neighbors = self.protocol.router.find_neighbors(self.node)
        return [await self.protocol.call_get_providers(n, key) for n in neighbors]

    async def set_digest(self, dkey, value):
        """Set the given SHA1 digest key (bytes) to the given value in the
        network.

        Returns:
            ``True`` if at least one remote store call succeeded, ``False``
            otherwise (including when no neighbor is known or reachable).
        """
        node = create_kad_peerinfo(dkey)

        nearest = self.protocol.router.find_neighbors(node)
        if not nearest:
            log.warning("There are no known neighbors to set key %s", dkey.hex())
            return False

        spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha)
        nodes = await spider.find()
        if not nodes:
            # The crawl found nobody; max() below would raise ValueError on
            # an empty sequence, so treat it like the "no neighbors" case.
            log.warning("There are no reachable neighbors to set key %s", dkey.hex())
            return False
        log.info("setting '%s' on %s", dkey.hex(), list(map(str, nodes)))

        # if this node is close too, then store here as well
        biggest = max(n.distance_to(node) for n in nodes)
        if self.node.distance_to(node) < biggest:
            self.storage[dkey] = value
        results = [self.protocol.call_store(n, dkey, value) for n in nodes]
        # return true only if at least one store call succeeded
        return any(await asyncio.gather(*results))

    def save_state(self, fname):
        """Save the state of this node (the alpha/ksize/id/immediate neighbors)
        to a cache file with the given fname.

        Nothing is written when the node has no known neighbors.
        """
        log.info("Saving state to %s", fname)
        data = {
            "ksize": self.ksize,
            "alpha": self.alpha,
            "id": self.node.peer_id_bytes,
            "neighbors": self.bootstrappable_neighbors(),
        }
        if not data["neighbors"]:
            log.warning("No known neighbors, so not writing to cache.")
            return
        with open(fname, "wb") as file:
            pickle.dump(data, file)

    @classmethod
    def load_state(cls, fname):
        """Load the state of this node (the alpha/ksize/id/immediate neighbors)
        from a cache file with the given fname.

        The returned server is not listening yet. The cached neighbors are
        remembered and used to bootstrap the node the next time
        :meth:`listen` is awaited, because bootstrapping requires a live
        protocol.

        SECURITY: the file is read with :mod:`pickle`, which can execute
        arbitrary code while loading. Only load cache files from a trusted
        location.
        """
        log.info("Loading state from %s", fname)
        with open(fname, "rb") as file:
            data = pickle.load(file)
        # Use cls so that subclasses get an instance of their own type.
        svr = cls(data["ksize"], data["alpha"], data["id"])
        if data["neighbors"]:
            # bootstrap() is a coroutine; calling it here without awaiting
            # would never run it, so defer the work to listen().
            svr._pending_bootstrap = list(data["neighbors"])
        return svr

    def save_state_regularly(self, fname, frequency=600):
        """Save the state of node with a given regularity to the given
        filename.

        Args:
            fname: File name to save regularly to
            frequency: Frequency in seconds that the state should be saved.
                        By default, 10 minutes.
        """
        self.save_state(fname)
        loop = asyncio.get_event_loop()
        # Re-arm the timer; the handle is kept so stop() can cancel it.
        self.save_state_loop = loop.call_later(
            frequency, self.save_state_regularly, fname, frequency
        )
|
2018-10-14 22:32:27 +08:00
|
|
|
|
|
|
|
|
|
|
|
def check_dht_value_type(value):
    """Report whether ``value`` has a type that may be placed in the dht.

    Only values whose exact type is int, float, bool, str or bytes are
    accepted; the comparison is made on ``type(value)`` itself, so
    instances of subclasses of those types are rejected.
    """
    accepted_types = (int, float, bool, str, bytes)
    value_type = type(value)
    return value_type in accepted_types
|