2018-10-14 10:32:27 -04:00
|
|
|
from collections import Counter
|
|
|
|
import logging
|
|
|
|
|
2019-04-19 20:44:17 -04:00
|
|
|
from .kad_peerinfo import KadPeerHeap, create_kad_peerinfo
|
2019-01-15 18:41:41 +01:00
|
|
|
from .utils import gather_dict
|
2018-10-14 10:32:27 -04:00
|
|
|
|
2019-08-02 23:19:36 +08:00
|
|
|
log = logging.getLogger(__name__)
|
2018-10-14 10:32:27 -04:00
|
|
|
|
2019-01-15 18:41:41 +01:00
|
|
|
|
2019-01-09 21:38:56 +03:00
|
|
|
class SpiderCrawl:
    """
    Crawl the network and look for given 160-bit keys.

    This base class owns the iterative lookup loop (``_find``); subclasses
    decide what to do with each round of responses by overriding
    ``_nodes_found``.
    """

    def __init__(self, protocol, node, peers, ksize, alpha):
        """
        Create a new ``SpiderCrawl`` seeded with an initial set of peers.

        Args:
            protocol: A :class:`~kademlia.protocol.KademliaProtocol` instance.
            node: A :class:`~kademlia.node.Node` representing the key we're
                  looking for
            peers: A list of :class:`~kademlia.node.Node` instances that
                   provide the entry point for the network
            ksize: The value for k based on the paper
            alpha: The value for alpha based on the paper
        """
        self.protocol = protocol
        self.node = node
        self.ksize = ksize
        self.alpha = alpha
        # Nothing has been crawled yet, so there is no previous id snapshot.
        self.last_ids_crawled = []
        # Heap of the closest peers seen so far, bounded to ksize entries.
        self.nearest = KadPeerHeap(node, ksize)
        log.info("creating spider with peers: %s", peers)
        self.nearest.push(peers)

    async def _find(self, rpcmethod):
        """
        Get either a value or list of nodes.

        Args:
            rpcmethod: The protocol's call_find_value or call_find_node.

        The process:
          1. calls find_* to current ALPHA nearest not already queried nodes,
             adding results to current nearest list of k nodes.
          2. current nearest list needs to keep track of who has been queried
             already sort by nearest, keep KSIZE
          3. if list is same as last time, next call should be to everyone not
             yet queried
          4. repeat, unless nearest list has all been queried, then ur done
        """
        nearest_repr = str(tuple(self.nearest))
        log.info("crawling network with nearest: %s", nearest_repr)

        # When the previous round did not change the nearest set, widen the
        # query from alpha peers to every peer currently in the heap.
        unchanged = self.nearest.get_ids() == self.last_ids_crawled
        batch_size = len(self.nearest) if unchanged else self.alpha
        self.last_ids_crawled = self.nearest.get_ids()

        # Start one RPC per uncontacted peer, keyed by that peer's id bytes,
        # marking each peer as contacted as soon as its call is created.
        pending = {}
        targets = self.nearest.get_uncontacted()[:batch_size]
        for target in targets:
            pending[target.peer_id_bytes] = rpcmethod(target, self.node)
            self.nearest.mark_contacted(target)

        responses = await gather_dict(pending)
        return await self._nodes_found(responses)

    async def _nodes_found(self, responses):
        """
        Handle the responses gathered by one iteration of ``_find``.

        Must be overridden by subclasses; the base class always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
class ValueSpiderCrawl(SpiderCrawl):
    """
    Crawl that queries peers with ``call_find_value`` and resolves to the
    value found, or ``None`` once every known peer has been contacted
    without any of them returning a value.
    """

    def __init__(self, protocol, node, peers, ksize, alpha):
        """
        Create the crawl; arguments are forwarded unchanged to
        :class:`SpiderCrawl`.
        """
        super().__init__(protocol, node, peers, ksize, alpha)
        # keep track of the single nearest node without value - per
        # section 2.3 so we can set the key there if found
        self.nearest_without_value = KadPeerHeap(self.node, 1)

    async def find(self):
        """
        Find either the closest nodes or the value requested.
        """
        find_value_rpc = self.protocol.call_find_value
        return await self._find(find_value_rpc)

    async def _nodes_found(self, responses):
        """
        Handle the result of an iteration in _find.

        Args:
            responses: Mapping of peer id to the raw RPC result for that peer.

        Returns:
            The agreed value if any peer returned one, ``None`` if every
            peer in the nearest heap has been contacted without success,
            otherwise the result of another ``find`` round.
        """
        unresponsive = []
        values_seen = []
        for peer_id, raw in responses.items():
            reply = RPCFindResponse(raw)
            if not reply.happened():
                # Peer never answered: schedule it for removal from the heap.
                unresponsive.append(peer_id)
                continue
            if reply.has_value():
                values_seen.append(reply.get_value())
                continue
            # Peer answered with a node list and no value: remember it as a
            # candidate for caching the value, then merge its neighbours in.
            valueless_peer = self.nearest.get_node(peer_id)
            self.nearest_without_value.push(valueless_peer)
            self.nearest.push(reply.get_node_list())
        self.nearest.remove(unresponsive)

        if found := values_seen:
            return await self._handle_found_values(found)
        if not self.nearest.have_contacted_all():
            return await self.find()
        # not found!
        return None

    async def _handle_found_values(self, values):
        """
        We got some values!  Exciting.  But let's make sure
        they're all the same or freak out a little bit.  Also,
        make sure we tell the nearest node that *didn't* have
        the value to store it.

        Args:
            values: The values returned by peers in this round.

        Returns:
            The most common value among ``values``.
        """
        tally = Counter(values)
        if len(tally) != 1:
            log.warning(
                "Got multiple values for key %i: %s", self.node.xor_id, str(values)
            )
        winner, _occurrences = tally.most_common(1)[0]

        # Ask the closest peer that lacked the value (if there is one) to
        # store the winning value.
        store_target = self.nearest_without_value.popleft()
        if store_target:
            key_bytes = self.node.peer_id_bytes
            await self.protocol.call_store(store_target, key_bytes, winner)
        return winner
|
|
|
|
|
|
|
|
|
|
|
|
class NodeSpiderCrawl(SpiderCrawl):
    """
    Crawl that queries peers with ``call_find_node`` and resolves to the
    list of nearest peers once all of them have been contacted.
    """

    async def find(self):
        """
        Find the closest nodes.
        """
        find_node_rpc = self.protocol.call_find_node
        return await self._find(find_node_rpc)

    async def _nodes_found(self, responses):
        """
        Handle the result of an iteration in _find.

        Args:
            responses: Mapping of peer id to the raw RPC result for that peer.

        Returns:
            The contents of the nearest heap as a list once every peer in
            it has been contacted, otherwise the result of another ``find``
            round.
        """
        unresponsive = []
        for peer_id, raw in responses.items():
            reply = RPCFindResponse(raw)
            if not reply.happened():
                # Peer never answered: schedule it for removal from the heap.
                unresponsive.append(peer_id)
                continue
            self.nearest.push(reply.get_node_list())
        self.nearest.remove(unresponsive)

        if not self.nearest.have_contacted_all():
            return await self.find()
        return list(self.nearest)
|
|
|
|
|
|
|
|
|
2019-01-09 21:38:56 +03:00
|
|
|
class RPCFindResponse:
    """
    Thin accessor wrapper around the raw result of a find RPC.
    """

    def __init__(self, response):
        """
        A wrapper for the result of a RPC find.

        Args:
            response: This will be a tuple of (<response received>, <value>)
                      where <value> will be a list of tuples if not found or
                      a dictionary of {'value': v} where v is the value desired
        """
        self.response = response

    def happened(self):
        """
        Did the other host actually respond?
        """
        received = self.response[0]
        return received

    def has_value(self):
        """
        Report whether the payload is a dictionary, i.e. carries a value
        rather than a node list.
        """
        payload = self.response[1]
        return isinstance(payload, dict)

    def get_value(self):
        """
        Return the entry stored under ``"value"`` in the payload.
        """
        payload = self.response[1]
        return payload["value"]

    def get_node_list(self):
        """
        Get the node list in the response.  If there's no value, this should
        be set.

        Returns:
            A list with one ``create_kad_peerinfo(*entry)`` result per entry
            of the payload; an empty list when the payload is falsy.
        """
        payload = self.response[1]
        if not payload:
            return []
        peers = []
        for entry in payload:
            peers.append(create_kad_peerinfo(*entry))
        return peers
|