From 9ddbd18dedb1a81028a25f029f6b15b5c92cb984 Mon Sep 17 00:00:00 2001 From: zixuanzh Date: Thu, 18 Apr 2019 19:21:43 -0400 Subject: [PATCH 1/7] replace node with KadPeerInfo replace node with kadpeerinfo --- libp2p/kademlia/crawling.py | 13 +-- libp2p/kademlia/kad_peerinfo.py | 112 ++++++++++++++++++++ libp2p/kademlia/network.py | 25 +++-- libp2p/kademlia/node.py | 180 ++++++++++++++++---------------- libp2p/kademlia/protocol.py | 59 ++++++++--- 5 files changed, 271 insertions(+), 118 deletions(-) create mode 100644 libp2p/kademlia/kad_peerinfo.py diff --git a/libp2p/kademlia/crawling.py b/libp2p/kademlia/crawling.py index 6006bef..a1756af 100644 --- a/libp2p/kademlia/crawling.py +++ b/libp2p/kademlia/crawling.py @@ -1,7 +1,8 @@ from collections import Counter import logging -from .node import Node, NodeHeap +from .kad_peerinfo import KadPeerInfo, KadPeerHeap +from libp2p.peer.id import ID from .utils import gather_dict @@ -32,7 +33,7 @@ class SpiderCrawl: self.ksize = ksize self.alpha = alpha self.node = node - self.nearest = NodeHeap(self.node, self.ksize) + self.nearest = KadPeerHeap(self.node, self.ksize) self.last_ids_crawled = [] log.info("creating spider with peers: %s", peers) self.nearest.push(peers) @@ -61,7 +62,7 @@ class SpiderCrawl: dicts = {} for peer in self.nearest.get_uncontacted()[:count]: - dicts[peer.id] = rpcmethod(peer, self.node) + dicts[peer.peer_id] = rpcmethod(peer, self.node) self.nearest.mark_contacted(peer) found = await gather_dict(dicts) return await self._nodes_found(found) @@ -76,7 +77,7 @@ class ValueSpiderCrawl(SpiderCrawl): SpiderCrawl.__init__(self, protocol, node, peers, ksize, alpha) # keep track of the single nearest node without value - per # section 2.3 so we can set the key there if found - self.nearest_without_value = NodeHeap(self.node, 1) + self.nearest_without_value = KadPeerHeap(self.node, 1) async def find(self): """ @@ -124,7 +125,7 @@ class ValueSpiderCrawl(SpiderCrawl): peer = self.nearest_without_value.popleft() if peer: - await self.protocol.call_store(peer, self.node.id, value) + await self.protocol.call_store(peer, self.node.peer_id, value) return value @@ -183,4 +184,4 @@ class RPCFindResponse: be set. """ nodelist = self.response[1] or [] - return [Node(*nodeple) for nodeple in nodelist] + return [KadPeerInfo(ID(*nodeple)) for nodeple in nodelist] diff --git a/libp2p/kademlia/kad_peerinfo.py b/libp2p/kademlia/kad_peerinfo.py new file mode 100644 index 0000000..d227b11 --- /dev/null +++ b/libp2p/kademlia/kad_peerinfo.py @@ -0,0 +1,112 @@ +import heapq +from operator import itemgetter +from libp2p.peer.peerinfo import PeerInfo + + +class KadPeerInfo(PeerInfo): + def __init__(self, peer_id, peer_data): + super(KadPeerInfo, self).__init__(peer_id, peer_data) + self.long_id = int(peer_id.hex(), 16) + + def same_home_as(self, node): + #TODO: handle more than one addr + return self.addrs[0] == node.addrs[0] + + def distance_to(self, node): + """ + Get the distance between this node and another. + """ + return self.long_id ^ node.long_id + + def __iter__(self): + """ + Enables use of Node as a tuple - i.e., tuple(node) works. + """ + return iter([self.peer_id.pretty(), str(self.addrs[0])]) + + def __repr__(self): + return repr([self.long_id, str(self.addrs[0])]) + + def __str__(self): + return str(self.addrs[0]) + +class KadPeerHeap: + """ + A heap of peers ordered by distance to a given node. + """ + def __init__(self, node, maxsize): + """ + Constructor. + + @param node: The node to measure all distnaces from. + @param maxsize: The maximum size that this heap can grow to. + """ + self.node = node + self.heap = [] + self.contacted = set() + self.maxsize = maxsize + + def remove(self, peers): + """ + Remove a list of peer ids from this heap. Note that while this + heap retains a constant visible size (based on the iterator), it's + actual size may be quite a bit larger than what's exposed. Therefore, + removal of nodes may not change the visible size as previously added + nodes suddenly become visible. + """ + peers = set(peers) + if not peers: + return + nheap = [] + for distance, node in self.heap: + if node.peer_id not in peers: + heapq.heappush(nheap, (distance, node)) + self.heap = nheap + + def get_node(self, node_id): + for _, node in self.heap: + if node.peer_id == node_id: + return node + return None + + def have_contacted_all(self): + return len(self.get_uncontacted()) == 0 + + def get_ids(self): + return [n.peer_id for n in self] + + def mark_contacted(self, node): + self.contacted.add(node.peer_id) + + def popleft(self): + return heapq.heappop(self.heap)[1] if self else None + + def push(self, nodes): + """ + Push nodes onto heap. + + @param nodes: This can be a single item or a C{list}. + """ + if not isinstance(nodes, list): + nodes = [nodes] + + for node in nodes: + if node not in self: + distance = self.node.distance_to(node) + heapq.heappush(self.heap, (distance, node)) + + def __len__(self): + return min(len(self.heap), self.maxsize) + + def __iter__(self): + nodes = heapq.nsmallest(self.maxsize, self.heap) + return iter(map(itemgetter(1), nodes)) + + def __contains__(self, node): + for _, other in self.heap: + if node.peer_id == other.peer_id: + return True + return False + + def get_uncontacted(self): + return [n for n in self if n.peer_id not in self.contacted] diff --git a/libp2p/kademlia/network.py b/libp2p/kademlia/network.py index bbfbad6..2933a7e 100644 --- a/libp2p/kademlia/network.py +++ b/libp2p/kademlia/network.py @@ -5,11 +5,14 @@ import random import pickle import asyncio import logging +from multiaddr import Multiaddr +from libp2p.peer.id import ID +from libp2p.peer.peerdata import PeerData from .protocol import KademliaProtocol from .utils import digest from .storage import ForgetfulStorage -from .node import Node +from .kad_peerinfo import KadPeerInfo from .crawling import ValueSpiderCrawl from .crawling import NodeSpiderCrawl @@ -39,7 +42,8 @@ class Server: self.ksize = ksize self.alpha = alpha self.storage = storage or ForgetfulStorage() - self.node = Node(node_id or digest(random.getrandbits(255))) + new_node_id = ID(node_id) if node_id else ID(digest(random.getrandbits(255))) + self.node = KadPeerInfo(new_node_id, None) self.transport = None self.protocol = None self.refresh_loop = None @@ -86,7 +90,7 @@ class Server: """ results = [] for node_id in self.protocol.get_refresh_ids(): - node = Node(node_id) + node = KadPeerInfo(node_id, None) nearest = self.protocol.router.find_neighbors(node, self.alpha) spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) @@ -130,8 +134,12 @@ class Server: return await spider.find() async def bootstrap_node(self, addr): - result = await self.protocol.ping(addr, self.node.id) - return Node(result[1], addr[0], addr[1]) if result[0] else None + result = await self.protocol.ping(addr, self.node.peer_id) + node_id = ID(result[1]) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(addr[0]) + "/udp/" + str(addr[1]))] + peer_data.add_addrs(addr) + return KadPeerInfo(node_id, peer_data) if result[0] else None async def get(self, key): """ @@ -145,7 +153,8 @@ class Server: # if this node has it, return it if self.storage.get(dkey) is not None: return self.storage.get(dkey) - node = Node(dkey) + + node = KadPeerInfo(ID(dkey)) nearest = self.protocol.router.find_neighbors(node) if not nearest: log.warning("There are no known neighbors to get key %s", key) @@ -171,7 +180,7 @@ class Server: Set the given SHA1 digest key (bytes) to the given value in the network. """ - node = Node(dkey) + node = KadPeerInfo(ID(dkey)) nearest = self.protocol.router.find_neighbors(node) if not nearest: @@ -201,7 +210,7 @@ class Server: data = { 'ksize': self.ksize, 'alpha': self.alpha, - 'id': self.node.id, + 'id': self.node.peer_id, 'neighbors': self.bootstrappable_neighbors() } if not data['neighbors']: diff --git a/libp2p/kademlia/node.py b/libp2p/kademlia/node.py index 2f087f5..251b5d8 100644 --- a/libp2p/kademlia/node.py +++ b/libp2p/kademlia/node.py @@ -1,113 +1,113 @@ -from operator import itemgetter -import heapq +# from operator import itemgetter +# import heapq -class Node: - def __init__(self, node_id, ip=None, port=None): - self.id = node_id # pylint: disable=invalid-name - self.ip = ip # pylint: disable=invalid-name - self.port = port - self.long_id = int(node_id.hex(), 16) +# class Node: +# def __init__(self, node_id, ip=None, port=None): +# self.id = node_id # pylint: disable=invalid-name +# self.ip = ip # pylint: disable=invalid-name +# self.port = port +# self.long_id = int(node_id.hex(), 16) - def same_home_as(self, node): - return self.ip == node.ip and self.port == node.port +# def same_home_as(self, node): +# return self.ip == node.ip and self.port == node.port - def distance_to(self, node): - """ - Get the distance between this node and another. - """ - return self.long_id ^ node.long_id +# def distance_to(self, node): +# """ +# Get the distance between this node and another. +# """ +# return self.long_id ^ node.long_id - def __iter__(self): - """ - Enables use of Node as a tuple - i.e., tuple(node) works. - """ - return iter([self.id, self.ip, self.port]) +# def __iter__(self): +# """ +# Enables use of Node as a tuple - i.e., tuple(node) works. +# """ +# return iter([self.id, self.ip, self.port]) - def __repr__(self): - return repr([self.long_id, self.ip, self.port]) +# def __repr__(self): +# return repr([self.long_id, self.ip, self.port]) - def __str__(self): - return "%s:%s" % (self.ip, str(self.port)) +# def __str__(self): +# return "%s:%s" % (self.ip, str(self.port)) -class NodeHeap: - """ - A heap of nodes ordered by distance to a given node. - """ - def __init__(self, node, maxsize): - """ - Constructor. +# class NodeHeap: +# """ +# A heap of nodes ordered by distance to a given node. +# """ +# def __init__(self, node, maxsize): +# """ +# Constructor. - @param node: The node to measure all distnaces from. - @param maxsize: The maximum size that this heap can grow to. - """ - self.node = node - self.heap = [] - self.contacted = set() - self.maxsize = maxsize +# @param node: The node to measure all distnaces from. +# @param maxsize: The maximum size that this heap can grow to. +# """ +# self.node = node +# self.heap = [] +# self.contacted = set() +# self.maxsize = maxsize - def remove(self, peers): - """ - Remove a list of peer ids from this heap. Note that while this - heap retains a constant visible size (based on the iterator), it's - actual size may be quite a bit larger than what's exposed. Therefore, - removal of nodes may not change the visible size as previously added - nodes suddenly become visible. - """ - peers = set(peers) - if not peers: - return - nheap = [] - for distance, node in self.heap: - if node.id not in peers: - heapq.heappush(nheap, (distance, node)) - self.heap = nheap +# def remove(self, peers): +# """ +# Remove a list of peer ids from this heap. Note that while this +# heap retains a constant visible size (based on the iterator), it's +# actual size may be quite a bit larger than what's exposed. Therefore, +# removal of nodes may not change the visible size as previously added +# nodes suddenly become visible. +# """ +# peers = set(peers) +# if not peers: +# return +# nheap = [] +# for distance, node in self.heap: +# if node.id not in peers: +# heapq.heappush(nheap, (distance, node)) +# self.heap = nheap - def get_node(self, node_id): - for _, node in self.heap: - if node.id == node_id: - return node - return None +# def get_node(self, node_id): +# for _, node in self.heap: +# if node.id == node_id: +# return node +# return None - def have_contacted_all(self): - return len(self.get_uncontacted()) == 0 +# def have_contacted_all(self): +# return len(self.get_uncontacted()) == 0 - def get_ids(self): - return [n.id for n in self] +# def get_ids(self): +# return [n.id for n in self] - def mark_contacted(self, node): - self.contacted.add(node.id) +# def mark_contacted(self, node): +# self.contacted.add(node.id) - def popleft(self): - return heapq.heappop(self.heap)[1] if self else None +# def popleft(self): +# return heapq.heappop(self.heap)[1] if self else None - def push(self, nodes): - """ - Push nodes onto heap. +# def push(self, nodes): +# """ +# Push nodes onto heap. - @param nodes: This can be a single item or a C{list}. - """ - if not isinstance(nodes, list): - nodes = [nodes] +# @param nodes: This can be a single item or a C{list}. +# """ +# if not isinstance(nodes, list): +# nodes = [nodes] - for node in nodes: - if node not in self: - distance = self.node.distance_to(node) - heapq.heappush(self.heap, (distance, node)) +# for node in nodes: +# if node not in self: +# distance = self.node.distance_to(node) +# heapq.heappush(self.heap, (distance, node)) - def __len__(self): - return min(len(self.heap), self.maxsize) +# def __len__(self): +# return min(len(self.heap), self.maxsize) - def __iter__(self): - nodes = heapq.nsmallest(self.maxsize, self.heap) - return iter(map(itemgetter(1), nodes)) +# def __iter__(self): +# nodes = heapq.nsmallest(self.maxsize, self.heap) +# return iter(map(itemgetter(1), nodes)) - def __contains__(self, node): - for _, other in self.heap: - if node.id == other.id: - return True - return False +# def __contains__(self, node): +# for _, other in self.heap: +# if node.id == other.id: +# return True +# return False - def get_uncontacted(self): - return [n for n in self if n.id not in self.contacted] +# def get_uncontacted(self): +# return [n for n in self if n.id not in self.contacted] diff --git a/libp2p/kademlia/protocol.py b/libp2p/kademlia/protocol.py index 5273230..b43098c 100644 --- a/libp2p/kademlia/protocol.py +++ b/libp2p/kademlia/protocol.py @@ -4,7 +4,7 @@ import logging from rpcudp.protocol import RPCProtocol -from .node import Node +from .kad_peerinfo import KadPeerInfo from .routing import RoutingTable from .utils import digest @@ -47,12 +47,28 @@ class KademliaProtocol(RPCProtocol): return sender def rpc_ping(self, sender, nodeid): - source = Node(nodeid, sender[0], sender[1]) + print ("RPC PING") + print (sender) + + node_id = ID(nodeid) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] + peer_data.add_addrs(addr) + source = KadPeerInfo(node_id, peer_data) + self.welcome_if_new(source) - return self.source_node.id + return self.source_node.peer_id def rpc_store(self, sender, nodeid, key, value): - source = Node(nodeid, sender[0], sender[1]) + print ("RPC STORE") + print (sender) + + node_id = ID(nodeid) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] + peer_data.add_addrs(addr) + source = KadPeerInfo(node_id, peer_data) + self.welcome_if_new(source) log.debug("got a store request from %s, storing '%s'='%s'", sender, key.hex(), value) @@ -62,14 +78,29 @@ class KademliaProtocol(RPCProtocol): def rpc_find_node(self, sender, nodeid, key): log.info("finding neighbors of %i in local table", int(nodeid.hex(), 16)) - source = Node(nodeid, sender[0], sender[1]) + + node_id = ID(nodeid) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] + peer_data.add_addrs(addr) + source = KadPeerInfo(node_id, peer_data) + + # source = Node(nodeid, sender[0], sender[1]) self.welcome_if_new(source) - node = Node(key) + node = KadPeerInfo(ID(key)) neighbors = self.router.find_neighbors(node, exclude=source) return list(map(tuple, neighbors)) def rpc_find_value(self, sender, nodeid, key): - source = Node(nodeid, sender[0], sender[1]) + print ("RPC_FIND_VALUE") + print (sender) + + node_id = ID(nodeid) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] + peer_data.add_addrs(addr) + source = KadPeerInfo(node_id, peer_data) + self.welcome_if_new(source) value = self.storage.get(key, None) if value is None: @@ -78,24 +109,24 @@ class KademliaProtocol(RPCProtocol): async def call_find_node(self, node_to_ask, node_to_find): address = (node_to_ask.ip, node_to_ask.port) - result = await self.find_node(address, self.source_node.id, - node_to_find.id) + result = await self.find_node(address, self.source_node.peer_id, + node_to_find.peer_id) return self.handle_call_response(result, node_to_ask) async def call_find_value(self, node_to_ask, node_to_find): address = (node_to_ask.ip, node_to_ask.port) - result = await self.find_value(address, self.source_node.id, - node_to_find.id) + result = await self.find_value(address, self.source_node.peer_id, + node_to_find.peer_id) return self.handle_call_response(result, node_to_ask) async def call_ping(self, node_to_ask): address = (node_to_ask.ip, node_to_ask.port) - result = await self.ping(address, self.source_node.id) + result = await self.ping(address, self.source_node.peer_id) return self.handle_call_response(result, node_to_ask) async def call_store(self, node_to_ask, key, value): address = (node_to_ask.ip, node_to_ask.port) - result = await self.store(address, self.source_node.id, key, value) + result = await self.store(address, self.source_node.peer_id, key, value) return self.handle_call_response(result, node_to_ask) def welcome_if_new(self, node): @@ -117,7 +148,7 @@ class KademliaProtocol(RPCProtocol): log.info("never seen %s before, adding to router", node) for key, value in self.storage: - keynode = Node(digest(key)) + keynode = KadPeerInfo(ID(digest(key))) neighbors = self.router.find_neighbors(keynode) if neighbors: last = neighbors[-1].distance_to(keynode) From c03f2f63d203bdeffae732bee85551e984573f63 Mon Sep 17 00:00:00 2001 From: zixuanzh Date: Fri, 19 Apr 2019 15:26:30 -0400 Subject: [PATCH 2/7] replace id with peer_id --- libp2p/kademlia/routing.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libp2p/kademlia/routing.py b/libp2p/kademlia/routing.py index 3f88e8b..7d97494 100644 --- a/libp2p/kademlia/routing.py +++ b/libp2p/kademlia/routing.py @@ -34,24 +34,24 @@ class KBucket: two = KBucket(midpoint + 1, self.range[1], self.ksize) for node in self.nodes.values(): bucket = one if node.long_id <= midpoint else two - bucket.nodes[node.id] = node + bucket.nodes[node.peer_id] = node return (one, two) def remove_node(self, node): - if node.id not in self.nodes: + if node.peer_id not in self.nodes: return # delete node, and see if we can add a replacement - del self.nodes[node.id] + del self.nodes[node.peer_id] if self.replacement_nodes: newnode = self.replacement_nodes.pop() - self.nodes[newnode.id] = newnode + self.nodes[newnode.peer_id] = newnode def has_in_range(self, node): return self.range[0] <= node.long_id <= self.range[1] def is_new_node(self, node): - return node.id not in self.nodes + return node.peer_id not in self.nodes def add_node(self, node): """ @@ -61,11 +61,11 @@ class KBucket: If the bucket is full, keep track of node in a replacement list, per section 4.1 of the paper. """ - if node.id in self.nodes: - del self.nodes[node.id] - self.nodes[node.id] = node + if node.peer_id in self.nodes: + del self.nodes[node.peer_id] + self.nodes[node.peer_id] = node elif len(self) < self.ksize: - self.nodes[node.id] = node + self.nodes[node.peer_id] = node else: self.replacement_nodes.push(node) return False @@ -73,7 +73,7 @@ class KBucket: def depth(self): vals = self.nodes.values() - sprefix = shared_prefix([bytes_to_bit_string(n.id) for n in vals]) + sprefix = shared_prefix([bytes_to_bit_string(n.peer_id) for n in vals]) return len(sprefix) def head(self): @@ -185,7 +185,7 @@ class RoutingTable: nodes = [] for neighbor in TableTraverser(self, node): notexcluded = exclude is None or not neighbor.same_home_as(exclude) - if neighbor.id != node.id and notexcluded: + if neighbor.peer_id != node.peer_id and notexcluded: heapq.heappush(nodes, (node.distance_to(neighbor), neighbor)) if len(nodes) == k: break From fb687dad0933b237e95fa71a423b325997b84a13 Mon Sep 17 00:00:00 2001 From: zixuanzh Date: Fri, 19 Apr 2019 16:29:15 -0400 Subject: [PATCH 3/7] refactor peerinfo --- libp2p/kademlia/kad_peerinfo.py | 11 +++++++++-- libp2p/kademlia/protocol.py | 8 +++++++- libp2p/kademlia/routing.py | 12 ++++++++++++ libp2p/peer/peerinfo.py | 4 ++-- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/libp2p/kademlia/kad_peerinfo.py b/libp2p/kademlia/kad_peerinfo.py index d227b11..6524fc2 100644 --- a/libp2p/kademlia/kad_peerinfo.py +++ b/libp2p/kademlia/kad_peerinfo.py @@ -1,12 +1,19 @@ import heapq +import multihash from operator import itemgetter from libp2p.peer.peerinfo import PeerInfo class KadPeerInfo(PeerInfo): - def __init__(self, peer_id, peer_data): + def __init__(self, peer_id, peer_data=None): super(KadPeerInfo, self).__init__(peer_id, peer_data) - self.long_id = int(peer_id.hex(), 16) + print ("Kad Peer Info") + print (peer_id) + print (peer_data) + sha1 = multihash.Func.sha1 + mh_digest = multihash.digest(peer_id.pretty().encode('utf-8'), sha1) + self.peer_id = peer_id.pretty() + self.long_id = int.from_bytes(mh_digest.encode(), byteorder='big') def same_home_as(self, node): #TODO: handle more than one addr diff --git a/libp2p/kademlia/protocol.py b/libp2p/kademlia/protocol.py index b43098c..d1936f5 100644 --- a/libp2p/kademlia/protocol.py +++ b/libp2p/kademlia/protocol.py @@ -2,8 +2,10 @@ import random import asyncio import logging +from multiaddr import Multiaddr from rpcudp.protocol import RPCProtocol - +from libp2p.peer.id import ID +from libp2p.peer.peerdata import PeerData from .kad_peerinfo import KadPeerInfo from .routing import RoutingTable from .utils import digest @@ -143,6 +145,10 @@ class KademliaProtocol(RPCProtocol): is closer than the closest in that list, then store the key/value on the new node (per section 2.5 of the paper) """ + + print ("Welcome if new") + print (node) + if not self.router.is_new_node(node): return diff --git a/libp2p/kademlia/routing.py b/libp2p/kademlia/routing.py index 7d97494..e4b49d0 100644 --- a/libp2p/kademlia/routing.py +++ b/libp2p/kademlia/routing.py @@ -151,7 +151,11 @@ class RoutingTable: self.buckets[index].remove_node(node) def is_new_node(self, node): + print ("IN IS NEW NODE") + print (node) + print (self.buckets) index = self.get_bucket_for(node) + print (index) return self.buckets[index].is_new_node(node) def add_contact(self, node): @@ -174,7 +178,15 @@ class RoutingTable: """ Get the index of the bucket that the given node would fall into. """ + print ("IN GET BUKCKET FOR") + print (node) + print (node.long_id) + print (self.buckets) for index, bucket in enumerate(self.buckets): + print ("IN ENUMERATE") + print (index) + print (bucket) + print (bucket.range) if node.long_id < bucket.range[1]: return index # we should never be here, but make linter happy diff --git a/libp2p/peer/peerinfo.py b/libp2p/peer/peerinfo.py index 44cba56..2e7edb9 100644 --- a/libp2p/peer/peerinfo.py +++ b/libp2p/peer/peerinfo.py @@ -7,9 +7,9 @@ from .peerdata import PeerData class PeerInfo: # pylint: disable=too-few-public-methods - def __init__(self, peer_id, peer_data): + def __init__(self, peer_id, peer_data=None): self.peer_id = peer_id - self.addrs = peer_data.get_addrs() + self.addrs = peer_data.get_addrs() if peer_data else None def info_from_p2p_addr(addr): From 1512ae27a17f915eaeb04fc4fb0ebd8f24fa9b7b Mon Sep 17 00:00:00 2001 From: zixuanzh Date: Fri, 19 Apr 2019 20:00:23 -0400 Subject: [PATCH 4/7] fix all tests --- libp2p/kademlia/crawling.py | 16 ++++- libp2p/kademlia/kad_peerinfo.py | 31 +++++---- libp2p/kademlia/node.py | 113 -------------------------------- libp2p/kademlia/protocol.py | 13 ---- libp2p/kademlia/routing.py | 12 ---- 5 files changed, 34 insertions(+), 151 deletions(-) delete mode 100644 libp2p/kademlia/node.py diff --git a/libp2p/kademlia/crawling.py b/libp2p/kademlia/crawling.py index a1756af..2c6a618 100644 --- a/libp2p/kademlia/crawling.py +++ b/libp2p/kademlia/crawling.py @@ -1,8 +1,10 @@ from collections import Counter import logging -from .kad_peerinfo import KadPeerInfo, KadPeerHeap +from multiaddr import Multiaddr from libp2p.peer.id import ID +from libp2p.peer.peerdata import PeerData +from .kad_peerinfo import KadPeerInfo, KadPeerHeap from .utils import gather_dict @@ -184,4 +186,14 @@ class RPCFindResponse: be set. """ nodelist = self.response[1] or [] - return [KadPeerInfo(ID(*nodeple)) for nodeple in nodelist] + output = [] + for nodeple in nodelist: + #TODO check if nodeple is of the right format + # node_id, ip, port + node_id = ID(nodeple[0]) + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/ip4/" + str(nodeple[1]) + "/udp/" + str(nodeple[2]))] + peer_data.add_addrs(addr) + output.append(KadPeerInfo(node_id, peer_data)) + + return output diff --git a/libp2p/kademlia/kad_peerinfo.py b/libp2p/kademlia/kad_peerinfo.py index 6524fc2..91ba939 100644 --- a/libp2p/kademlia/kad_peerinfo.py +++ b/libp2p/kademlia/kad_peerinfo.py @@ -1,19 +1,28 @@ import heapq -import multihash + from operator import itemgetter from libp2p.peer.peerinfo import PeerInfo +from .utils import digest +P_IP = "ip4" +P_UDP = "udp" class KadPeerInfo(PeerInfo): def __init__(self, peer_id, peer_data=None): super(KadPeerInfo, self).__init__(peer_id, peer_data) - print ("Kad Peer Info") - print (peer_id) - print (peer_data) - sha1 = multihash.Func.sha1 - mh_digest = multihash.digest(peer_id.pretty().encode('utf-8'), sha1) - self.peer_id = peer_id.pretty() - self.long_id = int.from_bytes(mh_digest.encode(), byteorder='big') + + # pylint: disable=protected-access + self.peer_id = peer_id._id_str + self.long_id = int(digest(peer_id._id_str).hex(), 16) + + self.addrs = peer_data.get_addrs() if peer_data else None + + # pylint: disable=invalid-name + self.ip = self.addrs[0].value_for_protocol(P_IP)\ + if peer_data else None + self.port = int(self.addrs[0].value_for_protocol(P_UDP))\ + if peer_data else None + def same_home_as(self, node): #TODO: handle more than one addr @@ -29,13 +38,13 @@ class KadPeerInfo(PeerInfo): """ Enables use of Node as a tuple - i.e., tuple(node) works. """ - return iter([self.peer_id.pretty(), str(self.addrs[0])]) + return iter([self.peer_id, self.ip, self.port]) def __repr__(self): - return repr([self.long_id, str(self.addrs[0])]) + return repr([self.long_id, self.ip, self.port]) def __str__(self): - return str(self.addrs[0]) + return "%s:%s" % (self.ip, str(self.port)) class KadPeerHeap: """ diff --git a/libp2p/kademlia/node.py b/libp2p/kademlia/node.py deleted file mode 100644 index 251b5d8..0000000 --- a/libp2p/kademlia/node.py +++ /dev/null @@ -1,113 +0,0 @@ -# from operator import itemgetter -# import heapq - - -# class Node: -# def __init__(self, node_id, ip=None, port=None): -# self.id = node_id # pylint: disable=invalid-name -# self.ip = ip # pylint: disable=invalid-name -# self.port = port -# self.long_id = int(node_id.hex(), 16) - -# def same_home_as(self, node): -# return self.ip == node.ip and self.port == node.port - -# def distance_to(self, node): -# """ -# Get the distance between this node and another. -# """ -# return self.long_id ^ node.long_id - -# def __iter__(self): -# """ -# Enables use of Node as a tuple - i.e., tuple(node) works. -# """ -# return iter([self.id, self.ip, self.port]) - -# def __repr__(self): -# return repr([self.long_id, self.ip, self.port]) - -# def __str__(self): -# return "%s:%s" % (self.ip, str(self.port)) - - -# class NodeHeap: -# """ -# A heap of nodes ordered by distance to a given node. -# """ -# def __init__(self, node, maxsize): -# """ -# Constructor. - -# @param node: The node to measure all distnaces from. -# @param maxsize: The maximum size that this heap can grow to. -# """ -# self.node = node -# self.heap = [] -# self.contacted = set() -# self.maxsize = maxsize - -# def remove(self, peers): -# """ -# Remove a list of peer ids from this heap. Note that while this -# heap retains a constant visible size (based on the iterator), it's -# actual size may be quite a bit larger than what's exposed. Therefore, -# removal of nodes may not change the visible size as previously added -# nodes suddenly become visible. -# """ -# peers = set(peers) -# if not peers: -# return -# nheap = [] -# for distance, node in self.heap: -# if node.id not in peers: -# heapq.heappush(nheap, (distance, node)) -# self.heap = nheap - -# def get_node(self, node_id): -# for _, node in self.heap: -# if node.id == node_id: -# return node -# return None - -# def have_contacted_all(self): -# return len(self.get_uncontacted()) == 0 - -# def get_ids(self): -# return [n.id for n in self] - -# def mark_contacted(self, node): -# self.contacted.add(node.id) - -# def popleft(self): -# return heapq.heappop(self.heap)[1] if self else None - -# def push(self, nodes): -# """ -# Push nodes onto heap. - -# @param nodes: This can be a single item or a C{list}. -# """ -# if not isinstance(nodes, list): -# nodes = [nodes] - -# for node in nodes: -# if node not in self: -# distance = self.node.distance_to(node) -# heapq.heappush(self.heap, (distance, node)) - -# def __len__(self): -# return min(len(self.heap), self.maxsize) - -# def __iter__(self): -# nodes = heapq.nsmallest(self.maxsize, self.heap) -# return iter(map(itemgetter(1), nodes)) - -# def __contains__(self, node): -# for _, other in self.heap: -# if node.id == other.id: -# return True -# return False - -# def get_uncontacted(self): -# return [n for n in self if n.id not in self.contacted] diff --git a/libp2p/kademlia/protocol.py b/libp2p/kademlia/protocol.py index d1936f5..11400c0 100644 --- a/libp2p/kademlia/protocol.py +++ b/libp2p/kademlia/protocol.py @@ -49,9 +49,6 @@ class KademliaProtocol(RPCProtocol): return sender def rpc_ping(self, sender, nodeid): - print ("RPC PING") - print (sender) - node_id = ID(nodeid) peer_data = PeerData() #pylint: disable=no-value-for-parameter addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] @@ -62,9 +59,6 @@ class KademliaProtocol(RPCProtocol): return self.source_node.peer_id def rpc_store(self, sender, nodeid, key, value): - print ("RPC STORE") - print (sender) - node_id = ID(nodeid) peer_data = PeerData() #pylint: disable=no-value-for-parameter addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] @@ -94,9 +88,6 @@ class KademliaProtocol(RPCProtocol): return list(map(tuple, neighbors)) def rpc_find_value(self, sender, nodeid, key): - print ("RPC_FIND_VALUE") - print (sender) - node_id = ID(nodeid) peer_data = PeerData() #pylint: disable=no-value-for-parameter addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] @@ -145,10 +136,6 @@ class KademliaProtocol(RPCProtocol): is closer than the closest in that list, then store the key/value on the new node (per section 2.5 of the paper) """ - - print ("Welcome if new") - print (node) - if not self.router.is_new_node(node): return diff --git a/libp2p/kademlia/routing.py b/libp2p/kademlia/routing.py index e4b49d0..7d97494 100644 --- a/libp2p/kademlia/routing.py +++ b/libp2p/kademlia/routing.py @@ -151,11 +151,7 @@ class RoutingTable: self.buckets[index].remove_node(node) def is_new_node(self, node): - print ("IN IS NEW NODE") - print (node) - print (self.buckets) index = self.get_bucket_for(node) - print (index) return self.buckets[index].is_new_node(node) def add_contact(self, node): @@ -178,15 +174,7 @@ class RoutingTable: """ Get the index of the bucket that the given node would fall into. """ - print ("IN GET BUKCKET FOR") - print (node) - print (node.long_id) - print (self.buckets) for index, bucket in enumerate(self.buckets): - print ("IN ENUMERATE") - print (index) - print (bucket) - print (bucket.range) if node.long_id < bucket.range[1]: return index # we should never be here, but make linter happy From 69f9aa5f0ecd25c4b5e9bf9b0f8959d13597417b Mon Sep 17 00:00:00 2001 From: zixuanzh Date: Fri, 19 Apr 2019 20:44:17 -0400 Subject: [PATCH 5/7] refactor KadPeerInfo construction --- libp2p/kademlia/crawling.py | 17 ++-------------- libp2p/kademlia/kad_peerinfo.py | 15 ++++++++++++++ libp2p/kademlia/network.py | 21 ++++++------------- libp2p/kademlia/protocol.py | 36 ++++++++------------------------- 4 files changed, 31 insertions(+), 58 deletions(-) diff --git a/libp2p/kademlia/crawling.py b/libp2p/kademlia/crawling.py index 2c6a618..6a3b5fe 100644 --- a/libp2p/kademlia/crawling.py +++ b/libp2p/kademlia/crawling.py @@ -1,10 +1,7 @@ from collections import Counter import logging -from multiaddr import Multiaddr -from libp2p.peer.id import ID -from libp2p.peer.peerdata import PeerData -from .kad_peerinfo import KadPeerInfo, KadPeerHeap +from .kad_peerinfo import KadPeerHeap, create_kad_peerinfo from .utils import gather_dict @@ -186,14 +183,4 @@ class RPCFindResponse: be set. """ nodelist = self.response[1] or [] - output = [] - for nodeple in nodelist: - #TODO check if nodeple is of the right format - # node_id, ip, port - node_id = ID(nodeple[0]) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(nodeple[1]) + "/udp/" + str(nodeple[2]))] - peer_data.add_addrs(addr) - output.append(KadPeerInfo(node_id, peer_data)) - - return output + return [create_kad_peerinfo(*nodeple) for nodeple in nodelist] diff --git a/libp2p/kademlia/kad_peerinfo.py b/libp2p/kademlia/kad_peerinfo.py index 91ba939..548dd70 100644 --- a/libp2p/kademlia/kad_peerinfo.py +++ b/libp2p/kademlia/kad_peerinfo.py @@ -1,7 +1,11 @@ import heapq +import random from operator import itemgetter +from multiaddr import Multiaddr from libp2p.peer.peerinfo import PeerInfo +from libp2p.peer.id import ID +from libp2p.peer.peerdata import PeerData from .utils import digest P_IP = "ip4" @@ -126,3 +130,14 @@ class KadPeerHeap: def get_uncontacted(self): return [n for n in self if n.peer_id not in self.contacted] + +def create_kad_peerinfo(raw_node_id=None, sender_ip=None, sender_port=None): + node_id = ID(raw_node_id) if raw_node_id else ID(digest(random.getrandbits(255))) + peer_data = None + if sender_ip and sender_port: + peer_data = PeerData() #pylint: disable=no-value-for-parameter + addr = [Multiaddr("/"+ P_IP +"/" + str(sender_ip) + "/"\ + + P_UDP + "/" + str(sender_port))] + peer_data.add_addrs(addr) + + return KadPeerInfo(node_id, peer_data) diff --git a/libp2p/kademlia/network.py b/libp2p/kademlia/network.py index 2933a7e..30215a0 100644 --- a/libp2p/kademlia/network.py +++ b/libp2p/kademlia/network.py @@ -1,18 +1,14 @@ """ Package for interacting on the network at a high level. """ -import random import pickle import asyncio import logging -from multiaddr import Multiaddr -from libp2p.peer.id import ID -from libp2p.peer.peerdata import PeerData from .protocol import KademliaProtocol from .utils import digest from .storage import ForgetfulStorage -from .kad_peerinfo import KadPeerInfo +from .kad_peerinfo import create_kad_peerinfo from .crawling import ValueSpiderCrawl from .crawling import NodeSpiderCrawl @@ -42,8 +38,7 @@ class Server: self.ksize = ksize self.alpha = alpha self.storage = storage or ForgetfulStorage() - new_node_id = ID(node_id) if node_id else ID(digest(random.getrandbits(255))) - self.node = KadPeerInfo(new_node_id, None) + self.node = create_kad_peerinfo(node_id) self.transport = None self.protocol = None self.refresh_loop = None @@ -90,7 +85,7 @@ class Server: """ results = [] for node_id in self.protocol.get_refresh_ids(): - node = KadPeerInfo(node_id, None) + node = create_kad_peerinfo(node_id) nearest = self.protocol.router.find_neighbors(node, self.alpha) spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) @@ -135,11 +130,7 @@ class Server: async def bootstrap_node(self, addr): result = await self.protocol.ping(addr, self.node.peer_id) - node_id = ID(result[1]) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(addr[0]) + "/udp/" + str(addr[1]))] - peer_data.add_addrs(addr) - return KadPeerInfo(node_id, peer_data) if result[0] else None + return create_kad_peerinfo(result[1], addr[0], addr[1]) if result[0] else None async def get(self, key): """ @@ -154,7 +145,7 @@ class Server: if self.storage.get(dkey) is not None: return self.storage.get(dkey) - node = KadPeerInfo(ID(dkey)) + node = create_kad_peerinfo(dkey) nearest = self.protocol.router.find_neighbors(node) if not nearest: log.warning("There are no known neighbors to get key %s", key) @@ -180,7 +171,7 @@ class Server: Set the given SHA1 digest key (bytes) to the given value in the network. """ - node = KadPeerInfo(ID(dkey)) + node = create_kad_peerinfo(dkey) nearest = self.protocol.router.find_neighbors(node) if not nearest: diff --git a/libp2p/kademlia/protocol.py b/libp2p/kademlia/protocol.py index 11400c0..b4bff32 100644 --- a/libp2p/kademlia/protocol.py +++ b/libp2p/kademlia/protocol.py @@ -2,13 +2,10 @@ import random import asyncio import logging -from multiaddr import Multiaddr from rpcudp.protocol import RPCProtocol -from libp2p.peer.id import ID -from libp2p.peer.peerdata import PeerData -from .kad_peerinfo import KadPeerInfo +from .kad_peerinfo import create_kad_peerinfo from .routing import RoutingTable -from .utils import digest + log = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -49,21 +46,13 @@ class KademliaProtocol(RPCProtocol): return sender def rpc_ping(self, sender, nodeid): - node_id = ID(nodeid) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] - peer_data.add_addrs(addr) - source = KadPeerInfo(node_id, peer_data) + source = create_kad_peerinfo(nodeid, sender[0], sender[1]) self.welcome_if_new(source) return self.source_node.peer_id def rpc_store(self, sender, nodeid, key, value): - node_id = ID(nodeid) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] - peer_data.add_addrs(addr) - source = KadPeerInfo(node_id, peer_data) + source = create_kad_peerinfo(nodeid, sender[0], sender[1]) self.welcome_if_new(source) log.debug("got a store request from %s, storing '%s'='%s'", @@ -74,25 +63,16 @@ class KademliaProtocol(RPCProtocol): def rpc_find_node(self, sender, nodeid, key): log.info("finding neighbors of %i in local table", int(nodeid.hex(), 16)) - - node_id = ID(nodeid) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] - peer_data.add_addrs(addr) - source = KadPeerInfo(node_id, peer_data) + source = create_kad_peerinfo(nodeid, sender[0], sender[1]) # source = Node(nodeid, sender[0], sender[1]) self.welcome_if_new(source) - node = KadPeerInfo(ID(key)) + node = create_kad_peerinfo(key) neighbors = self.router.find_neighbors(node, exclude=source) return list(map(tuple, neighbors)) def rpc_find_value(self, sender, nodeid, key): - node_id = ID(nodeid) - peer_data = PeerData() #pylint: disable=no-value-for-parameter - addr = [Multiaddr("/ip4/" + str(sender[0]) + "/udp/" + str(sender[1]))] - peer_data.add_addrs(addr) - source = KadPeerInfo(node_id, peer_data) + source = create_kad_peerinfo(nodeid, sender[0], sender[1]) self.welcome_if_new(source) value = self.storage.get(key, None) @@ -141,7 +121,7 @@ class KademliaProtocol(RPCProtocol): log.info("never seen %s before, adding to router", node) for key, value in self.storage: - keynode = KadPeerInfo(ID(digest(key))) + keynode = create_kad_peerinfo(key) neighbors = self.router.find_neighbors(keynode) if neighbors: last = neighbors[-1].distance_to(keynode) From dbe3fcbf5a4c9ee23f46574ce08d15349bb0febc Mon Sep 17 00:00:00 2001 From: Alex Haynes Date: Fri, 19 Apr 2019 21:00:55 -0400 Subject: [PATCH 6/7] updated same_home_as to work for variable number of addresses --- libp2p/kademlia/kad_peerinfo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libp2p/kademlia/kad_peerinfo.py b/libp2p/kademlia/kad_peerinfo.py index 548dd70..cd0bc60 100644 --- a/libp2p/kademlia/kad_peerinfo.py +++ b/libp2p/kademlia/kad_peerinfo.py @@ -29,8 +29,7 @@ class KadPeerInfo(PeerInfo): def same_home_as(self, node): - #TODO: handle more than one addr - return self.addrs[0] == node.addrs[0] + return sorted(self.addrs) == sorted(node.addrs) def distance_to(self, node): """ From b0edc47b9aa64802b9a12cd86153ecb7288a5a96 Mon Sep 17 00:00:00 2001 From: Alex Haynes Date: Fri, 19 Apr 2019 21:04:18 -0400 Subject: [PATCH 7/7] removed commented out code --- libp2p/kademlia/protocol.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libp2p/kademlia/protocol.py b/libp2p/kademlia/protocol.py index b4bff32..b0eea2b 100644 --- a/libp2p/kademlia/protocol.py +++ b/libp2p/kademlia/protocol.py @@ -65,7 +65,6 @@ class KademliaProtocol(RPCProtocol): int(nodeid.hex(), 16)) source = create_kad_peerinfo(nodeid, sender[0], sender[1]) - # source = Node(nodeid, sender[0], sender[1]) self.welcome_if_new(source) node = create_kad_peerinfo(key) neighbors = self.router.find_neighbors(node, exclude=source)