2018-10-02 21:24:06 +08:00
|
|
|
''' mbinary
|
|
|
|
#########################################################################
|
|
|
|
# File : bTree.py
|
|
|
|
# Author: mbinary
|
|
|
|
# Mail: zhuheqin1@gmail.com
|
2019-01-31 12:09:46 +08:00
|
|
|
# Blog: https://mbinary.xyz
|
2018-10-02 21:24:06 +08:00
|
|
|
# Github: https://github.com/mbinary
|
|
|
|
# Created Time: 2018-08-29 12:49
|
|
|
|
# Description:
|
|
|
|
#########################################################################
|
|
|
|
'''
|
|
|
|
|
2020-04-15 12:28:20 +08:00
|
|
|
|
2018-08-29 15:52:02 +08:00
|
|
|
class node:
    """One node of a B-tree: a list of keys plus, for internal nodes, child links.

    Indexing, slicing, ``del`` and ``len`` on a node operate on ``keys``;
    children are reached through ``getChd`` / ``setChd`` / ``delChd`` /
    ``getChildren``.

    Attributes:
        keys: keys held by this node.  ``findKey`` scans them left to right,
            so callers are expected to keep them in ascending order.
        isLeaf: True when the node is a leaf.
        children: child nodes (used only when the node is not a leaf).
    """

    def __init__(self, keys=None, isLeaf=True, children=None):
        """Create a node.

        Args:
            keys: initial key list; a fresh empty list when omitted.
            isLeaf: whether the node is a leaf.
            children: initial child list; a fresh empty list when omitted.
        """
        # None defaults give every instance its own list objects.
        self.keys = [] if keys is None else keys
        self.isLeaf = isLeaf
        # Keep the caller-supplied children; assigning a bare [] here would
        # silently drop them.
        self.children = [] if children is None else children

    def __getitem__(self, i):
        """Return ``keys[i]`` (``i`` may be an index or a slice)."""
        return self.keys[i]

    def __delitem__(self, i):
        """Delete ``keys[i]``."""
        del self.keys[i]

    def __setitem__(self, i, k):
        """Replace ``keys[i]`` with ``k``."""
        self.keys[i] = k

    def __len__(self):
        """Return the number of keys in the node."""
        return len(self.keys)

    def __repr__(self):
        """Return the key list rendered as a string."""
        return str(self.keys)

    def __str__(self):
        """Return a multi-line summary: keys, the children's keys and isLeaf."""
        children = ','.join(str(nd.keys) for nd in self.children)
        return f'keys: {self.keys}\nchildren: {children}\nisLeaf: {self.isLeaf}'

    def getChd(self, i):
        """Return the ``i``-th child."""
        return self.children[i]

    def delChd(self, i):
        """Remove the ``i``-th child."""
        del self.children[i]

    def setChd(self, i, chd):
        """Replace the ``i``-th child with ``chd``."""
        self.children[i] = chd

    def getChildren(self, begin=0, end=None):
        """Return a new list holding ``children[begin:end]``.

        ``end=None`` means "up to the last child".
        """
        # A slice bound of None already means "to the end".
        return self.children[begin:end]

    def findKey(self, key):
        """Return the index of the first stored key that is ``>= key``.

        Returns ``len(self)`` when every stored key is smaller than ``key``.
        """
        for i, k in enumerate(self.keys):
            if k >= key:
                return i
        return len(self.keys)

    def update(self, keys=None, isLeaf=None, children=None):
        """Overwrite whichever of keys / isLeaf / children is not None."""
        if keys is not None:
            self.keys = keys
        if children is not None:
            self.children = children
        if isLeaf is not None:
            self.isLeaf = isLeaf

    def insert(self, i, key=None, nd=None):
        """Insert ``key`` into the keys and/or ``nd`` into the children at ``i``.

        Args:
            i: position used for both insertions.
            key: key to insert; skipped when None.
            nd: child to insert; skipped when None or when this node is a leaf.
        """
        if key is not None:
            self.keys.insert(i, key)
        if not self.isLeaf and nd is not None:
            self.children.insert(i, nd)

    def isLeafNode(self):
        """Return True when the node is a leaf."""
        return self.isLeaf

    def split(self, prt, t):
        """Split this node around ``keys[t-1]`` and hook the halves into ``prt``.

        The key at index ``t-1`` moves up into ``prt``; the keys before it and
        the keys after it go into two new nodes which replace this node among
        ``prt``'s children.

        Args:
            prt: the parent node (or a fresh non-leaf node when splitting a
                root).
            t: split position; ``keys[t-1]`` is the key pushed up.

        Returns:
            ``prt``, after modification.

        Raises:
            ValueError: if ``prt`` has children but this node is not one of
                them (raised by ``list.remove``).
        """
        k = self.keys[t - 1]
        # The key at index t-1 goes up, so the halves are keys[:t-1] / keys[t:].
        left = node(self.keys[:t - 1], self.isLeaf)
        right = node(self.keys[t:], self.isLeaf)
        if not self.isLeaf:
            # There is one more child than keys: the first t children follow
            # the left half, the remainder follow the right half.
            left.children, right.children = self.children[:t], self.children[t:]

        # Connect the halves to the parent at the slot where k belongs.
        idx = prt.findKey(k)
        if prt.children:
            prt.children.remove(self)  # drop the node being replaced
        prt.insert(idx, k, right)
        # Inserting at the same index puts `left` immediately before `right`.
        prt.insert(idx, nd=left)
        return prt
|
|
|
|
|
|
|
|
|
|
|
|
class bTree:
    """B-tree of unique keys, parameterised by its minimum degree.

    During insertion a node holding ``2 * degree - 1`` keys is split before it
    is descended into; after a deletion a non-root node left with fewer than
    ``degree - 1`` keys is repaired by rotating a key in from a sibling or by
    merging with one.

    Attributes:
        root: the root node.
        degree: minimum degree of the tree.
        nodeNum: number of nodes currently in the tree.
        keyNum: number of keys currently stored.
    """

    def __init__(self, degree=2):
        """Create an empty tree.

        Args:
            degree: minimum degree; must be at least 2.

        Raises:
            ValueError: if ``degree`` is smaller than 2.
        """
        # With degree < 2 a split yields empty halves and deletion never
        # rebalances, so reject it up front instead of corrupting the tree.
        if degree < 2:
            raise ValueError(f'degree must be at least 2, got {degree!r}')
        self.root = node()
        self.degree = degree
        self.nodeNum = 1
        self.keyNum = 0

    def search(self, key, withpath=False):
        """Look ``key`` up, descending from the root.

        Args:
            key: the key to find.
            withpath: when True, also return the path that was walked.

        Returns:
            ``(nd, i)`` with ``nd[i] == key`` when the key is found, else
            ``(None, None)``.  With ``withpath=True`` the result is
            ``(nd, i, fathers)`` or ``(None, None, None)``; ``fathers`` is a
            list of ``(node, keyIdx, chdIdx)`` tuples from the root down to
            ``nd``, where ``chdIdx`` is the child slot followed (or the key
            position, for the final node).
        """
        nd = self.root
        fathers = []
        while True:
            i = nd.findKey(key)
            # Past the last key there is no key at index i, so the key index
            # recorded for the path is clamped to i-1.
            fathers.append((nd, i - 1 if i == len(nd) else i, i))
            if i < len(nd) and nd[i] == key:
                return (nd, i, fathers) if withpath else (nd, i)
            if nd.isLeafNode():
                return (None, None, None) if withpath else (None, None)
            nd = nd.getChd(i)

    def insert(self, key):
        """Insert ``key``; a key that is already present is left untouched."""
        full = self.degree * 2 - 1
        if len(self.root) == full:
            # Splitting the root puts a new root above two halves: the old
            # root is replaced by three nodes, hence +2.
            self.root = self.root.split(node(isLeaf=False), self.degree)
            self.nodeNum += 2
        nd = self.root
        while True:
            idx = nd.findKey(key)
            if idx < len(nd) and nd[idx] == key:
                return
            if nd.isLeafNode():
                nd.insert(idx, key)
                self.keyNum += 1
                return
            chd = nd.getChd(idx)
            if len(chd) == full:
                # Split the full child before descending; split() returns the
                # parent, so the next iteration re-picks the correct half.
                nd = chd.split(nd, self.degree)
                self.nodeNum += 1
            else:
                nd = chd

    def delete(self, key):
        """Remove ``key`` if present, then rebalance from the bottom up."""
        nd, idx, fathers = self.search(key, withpath=True)
        if nd is None:
            return
        del nd[idx]
        self.keyNum -= 1
        if not nd.isLeafNode():
            # Replace the removed key with its predecessor: the last key of
            # the rightmost leaf under the child left of the removed key.
            chd = nd.getChd(idx)
            while not chd.isLeafNode():
                fathers.append((chd, len(chd) - 1, len(chd)))
                chd = chd.getChd(-1)
            fathers.append((chd, len(chd) - 1, len(chd)))
            nd.insert(idx, chd[-1])
            del chd[-1]
        # A path of length one means the affected node is the root, which is
        # allowed to hold fewer keys.
        if len(fathers) > 1:
            self.rebalance(fathers)

    def rebalance(self, fathers):
        """Repair under-full nodes along ``fathers``, from the bottom up.

        Args:
            fathers: path of ``(node, keyIdx, chdIdx)`` tuples as produced by
                ``search(..., withpath=True)`` and extended by ``delete``;
                the list is consumed.
        """
        nd, keyIdx, chdIdx = fathers.pop()
        while len(nd) < self.degree - 1:
            prt, keyIdx, chdIdx = fathers[-1]
            # An empty list stands in for a missing sibling so that len()
            # works uniformly in the tests below.
            lbro = [] if chdIdx == 0 else prt.getChd(chdIdx - 1)
            rbro = [] if chdIdx == len(prt) else prt.getChd(chdIdx + 1)
            if len(lbro) < self.degree and len(rbro) < self.degree:
                # Neither sibling can spare a key: merge with one of them.
                if chdIdx == 0:
                    # No left sibling: merge with the right one.
                    keyIdx = chdIdx
                    beforeNode, afterNode = nd, rbro
                else:
                    # Merging with the left sibling uses the separator that
                    # sits to the left of nd.
                    beforeNode, afterNode = lbro, nd
                    keyIdx = chdIdx - 1
                keys = beforeNode[:] + [prt[keyIdx]] + afterNode[:]
                children = beforeNode.getChildren() + afterNode.getChildren()
                isLeaf = beforeNode.isLeafNode()
                prt.delChd(keyIdx + 1)
                del prt[keyIdx]
                # nd is reused as the merged node and occupies the left slot.
                nd.update(keys, isLeaf, children)
                prt.children[keyIdx] = nd
                self.nodeNum -= 1
            elif len(lbro) >= self.degree:
                # Rotate right: the separator drops into nd and the left
                # sibling's last key replaces it.
                keyIdx = chdIdx - 1
                nd.insert(0, prt[keyIdx])
                prt[keyIdx] = lbro[-1]
                del lbro[-1]
                if not nd.isLeafNode():
                    # The sibling's last child follows its key.
                    nd.insert(0, nd=lbro.getChd(-1))
                    lbro.delChd(-1)
            else:
                # Rotate left: the separator drops into nd and the right
                # sibling's first key replaces it.
                keyIdx = chdIdx
                nd.insert(len(nd), prt[keyIdx])
                prt[keyIdx] = rbro[0]
                del rbro[0]
                if not nd.isLeafNode():
                    # len(nd) already counts the key just added, so this
                    # index appends the child at the very end.
                    nd.insert(len(nd), nd=rbro.getChd(0))
                    rbro.delChd(0)
            if len(fathers) == 1:
                # prt is the root; if the merge emptied it, the merged node
                # becomes the new root.
                if len(self.root) == 0:
                    self.root = nd
                    self.nodeNum -= 1
                break
            nd, _, _ = fathers.pop()

    def __str__(self):
        """Return a banner, the node/key counts and one line of nodes per level."""
        head = '\n' + '-' * 30 + 'B Tree' + '-' * 30
        tail = '-' * 30 + 'the end' + '-' * 30 + '\n'
        lst = [[head], [f'node num: {self.nodeNum}, key num: {self.keyNum}']]
        # Walk the tree one level at a time instead of popping from the front
        # of a list, which costs O(n) per pop.
        level = [self.root]
        while level:
            lst.append([repr(nd) for nd in level])
            level = [chd for nd in level for chd in nd.getChildren()]
        lst.append([tail])
        return '\n'.join(','.join(li) for li in lst)

    def __iter__(self, nd=None):
        """Yield the nodes of the tree in breadth-first order.

        Args:
            nd: node to start from; defaults to the root.
        """
        if nd is None:
            nd = self.root
        level = [nd]
        while level:
            nxt = []
            for cur in level:
                yield cur
                if not cur.isLeafNode():
                    nxt.extend(cur.getChildren())
            level = nxt
|
|
|
|
|
|
|
|
|
2020-04-15 12:28:20 +08:00
|
|
|
if __name__ == '__main__':
    from random import sample, shuffle

    # Demo: build a tree from a shuffled range, look up a random quarter of
    # the keys, then delete those keys one by one, printing the tree each time.
    n = 20
    bt = bTree()
    lst = list(range(n))
    shuffle(lst)
    test = sample(lst, len(lst) // 4)

    print(f'building b-tree with {lst}')
    for i in lst:
        bt.insert(i)
    print(bt)

    print(f'serching {test}')
    for i in test:
        nd, idx = bt.search(i)
        print(f'node: {repr(nd)}[{idx}]== {i}')

    for i in test:
        print(f'deleting {i}')
        bt.delete(i)
        print(bt)
|