algorithm-in-python/dataStructure/bTree.py
2020-04-15 12:28:20 +08:00

269 lines
8.5 KiB
Python

''' mbinary
#########################################################################
# File : bTree.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-08-29 12:49
# Description:
#########################################################################
'''
class node:
def __init__(self, keys=None, isLeaf=True, children=None):
if keys is None:
keys = []
if children is None:
children = []
self.keys = keys
self.isLeaf = isLeaf
self.children = []
def __getitem__(self, i):
return self.keys[i]
def __delitem__(self, i):
del self.keys[i]
def __setitem__(self, i, k):
self.keys[i] = k
def __len__(self):
return len(self.keys)
def __repr__(self):
return str(self.keys)
def __str__(self):
children = ','.join([str(nd.keys) for nd in self.children])
return f'keys: {self.keys}\nchildren: {children}\nisLeaf: {self.isLeaf}'
def getChd(self, i):
return self.children[i]
def delChd(self, i):
del self.children[i]
def setChd(self, i, chd):
self.children[i] = chd
def getChildren(self, begin=0, end=None):
if end is None:
return self.children[begin:]
return self.children[begin:end]
def findKey(self, key):
for i, k in enumerate(self.keys):
if k >= key:
return i
return len(self)
def update(self, keys=None, isLeaf=None, children=None):
if keys is not None:
self.keys = keys
if children is not None:
self.children = children
if isLeaf is not None:
self.isLeaf = isLeaf
def insert(self, i, key=None, nd=None):
if key is not None:
self.keys.insert(i, key)
if not self.isLeaf and nd is not None:
self.children.insert(i, nd)
def isLeafNode(self): return self.isLeaf
def split(self, prt, t):
# form new two nodes
k = self[t-1]
nd1 = node()
nd2 = node()
# note that t is 1 bigger than key index
nd1.keys, nd2.keys = self[:t-1], self[t:]
nd1.isLeaf = nd2.isLeaf = self.isLeaf
if not self.isLeaf:
# note that children index is one bigger than key index, and all children included
nd1.children, nd2.children = self.children[0:t], self.children[t:]
# connect them to parent
idx = prt.findKey(k)
if prt.children != []:
prt.children.remove(self) # remove the original node
prt.insert(idx, k, nd2)
prt.insert(idx, nd=nd1)
return prt
class bTree:
def __init__(self, degree=2):
self.root = node()
self.degree = degree
self.nodeNum = 1
self.keyNum = 0
def search(self, key, withpath=False):
nd = self.root
fathers = []
while True:
i = nd.findKey(key)
if i == len(nd):
fathers.append((nd, i-1, i))
else:
fathers.append((nd, i, i))
if i < len(nd) and nd[i] == key:
if withpath:
return nd, i, fathers
else:
return nd, i
if nd.isLeafNode():
if withpath:
return None, None, None
else:
return None, None
nd = nd.getChd(i)
def insert(self, key):
if len(self.root) == self.degree*2-1:
self.root = self.root.split(node(isLeaf=False), self.degree)
self.nodeNum += 2
nd = self.root
while True:
idx = nd.findKey(key)
if idx < len(nd) and nd[idx] == key:
return
if nd.isLeafNode():
nd.insert(idx, key)
self.keyNum += 1
return
else:
chd = nd.getChd(idx)
# ensure its keys won't excess when its chd split and u
if len(chd) == self.degree*2-1:
nd = chd.split(nd, self.degree)
self.nodeNum += 1
else:
nd = chd
def delete(self, key): # to do
'''search the key, delete it , and form down to up to rebalance it '''
nd, idx, fathers = self.search(key, withpath=True)
if nd is None:
return
del nd[idx]
self.keyNum -= 1
if not nd.isLeafNode():
chd = nd.getChd(idx) # find the predecessor key
while not chd.isLeafNode():
fathers.append((chd, len(chd)-1, len(chd)))
chd = chd.getChd(-1)
fathers.append((chd, len(chd)-1, len(chd)))
nd.insert(idx, chd[-1])
del chd[-1]
if len(fathers) > 1:
self.rebalance(fathers)
def rebalance(self, fathers):
nd, keyIdx, chdIdx = fathers.pop()
while len(nd) < self.degree-1: # rebalance tree from down to up
prt, keyIdx, chdIdx = fathers[-1]
lbro = [] if chdIdx == 0 else prt.getChd(chdIdx-1)
rbro = [] if chdIdx == len(prt) else prt.getChd(chdIdx+1)
if len(lbro) < self.degree and len(rbro) < self.degree: # merge two deficient nodes
beforeNode, afterNode = None, None
if lbro == []:
keyIdx = chdIdx
beforeNode, afterNode = nd, rbro
else:
beforeNode, afterNode = lbro, nd
keyIdx = chdIdx-1 # important, when choosing
keys = beforeNode[:]+[prt[keyIdx]]+afterNode[:]
children = beforeNode.getChildren() + afterNode.getChildren()
isLeaf = beforeNode.isLeafNode()
prt.delChd(keyIdx+1)
del prt[keyIdx]
nd.update(keys, isLeaf, children)
prt.children[keyIdx] = nd
self.nodeNum -= 1
elif len(lbro) >= self.degree: # rotate when only one sibling is deficient
keyIdx = chdIdx-1
nd.insert(0, prt[keyIdx]) # rotate keys
prt[keyIdx] = lbro[-1]
del lbro[-1]
if not nd.isLeafNode(): # if not leaf, move children
nd.insert(0, nd=lbro.getChd(-1))
lbro.delChd(-1)
else:
keyIdx = chdIdx
nd.insert(len(nd), prt[keyIdx]) # rotate keys
prt[keyIdx] = rbro[0]
del rbro[0]
if not nd.isLeafNode(): # if not leaf, move children
# note that insert(-1,ele) will make the ele be the last second one
nd.insert(len(nd), nd=rbro.getChd(0))
rbro.delChd(0)
if len(fathers) == 1:
if len(self.root) == 0:
self.root = nd
self.nodeNum -= 1
break
nd, i, j = fathers.pop()
def __str__(self):
head = '\n'+'-'*30+'B Tree'+'-'*30
tail = '-'*30+'the end'+'-'*30+'\n'
lst = [[head], [f'node num: {self.nodeNum}, key num: {self.keyNum}']]
cur = []
ndNum = 0
ndTotal = 1
que = [self.root]
while que != []:
nd = que.pop(0)
cur.append(repr(nd))
ndNum += 1
que += nd.getChildren()
if ndNum == ndTotal:
lst.append(cur)
cur = []
ndNum = 0
ndTotal = len(que)
lst.append([tail])
lst = [','.join(li) for li in lst]
return '\n'.join(lst)
def __iter__(self, nd=None):
if nd is None:
nd = self.root
que = [nd]
while que != []:
nd = que.pop(0)
yield nd
if nd.isLeafNode():
continue
for i in range(len(nd)+1):
que.append(nd.getChd(i))
if __name__ == '__main__':
bt = bTree()
from random import shuffle, sample
n = 20
lst = [i for i in range(n)]
shuffle(lst)
test = sample(lst, len(lst)//4)
print(f'building b-tree with {lst}')
for i in lst:
bt.insert(i)
# print(f'inserting {i})
# print(bt)
print(bt)
print(f'serching {test}')
for i in test:
nd, idx = bt.search(i)
print(f'node: {repr(nd)}[{idx}]== {i}')
for i in test:
print(f'deleting {i}')
bt.delete(i)
print(bt)