algorithm-in-python/dataStructure/bTree.py

269 lines
8.5 KiB
Python
Raw Permalink Normal View History

2018-10-02 21:24:06 +08:00
''' mbinary
#########################################################################
# File : bTree.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-08-29 12:49
# Description:
#########################################################################
'''
2020-04-15 12:28:20 +08:00
2018-08-29 15:52:02 +08:00
class node:
    '''One B-tree node: a list of keys plus, for internal nodes, child links.

    Indexing, slicing, `del` and `len()` on a node all act on its key list.
    Because `len()` counts keys, a node without keys is falsy; test against
    `None` instead of relying on truthiness.
    `findKey` scans the keys left to right, so it only gives a meaningful
    position when the keys are kept in ascending order.
    '''

    def __init__(self, keys=None, isLeaf=True, children=None):
        '''Create a node.

        keys:     list of keys, stored as-is (not copied); a new empty list if omitted.
        isLeaf:   True when the node is a leaf.
        children: list of child nodes, stored as-is; a new empty list if omitted.
        '''
        self.keys = [] if keys is None else keys
        self.isLeaf = isLeaf
        # The argument used to be discarded in favour of a fresh [], so a
        # caller-supplied child list was silently lost.
        self.children = [] if children is None else children

    def __getitem__(self, i):
        '''Return key `i` (or a new list of keys when `i` is a slice).'''
        return self.keys[i]

    def __delitem__(self, i):
        '''Delete key `i`.'''
        del self.keys[i]

    def __setitem__(self, i, k):
        '''Overwrite key `i` with `k`.'''
        self.keys[i] = k

    def __len__(self):
        '''Return the number of keys (not children).'''
        return len(self.keys)

    def __repr__(self):
        '''Return the key list rendered as a string.'''
        return str(self.keys)

    def __str__(self):
        '''Return a three-line dump: keys, the children's key lists, and isLeaf.'''
        children = ','.join(str(nd.keys) for nd in self.children)
        return f'keys: {self.keys}\nchildren: {children}\nisLeaf: {self.isLeaf}'

    def getChd(self, i):
        '''Return child `i`.'''
        return self.children[i]

    def delChd(self, i):
        '''Remove child `i`.'''
        del self.children[i]

    def setChd(self, i, chd):
        '''Replace child `i` with `chd`.'''
        self.children[i] = chd

    def getChildren(self, begin=0, end=None):
        '''Return a new list holding children[begin:end] (to the end if `end` is None).'''
        if end is None:
            return self.children[begin:]
        return self.children[begin:end]

    def findKey(self, key):
        '''Return the index of the first key >= `key`, or len(self) if there is none.'''
        for i, k in enumerate(self.keys):
            if k >= key:
                return i
        return len(self)

    def update(self, keys=None, isLeaf=None, children=None):
        '''Replace whichever of keys / isLeaf / children is given; None leaves a field alone.'''
        if keys is not None:
            self.keys = keys
        if children is not None:
            self.children = children
        if isLeaf is not None:
            self.isLeaf = isLeaf

    def insert(self, i, key=None, nd=None):
        '''Insert `key` into the keys and/or `nd` into the children at index `i`.

        A None argument is skipped. The child is only inserted when this
        node is not a leaf.
        '''
        if key is not None:
            self.keys.insert(i, key)
        if not self.isLeaf and nd is not None:
            self.children.insert(i, nd)

    def isLeafNode(self):
        '''Return True when this node is a leaf.'''
        return self.isLeaf

    def split(self, prt, t):
        '''Split this node around key index t-1 and attach the halves to `prt`.

        Keys [:t-1] go to a new left node, keys [t:] to a new right node and
        key t-1 moves up into `prt`. For an internal node the first t children
        go left and the rest go right. If `prt` already has children, this
        node is removed from them before the halves are inserted.

        prt: the parent node that receives the middle key and both halves.
        t:   split position (the tree's degree when called from bTree).
        Returns `prt`.
        '''
        k = self[t-1]
        left = node(self.keys[:t-1], self.isLeaf)
        right = node(self.keys[t:], self.isLeaf)
        if not self.isLeaf:
            # children indices run one past key indices, so the left half
            # keeps children 0..t-1 and the right half takes the remainder
            left.children, right.children = self.children[:t], self.children[t:]
        # connect them to parent
        idx = prt.findKey(k)
        if prt.children:
            prt.children.remove(self)  # remove the original node
        # right half first, then left half at the same index, so they end up
        # in left/right order on either side of k
        prt.insert(idx, k, right)
        prt.insert(idx, nd=left)
        return prt
class bTree:
    '''B-tree storing distinct keys.

    A node is split once it holds 2*degree-1 keys, and a non-root node is
    rebalanced after a deletion leaves it with fewer than degree-1 keys.
    `nodeNum` and `keyNum` track the number of nodes and keys.
    '''

    def __init__(self, degree=2):
        '''Create an empty tree.

        degree: minimum degree of the tree; must be at least 2.
        Raises ValueError for a smaller degree, which would make splits
        produce empty nodes and break later deletions.
        '''
        if degree < 2:
            raise ValueError(f'degree must be at least 2, got {degree}')
        self.root = node()
        self.degree = degree
        self.nodeNum = 1
        self.keyNum = 0

    def search(self, key, withpath=False):
        '''Look `key` up, walking down from the root.

        Returns (node, index) of the key, or (None, None) when it is absent.
        With withpath=True a third element is added: the list of
        (node, keyIdx, chdIdx) tuples visited from the root down to and
        including the node holding the key, where chdIdx is the child slot
        examined in that node. It is (None, None, None) when the key is absent.
        '''
        nd = self.root
        fathers = []
        while True:
            i = nd.findKey(key)
            if i == len(nd):
                # key is larger than every key here: the nearest key is the last one
                fathers.append((nd, i-1, i))
            else:
                fathers.append((nd, i, i))
            if i < len(nd) and nd[i] == key:
                return (nd, i, fathers) if withpath else (nd, i)
            if nd.isLeafNode():
                return (None, None, None) if withpath else (None, None)
            nd = nd.getChd(i)

    def insert(self, key):
        '''Insert `key`; a key that is already present is ignored.

        Full nodes (2*degree-1 keys) are split on the way down, so the parent
        always has room for the key a split pushes up.
        '''
        if len(self.root) == self.degree*2-1:
            self.root = self.root.split(node(isLeaf=False), self.degree)
            self.nodeNum += 2  # the new root plus one extra half
        nd = self.root
        while True:
            idx = nd.findKey(key)
            if idx < len(nd) and nd[idx] == key:
                return
            if nd.isLeafNode():
                nd.insert(idx, key)
                self.keyNum += 1
                return
            chd = nd.getChd(idx)
            if len(chd) == self.degree*2-1:
                # split returns the parent, so the next iteration re-examines
                # nd, whose keys and children have just changed
                nd = chd.split(nd, self.degree)
                self.nodeNum += 1
            else:
                nd = chd

    def delete(self, key):
        '''Delete `key` if present, then rebalance from the affected leaf upward.

        A key removed from an internal node is replaced by the last key of
        the rightmost leaf below the child to its left.
        '''
        nd, idx, fathers = self.search(key, withpath=True)
        if nd is None:
            return
        del nd[idx]
        self.keyNum -= 1
        if not nd.isLeafNode():
            chd = nd.getChd(idx)  # find the predecessor key
            while not chd.isLeafNode():
                fathers.append((chd, len(chd)-1, len(chd)))
                chd = chd.getChd(-1)
            fathers.append((chd, len(chd)-1, len(chd)))
            nd.insert(idx, chd[-1])
            del chd[-1]
        if len(fathers) > 1:
            self.rebalance(fathers)

    def rebalance(self, fathers):
        '''Repair nodes left with fewer than degree-1 keys, from the bottom up.

        fathers: list of (node, keyIdx, chdIdx) tuples from the root down to
                 the node that lost a key; it is consumed by this call.
        A deficient node is merged with a sibling when neither sibling has
        degree or more keys; otherwise it takes one key through the parent
        from the left sibling if that one has at least degree keys, else from
        the right one. When the root ends up without keys, the merged child
        becomes the new root.
        '''
        nd, _, _ = fathers.pop()
        while len(nd) < self.degree-1:  # rebalance tree from down to up
            prt, keyIdx, chdIdx = fathers[-1]
            lbro = None if chdIdx == 0 else prt.getChd(chdIdx-1)
            rbro = None if chdIdx == len(prt) else prt.getChd(chdIdx+1)
            # a missing sibling counts as having no keys; compare with None,
            # never by truthiness, because a node without keys is falsy
            lsize = 0 if lbro is None else len(lbro)
            rsize = 0 if rbro is None else len(rbro)
            if lsize < self.degree and rsize < self.degree:  # merge two deficient nodes
                if lbro is None:
                    keyIdx = chdIdx
                    beforeNode, afterNode = nd, rbro
                else:
                    beforeNode, afterNode = lbro, nd
                    keyIdx = chdIdx-1  # separator between lbro and nd
                keys = beforeNode[:]+[prt[keyIdx]]+afterNode[:]
                children = beforeNode.getChildren() + afterNode.getChildren()
                isLeaf = beforeNode.isLeafNode()
                prt.delChd(keyIdx+1)
                del prt[keyIdx]
                # nd survives the merge and takes the remaining slot
                nd.update(keys, isLeaf, children)
                prt.children[keyIdx] = nd
                self.nodeNum -= 1
            elif lsize >= self.degree:  # rotate a key in from the left sibling
                keyIdx = chdIdx-1
                nd.insert(0, prt[keyIdx])  # rotate keys
                prt[keyIdx] = lbro[-1]
                del lbro[-1]
                if not nd.isLeafNode():  # if not leaf, move children
                    nd.insert(0, nd=lbro.getChd(-1))
                    lbro.delChd(-1)
            else:  # rotate a key in from the right sibling
                keyIdx = chdIdx
                nd.insert(len(nd), prt[keyIdx])  # rotate keys
                prt[keyIdx] = rbro[0]
                del rbro[0]
                if not nd.isLeafNode():  # if not leaf, move children
                    # note that insert(-1,ele) will make the ele be the last second one
                    nd.insert(len(nd), nd=rbro.getChd(0))
                    rbro.delChd(0)
            if len(fathers) == 1:
                # prt is the root: nothing above it to repair
                if len(self.root) == 0:
                    self.root = nd
                    self.nodeNum -= 1
                break
            nd, _, _ = fathers.pop()

    def __str__(self):
        '''Return a banner, the node/key counts, then one line of node key lists per level.'''
        head = '\n'+'-'*30+'B Tree'+'-'*30
        tail = '-'*30+'the end'+'-'*30+'\n'
        lst = [[head], [f'node num: {self.nodeNum}, key num: {self.keyNum}']]
        # walk level by level instead of popping from the front of a list
        level = [self.root]
        while level:
            lst.append([repr(nd) for nd in level])
            level = [chd for nd in level for chd in nd.getChildren()]
        lst.append([tail])
        return '\n'.join(','.join(li) for li in lst)

    def __iter__(self, nd=None):
        '''Yield the nodes (not the keys) in breadth-first order, starting at `nd` or the root.'''
        if nd is None:
            nd = self.root
        level = [nd]
        while level:
            nextLevel = []
            for cur in level:
                yield cur
                if cur.isLeafNode():
                    continue
                nextLevel.extend([cur.getChd(i) for i in range(len(cur)+1)])
            level = nextLevel
2020-04-15 12:28:20 +08:00
if __name__ == '__main__':
    # Demo: build a tree from shuffled keys, look up a random quarter of
    # them, then delete those keys one by one, printing the tree as it changes.
    from random import shuffle, sample

    n = 20
    lst = list(range(n))
    shuffle(lst)
    test = sample(lst, len(lst)//4)

    bt = bTree()
    print(f'building b-tree with {lst}')
    for key in lst:
        bt.insert(key)
    print(bt)

    print(f'serching {test}')
    for key in test:
        nd, idx = bt.search(key)
        print(f'node: {nd!r}[{idx}]== {key}')

    for key in test:
        print(f'deleting {key}')
        bt.delete(key)
        print(bt)