2018-07-11 19:26:24 +08:00
|
|
|
''' mbinary
|
|
|
|
#########################################################################
|
|
|
|
# File : sunday.py
|
|
|
|
# Author: mbinary
|
|
|
|
# Mail: zhuheqin1@gmail.com
|
2019-01-31 12:09:46 +08:00
|
|
|
# Blog: https://mbinary.xyz
|
2018-07-11 19:26:24 +08:00
|
|
|
# Github: https://github.com/mbinary
|
|
|
|
# Created Time: 2018-07-11 15:26
|
|
|
|
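# Description: string pattern matching with the Sunday algorithm (a Boyer-Moore-style search that shifts on the character just past the current window)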
|
|
|
|
# pattern matching for strings using sunday algorithm
|
|
|
|
#########################################################################
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|
|
|
def getPos(pattern):
    """Build the shift table used by the Sunday search.

    Each distinct element of ``pattern`` is mapped to its distance from the
    last position of the pattern (the last element maps to 0).  When an
    element occurs more than once, the occurrence closest to the end wins.

    Returns an empty dict for an empty pattern.
    """
    table = {}
    # Walking from the end means the first time an element is seen is its
    # rightmost occurrence; setdefault keeps that first distance.
    for distance, element in enumerate(reversed(pattern)):
        table.setdefault(element, distance)
    return table
|
2020-04-15 12:28:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
def find(s, p):
    """Return the index of the first occurrence of ``p`` in ``s``, or -1.

    Implements the Sunday search: the pattern is compared left to right
    against the current window of ``s``; on a mismatch the element just past
    the window decides how far the window jumps.

    ``s`` and ``p`` only need to support ``len`` and indexing, and their
    elements must be hashable (the shift table is a dict).  An empty ``p``
    yields 0.
    """
    shift = getPos(p)
    text_len, pat_len = len(s), len(p)

    start = 0    # index in s where the current window begins
    matched = 0  # number of leading pattern elements matched in this window

    while start + matched < text_len and matched < pat_len:
        if s[start + matched] == p[matched]:
            matched += 1
            continue

        # Mismatch: look at the element immediately after the window.
        beyond = start + pat_len
        if beyond >= text_len:
            # No element past the window, so no further window fits.
            return -1

        # Align the rightmost occurrence of that element in the pattern with
        # it; if the pattern does not contain it, jump the window past it.
        start += shift.get(s[beyond], pat_len) + 1
        matched = 0

    return start if matched == pat_len else -1
|
2020-04-15 12:28:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
def findAll(s, p):
    """Return the start indices of all non-overlapping occurrences of ``p`` in ``s``.

    The text is searched with ``find``; after each hit the search resumes
    right after the matched region, so overlapping occurrences are not
    reported.  Indices are relative to the original ``s``.

    An empty pattern returns ``[]``: ``find`` reports a zero-length match at
    index 0, which would never consume any text and previously made this
    loop spin forever.

    No output is produced; the earlier per-iteration debug print was removed.
    """
    pat_len = len(p)
    if pat_len == 0:
        return []

    ret = []
    offset = 0  # number of elements of the original s already consumed
    while s:
        hit = find(s, p)
        if hit == -1:
            break
        ret.append(offset + hit)
        consumed = hit + pat_len
        offset += consumed
        # Continue after the match; slicing keeps find()'s interface unchanged.
        s = s[consumed:]
    return ret
|
|
|
|
|
|
|
|
|
|
|
|
def randStr(n=3):
    """Return a list of ``n`` random code points of lowercase ASCII letters.

    Despite the name, the result is a list of ints, each between ``ord('a')``
    and ``ord('z')`` inclusive, not a ``str``.  ``n`` defaults to 3; ``n == 0``
    gives an empty list.
    """
    # Imported here so the function also works when the module is imported
    # rather than run as a script (the file's only other import of randint
    # sits under the __main__ guard).
    from random import randint

    low, high = ord('a'), ord('z')
    return [randint(low, high) for _ in range(n)]
|
|
|
|
|
2018-12-11 15:28:05 +08:00
|
|
|
|
|
|
|
def test(n):
    """Run one randomized check of ``find`` against ``str.find``.

    Generates ``n`` random lowercase-letter code points as the text and 3 as
    the pattern, searches the code-point lists with ``find`` and the
    corresponding strings with ``str.find``, and compares the two indices.

    Returns True when they agree.  On disagreement, prints both results
    followed by the pattern and text strings, and returns False.
    """
    text_codes = randStr(n)
    pattern_codes = randStr(3)

    text = ''.join(map(chr, text_codes))
    pattern = ''.join(map(chr, pattern_codes))

    got = find(text_codes, pattern_codes)
    expected = text.find(pattern)  # cross-check against the built-in str.find

    if got == expected:
        return True
    print(got, expected, pattern, text)
    return False
|
2020-04-15 12:28:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Binds randint as a module-level name when the file is run as a script.
    from random import randint

    n = 1000
    # test(n) returns a bool per run; count the runs that succeeded.
    suc = sum(1 for _ in range(n) if test(n))
    print('test {n} times, success {suc} times'.format(n=n, suc=suc))
|