mirror of
https://github.com/heqin-zhu/algorithm.git
synced 2024-03-22 13:30:46 +08:00
String matching algorithm, permutation algorithm
This commit is contained in:
parent
3b8fa1782b
commit
166cd2737b
|
@ -84,7 +84,7 @@ def genNum(n =10,upper=10):
|
|||
return nums.values()
|
||||
|
||||
def buildTree(n=10,nums=None,visitor=None):
|
||||
if nums is None or nums ==[]: nums = genNum(n)
|
||||
#if nums is None or nums ==[]: nums = genNum(n)
|
||||
tree = intervalTree()
|
||||
print(f'build a red-black tree using {nums}')
|
||||
for i in nums:
|
||||
|
@ -100,6 +100,7 @@ def testInsert(nums=None):
|
|||
print('-'*5+ 'in-order visit' + '-'*5)
|
||||
for i,j in enumerate(tree.sort()):
|
||||
print(f'{i+1}: {j}')
|
||||
return tree
|
||||
|
||||
def testSuc(nums=None):
|
||||
tree,nums = buildTree(nums=nums)
|
||||
|
@ -113,10 +114,16 @@ def testDelete(nums=None):
|
|||
print(f'deleting {i}')
|
||||
tree.delete(i[0])
|
||||
print(tree)
|
||||
return tree
|
||||
|
||||
if __name__=='__main__':
|
||||
lst = [(0,3),(5,8),(6,10),(26,26),(25,30),(8,9),(19,20),(15,23),(16,21),(17,19)]
|
||||
lst = None
|
||||
#lst = None
|
||||
#testSuc(lst)
|
||||
#testInsert(lst)
|
||||
testDelete(lst)
|
||||
tree = testInsert(lst)
|
||||
#tree,_= buildTree(lst)
|
||||
while 1:
|
||||
a =int( input('low:'))
|
||||
b =int( input('high:'))
|
||||
res = tree.search(a,b)
|
||||
print(res)
|
||||
|
|
|
@ -286,6 +286,7 @@ def buildTree(n=10,nums=None,visitor=None):
|
|||
print(f'build a red-black tree using {nums}')
|
||||
for i in nums:
|
||||
rbtree.insert(node(i))
|
||||
print(rbtree)
|
||||
if visitor:
|
||||
visitor(rbtree,i)
|
||||
return rbtree,nums
|
||||
|
|
|
@ -107,18 +107,18 @@ def test(f=minDistance_n2):
|
|||
print('result: {:.2f} {} {}\n'.format(minD, p,q))
|
||||
|
||||
def genData(n,unique=True):
|
||||
upper = 1000000
|
||||
if unique:
|
||||
points = set()
|
||||
for i in range(n):
|
||||
points.add(point(randint(1,1000),randint(1,1000)))
|
||||
points.add(point(randint(1,upper),randint(1,upper)))
|
||||
return list(points)
|
||||
else:return [point(randint(1,1000),randint(1,1000)) for i in range(n)]
|
||||
else:return [point(randint(1,upper),randint(1,upper)) for i in range(n)]
|
||||
|
||||
if __name__ =='__main__':
|
||||
n = 10000
|
||||
n = 1000
|
||||
points = genData(n, unique=True)
|
||||
print('min distance of {} points'.format(n))
|
||||
#print(sorted(points))
|
||||
test(minDistance_n2)
|
||||
test(minDistance_nlogn)
|
||||
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
import Vec2d (Vec2d,getVal,setVal)
|
||||
|
||||
|
||||
lcs a b =
|
||||
let m = lenghth a
|
||||
n = length b
|
||||
rst = []
|
||||
in 1 --to do
|
|
@ -29,17 +29,22 @@ def lcs2(a,b):
|
|||
m,n= len(a),len(b)
|
||||
board = [[] for i in range(n+1)]
|
||||
for i in range(m):
|
||||
last = []
|
||||
upperLevel = board[0].copy()
|
||||
for j in range(n):
|
||||
tmp = board[j+1].copy()
|
||||
if a[i]==b[j]:
|
||||
board[j+1] =board[j]+[a[i]]
|
||||
elif len(board[j+1]) < len(last):
|
||||
board[j+1] = last
|
||||
last = board[j+1]
|
||||
board[j+1] = upperLevel+[a[i]]
|
||||
elif len(board[j+1]) < len(board[j]):
|
||||
board[j+1] = board[j].copy() # copy is needed
|
||||
upperLevel = tmp
|
||||
return board[n]
|
||||
|
||||
if __name__ =='__main__':
|
||||
a="dsaffqewqfqewregqwefqwe"
|
||||
b="adsfsfs3qt5yhyh24efwq"
|
||||
print(lcs(a,b))
|
||||
print(lcs2(a,b))
|
||||
a = 'ABCBDAB'
|
||||
b = 'BDCABA'
|
||||
print('s1:',a)
|
||||
print('s2:',b)
|
||||
while 1:
|
||||
print('lcs:',lcs2(a,b))
|
||||
a = input('s1: ')
|
||||
b = input('s2: ')
|
||||
|
|
12
math/permute_back_track.py
Normal file
12
math/permute_back_track.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
def permute(n):
    """Print every permutation of 0..n-1, one list per line.

    Works by in-place swap backtracking: each position in turn takes every
    value still available to its right, the remaining positions are filled
    recursively, and the swap is undone before the next choice is tried.
    """
    items = list(range(n))

    def _fill(pos):
        # All positions are fixed: emit the current arrangement.
        if pos == n:
            print(items)
            return
        for pick in range(pos, n):
            items[pos], items[pick] = items[pick], items[pos]
            _fill(pos + 1)
            # Undo the swap so the next candidate starts from the same state.
            items[pick], items[pos] = items[pos], items[pick]

    _fill(0)
|
||||
|
||||
if __name__=='__main__':
|
||||
permute(5)
|
|
@ -11,7 +11,7 @@ void calFac(int n)
|
|||
}
|
||||
}
|
||||
|
||||
void getArrangement(int *arr,int n,int sum)
|
||||
void permute(int *arr,int n,int sum)
|
||||
{
|
||||
/*sum表示全排列由小到大排序后的名次,从0 开始计数, 由名次求出 n位的排列存储到 arr 中*/
|
||||
int i,j,ct=0,k, ct2;
|
||||
|
@ -36,3 +36,21 @@ void getArrangement(int *arr,int n,int sum)
|
|||
}
|
||||
}
|
||||
|
||||
/* Print the first n elements of p on one line, each followed by ", ",
 * then terminate the line. */
void printArr(int *p,int n)
{
    int idx = 0;
    while (idx < n) {
        printf("%d, ", p[idx]);
        idx++;
    }
    printf("\n");
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int n = 5,arr[n];
|
||||
calFac(n);
|
||||
for(int i=0;i<5;++i)arr[i]=i;
|
||||
for(int i=0;i<fac[n];++i){
|
||||
printArr(arr,n);
|
||||
permute(arr,n,i);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
12
math/permute_divide_and_conquer.py
Normal file
12
math/permute_divide_and_conquer.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
def permute(lst, n):
    """Print every arrangement of the first ``n`` items of ``lst``. O(n!).

    Each of the first ``n`` items is swapped into slot ``n-1`` in turn, the
    leading ``n-1`` items are permuted recursively, and the swap is undone,
    so ``lst`` is back in its original order when the call returns.
    Nothing is printed when ``n`` is less than 1.
    """
    if n == 1:
        print(lst)
        return
    tail = n - 1
    for pick in range(n):
        lst[pick], lst[tail] = lst[tail], lst[pick]
        permute(lst, tail)
        # Restore the list before trying the next item in the last slot.
        lst[tail], lst[pick] = lst[pick], lst[tail]
|
||||
|
||||
if __name__=='__main__':
|
||||
n = 3
|
||||
permute([i for i in range(n)],n)
|
3
math/primesLEn.hs
Normal file
3
math/primesLEn.hs
Normal file
|
@ -0,0 +1,3 @@
|
|||
-- | All primes less than or equal to n, largest first.
--
-- Built recursively: n is prepended to the primes below it when none of
-- them divides n.  Arguments below 2 yield the empty list (previously they
-- recursed forever because the only base case was n == 2).
genPrimes n
  | n < 2     = []
  | n == 2    = [2]
  | otherwise =
      let li = genPrimes (n - 1)
      in if all (\x -> mod n x /= 0) li then n : li else li
|
34
search/BFS_knight.hs
Normal file
34
search/BFS_knight.hs
Normal file
|
@ -0,0 +1,34 @@
|
|||
{- mbinary
|
||||
#########################################################################
|
||||
# File : BFS_knight.hs
|
||||
# Author: mbinary
|
||||
# Mail: zhuheqin1@gmail.com
|
||||
# Blog: https://mbinary.coding.me
|
||||
# Github: https://github.com/mbinary
|
||||
# Created Time: 2018-11-11 19:40
|
||||
# Description:
|
||||
#########################################################################
|
||||
-}
|
||||
{-
|
||||
Given two different positions on a chess board, find the least number of moves it would take a knight to get from one to the other. The positions will be passed as two arguments in algebraic notation. For example, knight("a3", "b5") should return 1.
|
||||
|
||||
The knight is not allowed to move off the board. The board is 8x8.
|
||||
-}
|
||||
|
||||
module ShortestKnightPath.Kata (knight) where
|
||||
import Data.Char
|
||||
import Data.List
|
||||
-- | Least number of knight moves between two squares given in algebraic
-- notation (e.g. "a3").  Breadth-first search by levels: starting from the
-- one-element frontier [begin], 'gen' is iterated and the frontiers that do
-- not yet contain the target square are counted.
knight :: String -> String -> Int
knight s1 s2 = let begin = axis s1
                   end = axis s2
                   notEnd = all (\tp->tp /=end)
    in length . takeWhile notEnd .iterate gen $[begin]

-- | Next frontier: every square reachable in one move from any square of
-- the current frontier, restricted to coordinates 1..8 and de-duplicated.
gen li = nub. flatten $map (filter (\(a,b) ->a>0 && b>0 &&a<9&&b<9 ) . change) li
-- | The eight candidate knight moves from (a,b), without bounds checking.
change (a,b) = [(a-1,b-2),(a-1,b+2),(a+1,b-2),(a+1,b+2),(a+2,b-1),(a+2,b+1),(a-2,b+1),(a-2,b-1)]

-- | Convert a two-character square name into (file, rank) numbers: the
-- first character's code minus 96 (so 'a' becomes 1) and the second
-- character read as a digit.
axis s = (ord (s!!0) -96, digitToInt (s!!1)::Int)

-- | Concatenate a list of lists into a single list.
flatten [] = []
flatten (x:xs) = x ++ flatten xs
|
||||
|
7
search/binary_search.hs
Normal file
7
search/binary_search.hs
Normal file
|
@ -0,0 +1,7 @@
|
|||
-- | Binary search for i in the list li over the index range 0 .. length li - 1.
-- Presumably li is sorted in ascending order -- TODO confirm with callers.
-- Returns the index of a midpoint whose element equals i when one is hit;
-- once the interval has collapsed (a >= b) the lower bound a is returned
-- as-is, without checking that the element there equals i, so callers must
-- verify the result themselves.
-- NOTE(review): li!!mid walks the list from its head on every probe.
search i li= binary 0 $length li -1
    where binary a b= let mid = div (a+b) 2
                          p = li!!mid
                      in if a>=b then a
                         else if p==i then mid
                         else if p>i then binary a $mid-1
                         else binary (mid+1) b
|
104
search/schedule.py
Normal file
104
search/schedule.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
'''
|
||||
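Backtracking search over the whole solution space, with pruning.

There are n tasks to be completed by k machines that can work in parallel; task i takes time t_i. Design an algorithm that finds the best schedule for these n tasks, i.e. the one that finishes all of them as early as possible.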
|
||||
'''
|
||||
from time import time
|
||||
from functools import total_ordering
|
||||
@total_ordering
class record:
    """A list of task durations kept together with its running total.

    Records compare, and hash, purely by their totals, so two records that
    hold different tasks but the same total count as equal; putting several
    records in a set therefore keeps one representative per distinct total.
    """

    def __init__(self, nums=None):
        # The given list is stored by reference, not copied.
        self.nums = [] if nums is None else nums
        self.sum = sum(self.nums)

    def append(self, x):
        """Add a task of duration x and update the total."""
        self.nums.append(x)
        self.sum = self.sum + x

    def pop(self):
        """Remove and return the most recently added task."""
        removed = self.nums.pop()
        self.sum = self.sum - removed
        return removed

    def __repr__(self):
        return repr(self.nums)

    def __lt__(self, r):
        return r.sum > self.sum

    def __eq__(self, r):
        return r.sum == self.sum

    def tolist(self):
        """Return an independent copy of the task list."""
        return self.nums.copy()

    def __hash__(self):
        # Consistent with __eq__: equal totals hash alike.
        return self.sum
|
||||
def schedule(works, k):
    """Find a schedule of tasks on k parallel machines with minimal finish time.

    Args:
        works: durations of the tasks. The list is not modified.
        k: number of identical machines (must be at least 1).

    Returns:
        A pair ``(best, rst)``: ``best`` is the smallest achievable
        completion time of the busiest machine and ``rst`` is a list of k
        lists holding the durations assigned to each machine.

    Raises:
        ValueError: if ``k`` is less than 1.

    A greedy pass (longest task first onto the least-loaded machine) gives an
    initial plan and upper bound; a pruned backtracking search then looks for
    anything strictly better. Progress and timings are printed.
    """
    if k < 1:
        raise ValueError('k must be at least 1')

    # Key step: placing long tasks first makes the bound tight early, which
    # prunes far more of the search tree. A sorted copy keeps the caller's
    # list untouched.
    jobs = sorted(works, reverse=True)
    n = len(jobs)
    print()
    print('machine Num:', k)
    print('works :', works)

    # Key step: greedy initial solution. Its plan is kept, so a plan is
    # returned even when the search below cannot improve on it.
    t = time()
    loads = [0] * k
    plan = [[] for _ in range(k)]
    for w in jobs:
        idx = loads.index(min(loads))  # first least-loaded machine
        loads[idx] += w
        plan[idx].append(w)
    best = max(loads)
    rst = plan
    t1 = time() - t
    print('init solution: {} cost time {:.6f}s'.format(best, t1))

    # Fresh state for the exhaustive search.
    loads = [0] * k
    plan = [[] for _ in range(k)]

    def backtrackSearch(i):
        nonlocal best, rst
        if i == n:
            cost = max(loads)
            if best > cost:
                best = cost
                rst = [tasks.copy() for tasks in plan]
            return
        # Machines with equal load are interchangeable: try one per load.
        tried = set()
        for m in range(k):
            load = loads[m]
            if load in tried:
                continue
            tried.add(load)
            # Prune branches that cannot beat the best known finish time.
            if best > load + jobs[i]:
                loads[m] += jobs[i]
                plan[m].append(jobs[i])
                backtrackSearch(i + 1)
                plan[m].pop()
                loads[m] -= jobs[i]

    t = time()
    backtrackSearch(0)
    t2 = time() - t
    print('final solution: {} cost time {:.6f}s'.format(best, t2))
    print('schedule plan:', rst)
    return best, rst
|
||||
|
||||
if __name__=='__main__':
|
||||
from random import randint
|
||||
schedule([47,20,28,44,21,45,30,39,28,33],3)
|
||||
schedule([98,84,50,23,32,99,22,76,72,61,81,39,76,54,37],5)
|
||||
schedule([39,39,23,45,100,69,21,81,39,55,20,86,34,53,58,99,36,45,46],8)
|
||||
|
||||
'''
|
||||
machine Num: 19
|
||||
works : [39, 39, 23, 45, 100, 69, 21, 81, 39, 55, 20, 86, 34, 53, 58, 99, 36, 45, 46]
|
||||
|
||||
works 经过逆序排序
|
||||
init solution: 135 cost time 0.000196s
|
||||
final solution: 126 cost time 0.022922s
|
||||
schedule plan: [[100, 21], [99, 23], [86, 39], [81, 45], [69, 53], [58, 45, 20], [55, 36, 34], [46, 39, 39]]
|
||||
|
||||
works 没有经过排序
|
||||
init solution: 168 cost time 0.000179s
|
||||
final solution: 126 cost time 10.646307s
|
||||
schedule plan: [[39, 86], [39, 34, 53], [23, 99], [45, 39, 36], [100, 20], [69, 55], [21, 58, 46], [81, 45]]
|
||||
'''
|
58
string/KMP.py
Normal file
58
string/KMP.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
#coding: utf-8
|
||||
''' mbinary
|
||||
#########################################################################
|
||||
# File : KMP.py
|
||||
# Author: mbinary
|
||||
# Mail: zhuheqin1@gmail.com
|
||||
# Blog: https://mbinary.coding.me
|
||||
# Github: https://github.com/mbinary
|
||||
# Created Time: 2018-12-11 14:02
|
||||
# Description:
|
||||
#########################################################################
|
||||
'''
|
||||
|
||||
def getPrefixFunc(s):
    '''Return the prefix-function table of s as a list.

    Entry q holds the length of the longest proper prefix of s[:q+1] that is
    also a suffix of it. The table always starts with a single 0, so an
    empty s yields [0].
    '''
    table = [0]
    matched = 0
    for pos in range(1, len(s)):
        # Fall back through shorter borders until one can be extended.
        while matched != 0 and not s[pos] == s[matched]:
            matched = table[matched - 1]
        if s[pos] == s[matched]:
            matched += 1
        table.append(matched)
    return table
|
||||
|
||||
def findAll(s, p):
    '''Return the start indices of all occurrences of p in s (KMP).

    s: text, p: pattern; both are indexable sequences such as str or list.
    Overlapping occurrences are reported. An empty pattern yields an empty
    list (it used to raise IndexError).
    '''
    n, m = len(s), len(p)
    if m == 0:
        return []
    pre = getPrefixFunc(p)
    i = j = 0
    ret = []
    while i < n:
        if s[i] == p[j]:
            i += 1
            j += 1
            if j == m:
                ret.append(i - j)
                # Continue from the longest border so overlaps are found.
                j = pre[j - 1]
        elif j == 0:
            i += 1
        else:
            # Mismatch after a partial match: reuse the matched border
            # instead of re-reading the text.
            j = pre[j - 1]
    return ret
|
||||
def randStr(n=3):
    '''Return a list of n random character codes in the range 'a'..'z'.

    Despite the name the result is a list of ints, not a str.
    '''
    # Imported here so the function also works when this module is imported
    # rather than run as a script (randint used to be bound only under the
    # __main__ guard).
    from random import randint
    return [randint(ord('a'), ord('z')) for i in range(n)]
|
||||
|
||||
if __name__ =='__main__':
|
||||
from random import randint
|
||||
s = randStr(50)
|
||||
p = randStr(1)
|
||||
print(s)
|
||||
print(p)
|
||||
print(findAll(s,p))
|
110
string/README.md
Normal file
110
string/README.md
Normal file
|
@ -0,0 +1,110 @@
|
|||
# String Matching algorithm
|
||||
|
||||
![](https://upload-images.jianshu.io/upload_images/7130568-e10dc137e9083a0e.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
|
||||
|
||||
## Rabin-Karp
|
||||
We can view a string of k characters (digits) as a length-k decimal number. E.g., the string “31425” corresponds to the decimal number 31,425.
|
||||
- Given a pattern P [1..m], let p denote the corresponding decimal value.
|
||||
- Given a text T [1..n], let $t_s$ denote the decimal value of the length-m substring T [(s+1)..(s+m)] for s=0,1,…,(n-m).
|
||||
- let `d` be the radix of num, thus $d = len(set(s))$
|
||||
- $t_s$ = p iff T [(s+1)..(s+m)] = P [1..m].
|
||||
- p can be computed in O(m) time. p = P[m] + d\*(P[m-1] + d\*(P[m-2]+…)).
|
||||
- t0 can similarly be computed in O(m) time.
|
||||
- Other $t_1,\ldots,t_{n-m}$ can be computed in O(n-m) time since $t_{s+1}$ can be computed from $t_s$ in constant time.
|
||||
Namely,
|
||||
|
||||
$$
|
||||
t_{s+1} = d*(t_s-d^{m-1} * T[s+1])+T[s+m+1]
|
||||
$$
|
||||
However, there is no need to calculate $t_{s+1}$ directly. We can use the modulus operation to reduce the amount of calculation.
|
||||
|
||||
We choose a small prime number. Eg 13 for radix( noted as d) 10.
|
||||
Generally, d\*q should fit within one computer word.
|
||||
|
||||
We first calculate t0 mod q.
|
||||
Then, for every $t_i (i>1)$
|
||||
assume
|
||||
$$
t_{i-1} = T[i+m-1] + d*T[i+m-2]+\ldots+d^{m-1}*T[i]
$$
denote $ d' = d^{m-1}\ mod\ q$
thus,
$$
\begin{aligned}
t_i &= (t_{i-1} - d^{m-1}*T[i]) * d + T[i+m]\\
&\equiv (t_{i-1} - d^{m-1}*T[i]) * d + T[i+m] (mod\ q)\\
&\equiv (t_{i-1}- ( d^{m-1} mod \ q) *T[i]) * d + T[i+m] (mod\ q)\\
&\equiv (t_{i-1}- d'*T[i]) * d + T[i+m] (mod\ q)
\end{aligned}
$$
|
||||
|
||||
So we can compare the modular value of each ti with p's.
|
||||
Only if they are the same do we compare the original characters, namely $T[i],T[i+1],\ldots,T[i+m-1]$, with the pattern.
|
||||
Generally, this algorithm's expected running time is O(n+m), and the worst case is O((n-m+1)\*m)
|
||||
|
||||
**Problem: this is assuming p and ts are small numbers. They may be too large to work with easily.**
|
||||
|
||||
## FSM
|
||||
A FSM can be represented as (Q,q0,A,S,C), where
|
||||
- Q is the set of all states
|
||||
- q0 is the start state
|
||||
- $A\subseteq Q$ is a set of accepting states.
|
||||
- S is a finite input alphabet.
|
||||
- C is the set of transition functions: namely $q_j = c(s,q_i)$.
|
||||
|
||||
Given a pattern string S, we can build a FSM for string matching.
|
||||
Assume S has m chars, and there should be m+1 states. One is for the begin state, and the others are for matching state of each position of S.
|
||||
|
||||
Once we have built the FSM, we can run it on any input string.
|
||||
## KMP
|
||||
>Knuth-Morris-Pratt method
|
||||
|
||||
The idea is inspired by FSM. We can avoid computing the transition functions. Instead, we compute a prefix function `Next` on P in O(m) time, and Next has only m entries.
|
||||
> The prefix function stores info about how the pattern matches against shifts of itself.
|
||||
|
||||
- String w is a prefix of string x, if x=wy for some string y
|
||||
- String w is a suffix of string x, if x=yw for some string y
|
||||
- The k-character prefix of the pattern P [1..m] denoted by Pk.
|
||||
- Given that pattern prefix P [1..q] matches text characters T [(s+1)..(s+q)], what is the least shift s'> s such that P [1..k] = T [(s'+1)..(s'+k)] where s'+k=s+q?
|
||||
- At the new shift s', no need to compare the first k characters of P with corresponding characters of T.
|
||||
Method: For prefix pi, find the longest proper prefix of pi that is also a suffix of pi.
|
||||
next[q] = max{k|k\<q and pk is a suffix of pq}
|
||||
|
||||
For example: p = ababaca, for p5 = ababa, Next[5] = 3. Namely p3=aba is the longest prefix of p that is also a suffix of p5.
|
||||
|
||||
Time complexity: finding the prefix function `next` takes O(m), matching takes O(m+n)
|
||||
|
||||
## Boyer-Moore
|
||||
- The longer the pattern is, the faster it works.
|
||||
- Starts from the end of pattern, while KMP starts from the beginning.
|
||||
- Works best for character string, while KMP works best for binary string.
|
||||
- KMP and Boyer-Moore
|
||||
- Preprocessing existing patterns.
|
||||
- Searching patterns in input strings.
|
||||
## Sunday
|
||||
### features
|
||||
- simplification of the Boyer-Moore algorithm;
|
||||
- uses only the bad-character shift;
|
||||
- easy to implement;
|
||||
- preprocessing phase in O(m+sigma) time and O(sigma) space complexity;
|
||||
- searching phase in O(mn) time complexity;
|
||||
- very fast in practice for short patterns and large alphabets.
|
||||
### description
|
||||
The Quick Search algorithm uses only the bad-character shift table (see chapter Boyer-Moore algorithm). After an attempt where the window is positioned on the text factor y[j .. j+m-1], the length of the shift is at least equal to one. So, the character y[j+m] is necessarily involved in the next attempt, and thus can be used for the bad-character shift of the current attempt.
|
||||
|
||||
The bad-character shift of the present algorithm is slightly modified to take into account the last character of x as follows: for c in Sigma, qsBc[c]=min{i : 0 < i ≤ m and x[m-i]=c} if c occurs in x, m+1 otherwise (thanks to Darko Brljak).
|
||||
|
||||
The preprocessing phase is in O(m+sigma) time and O(sigma) space complexity.
|
||||
|
||||
During the searching phase the comparisons between pattern and text characters during each attempt can be done in any order. The searching phase has a quadratic worst case time complexity but it has a good practical behaviour.
|
||||
|
||||
For instance,
|
||||
![image.png](https://upload-images.jianshu.io/upload_images/7130568-76d130ae24603d51.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
|
||||
|
||||
In this example, t0, ..., t4 = a b c a b is the current text window that is compared with the pattern. Its suffix a b has matched, but the comparison c-a causes a mismatch. The bad-character heuristics of the Boyer-Moore algorithm (a) uses the "bad" text character c to determine the shift distance. The Horspool algorithm (b) uses the rightmost character b of the current text window. The Sunday algorithm (c) uses the character directly right of the text window, namely d in this example. Since d does not occur in the pattern at all, the pattern can be shifted past this position.
|
||||
|
||||
|
||||
# Reference:
|
||||
1. Xuyun, ppt, String matching
|
||||
2. [Sunday-algorithm](http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sunday.htm)
|
||||
3. GeeksforGeeks, [KMP Algorithm](https://www.geeksforgeeks.org/kmp-algorithm-for-pattern-searching/)
|
60
string/rabin_karp.py
Normal file
60
string/rabin_karp.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
#coding: utf-8
|
||||
''' mbinary
|
||||
#########################################################################
|
||||
# File : rabin_karp.py
|
||||
# Author: mbinary
|
||||
# Mail: zhuheqin1@gmail.com
|
||||
# Blog: https://mbinary.coding.me
|
||||
# Github: https://github.com/mbinary
|
||||
# Created Time: 2018-12-11 00:01
|
||||
# Description: rabin-karp algorithm
|
||||
#########################################################################
|
||||
'''
|
||||
|
||||
def isPrime(x):
    '''Trial-division primality check.

    Returns False as soon as some integer in 2..floor(sqrt(x)) divides x,
    and True otherwise. NOTE: there is no special case for values below 2,
    so 0 and 1 are reported as prime.
    '''
    limit = int(x ** 0.5)
    return all(x % divisor != 0 for divisor in range(2, limit + 1))
|
||||
def getPrime(x):
    '''Return the smallest prime that is not smaller than max(x, 2).

    For x >= 2 this is the same value as before (x itself when x is prime).
    For x < 2 it now returns 2; previously x <= 0 produced None and x == 1
    produced 1, neither of which is a usable modulus.
    '''
    candidate = max(x, 2)
    while not isPrime(candidate):
        candidate += 1
    return candidate
|
||||
def findAll(s, p):
    '''Return the start indices of all occurrences of p in s (Rabin-Karp).

    s: text, p: pattern; both must be the same kind of sequence (e.g. two
    str or two list) so that slices of s can be compared with p.

    Returns [-1] when p contains a symbol that never appears in s, and an
    empty list when p is empty or longer than s (both used to raise).
    Overlapping occurrences are reported.
    '''
    n, m = len(s), len(p)

    # Number the distinct symbols of the text 0..d-1; d is the radix.
    dic = {}
    for c in s:
        if c not in dic:
            dic[c] = len(dic)
    d = len(dic)

    # Exact numeric value of the pattern in radix d.
    sm = 0
    for c in p:
        if c not in dic:
            return [-1]
        sm = sm * d + dic[c]

    if m == 0 or m > n:
        return []

    ret = []
    # The first window is compared exactly, before any reduction mod q.
    cur = 0
    for i in range(m):
        cur = cur * d + dic[s[i]]
    if cur == sm:
        ret.append(0)

    q = getPrime(m)
    cur = cur % q
    sm = sm % q
    # Weight of the leading digit, reduced mod q without building d**(m-1).
    exp = pow(d, m - 1, q)
    for i in range(m, n):
        # Roll the window: drop s[i-m], shift one digit, append s[i].
        cur = ((cur - dic[s[i - m]] * exp) * d + dic[s[i]]) % q
        # Equal hashes may be a collision, so confirm with a direct compare.
        if cur == sm and p == s[i - m + 1:i + 1]:
            ret.append(i - m + 1)
    return ret
|
||||
|
||||
def randStr(n=3):
    '''Return a list of n random character codes in the range 'a'..'z'.

    Despite the name the result is a list of ints, not a str.
    '''
    # Imported here so the function also works when this module is imported
    # rather than run as a script (randint used to be bound only under the
    # __main__ guard).
    from random import randint
    return [randint(ord('a'), ord('z')) for i in range(n)]
|
||||
|
||||
if __name__ =='__main__':
|
||||
from random import randint
|
||||
s = randStr(50)
|
||||
p = randStr(1)
|
||||
print(s)
|
||||
print(p)
|
||||
print(findAll(s,p))
|
BIN
string/src/compare.jpg
Normal file
BIN
string/src/compare.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
BIN
string/src/general.jpg
Normal file
BIN
string/src/general.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
|
@ -28,30 +28,50 @@ def find(s,p):
|
|||
if s[ps] == p[pp]:
|
||||
ps,pp = ps+1,pp+1
|
||||
else:
|
||||
idx = ps-pp+np
|
||||
idx = ps+ np-pp
|
||||
if idx >=ns:return -1
|
||||
ch = s[idx]
|
||||
if ch in dic:
|
||||
ps += dic[ch]+1-pp
|
||||
else:
|
||||
ps += np-pp
|
||||
ps = idx+1
|
||||
pp = 0
|
||||
if pp==np:return ps-np
|
||||
else:
|
||||
else:
|
||||
return -1
|
||||
def test():
|
||||
s = [randint(78,88) for i in range(30)]
|
||||
p = [randint(78,88) for i in range(3)]
|
||||
def findAll(s,p):
|
||||
ns = len(s)
|
||||
np = len(p)
|
||||
i = 0
|
||||
ret = []
|
||||
while s:
|
||||
print(s,p)
|
||||
tmp = find(s,p)
|
||||
if tmp==-1: break
|
||||
ret.append(i+tmp)
|
||||
end = tmp+np
|
||||
i +=end
|
||||
s = s[end:]
|
||||
return ret
|
||||
|
||||
|
||||
|
||||
def randStr(n=3):
|
||||
return [randint(ord('a'),ord('z')) for i in range(n)]
|
||||
|
||||
def test(n):
|
||||
s = randStr(n)
|
||||
p = randStr(3)
|
||||
str_s = ''.join((chr(i) for i in s))
|
||||
str_p = ''.join((chr(i) for i in p))
|
||||
n1 = find(s,p)
|
||||
n2 = str_s.find(str_p)
|
||||
n2 = str_s.find(str_p) # 利用已有的 str find 算法检验
|
||||
if n1!=n2:
|
||||
print(n1,n2,str_p,str_s)
|
||||
return False
|
||||
return True
|
||||
if __name__ =='__main__':
|
||||
from random import randint
|
||||
n = 10000
|
||||
suc = sum(test() for i in range(n))
|
||||
print(f'test {n} times, success {suc} times')
|
||||
n = 1000
|
||||
suc = sum(test(n) for i in range(n))
|
||||
print('test {n} times, success {suc} times'.format(n=n,suc=suc))
|
||||
|
|
Loading…
Reference in New Issue
Block a user