''' mbinary
#########################################################################
# File : markov.py
# Author: mbinary
# Mail: zhuheqin1@gmail.com
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-07-06 15:57
# Description: first-order (word-level) Markov chain text generator
#########################################################################
'''
from random import randint
import re


class markov:
    '''Word-level, first-order Markov chain built from a text.

    Attributes:
        words: list of tokens produced by ``clean`` from the input text.
        dic: transition table ``{word: {successor: count}}`` produced by
            ``getDic`` from ``words``.
    '''

    def __init__(self, txt):
        '''Tokenize ``txt`` and build the transition table from it.'''
        self.words = self.clean(txt)
        self.dic = self.getDic(self.words)

    def clean(self, text):
        '''Split ``text`` into a list of word tokens.

        Newlines become spaces and double quotes are removed.  Returns a
        list of non-empty strings (an empty list for blank input).
        '''
        text = text.replace("\n", " ")
        text = text.replace("\"", "")

        # Make sure every punctuation mark stays attached to the word
        # before it (and is separated from the word after it), so that it
        # is not dropped and remains part of the Markov chain.
        punctuation = [',', '.', ';', ':']
        for symbol in punctuation:
            text = text.replace(symbol, symbol + " ")

        # re.split yields '' for leading/trailing runs of spaces (e.g. a
        # text that ends with "." or a newline); drop those so that the
        # empty string never becomes a "word" in the chain.
        return [word for word in re.split(' +', text) if word]

    def getDic(self, words):
        '''Count, for every word, how often each other word follows it.

        Args:
            words: sequence of tokens in text order.

        Returns:
            A dict ``{word: {successor: count}}``.  A word that is never
            followed by another one (e.g. one occurring only as the last
            token) has no entry.
        '''
        dic = {}
        for prev, cur in zip(words, words[1:]):
            successors = dic.setdefault(prev, {})
            successors[cur] = successors.get(cur, 0) + 1
        return dic

    def getSum(self, dic):
        '''Return the total of all counts in a ``{successor: count}`` dict.

        The dict is left untouched: no bookkeeping key is stored in it, so
        it only ever contains real successor words.
        '''
        return sum(dic.values())

    def nextWord(self, word):
        '''Pick a random successor of ``word``, weighted by its count.

        Raises:
            KeyError: if ``word`` has no recorded successor.
        '''
        successors = self.dic[word]
        # Draw a 1-based position in the cumulative counts and walk the
        # successors until that position is reached.
        k = randint(1, self.getSum(successors))
        for candidate, count in successors.items():
            k -= count
            if k <= 0:
                return candidate

    def genSentence(self, begin='I', length=30):
        '''Generate a space-joined sentence of at most ``length`` words.

        The sentence starts with ``begin`` and each following word is
        drawn with ``nextWord``.  Generation stops early when the current
        word has no recorded successor (for example the last word of the
        source text, or a ``begin`` word that is absent from it) instead
        of failing with ``KeyError``.
        '''
        li = [begin]
        current = begin
        for _ in range(1, length):
            if current not in self.dic:
                # Dead end: nothing ever followed this word in the text.
                break
            current = self.nextWord(current)
            li.append(current)
        return ' '.join(li)