This article is participating in Python Theme Month. See the link for details

introduce

This article implements prefix trees in Python and supports editing distance fault-tolerant queries. The prefix tree in this paper only stores three participles in the format of (participle string, frequency), such as :(‘ zhonghai jinxiyuan ‘, 2), (‘ zhonghai xiyuan ‘, 24), (‘ zhongnanhai ‘, 4), which can be replaced with their own files for data replacement. Specify a string and a maximum error tolerance edit distance when querying.

implementation

class Word: def __init__(self, word, freq): self.word = word self.freq = freq class Trie: def __init__(self): self.root = LetterNode('') self.START = 3 def insert(self, word, freq): self.root.insert(word, freq, 0) def findAll(self, query, maxDistance): suggestions = self.root.recommend(query, maxDistance, self.START) return sorted(set(suggestions), key=lambda x: x.freq) class LetterNode: def __init__(self, char): self.REMOVE = -1 self.ADD = 1 self.SAME = 0 self.CHANGE = 2 self.START = 3 self.pointers = [] self.char = char self.word  = None def charIs(self, c): return self.char == c def insert(self, word, freq, depth): if ' ' in word: word = [i for i in word.split(' ')] if depth < len(word): c = word[depth].lower() for next in self.pointers: if next.charIs(c): return next.insert(word, freq, depth + 1) nextNode = LetterNode(c) self.pointers.append(nextNode) return nextNode.insert(word, freq, depth + 1) else: self.word = Word(word, freq) def recommend(self, query, movesLeft, lastAction): suggestions = [] length = len(query) if length >= 0 and movesLeft - length >= 0 and self.word: suggestions.append(self.word) if movesLeft == 0 and length > 0: for next in self.pointers: if next.charIs(query[0]): suggestions += next.recommend(query[1:], movesLeft, self.SAME) break elif movesLeft > 0: for next in self.pointers: if length > 0: if next.charIs(query[0]): suggestions += next.recommend(query[1:], movesLeft, self.SAME) else: suggestions += next.recommend(query[1:], movesLeft - 1, self.CHANGE) if lastAction ! = self.CHANGE and lastAction ! = self.REMOVE: suggestions += next.recommend(query, movesLeft - 1, self.ADD) if lastAction ! = self.ADD and lastAction ! = self.CHANGE: if length > 1 and next.charIs(query[1]): suggestions += next.recommend(query[2:], movesLeft - 1, self.REMOVE) elif length > 2 and next.charIs(query[2]) and movesLeft == 2: suggestions += next.recommend(query[3:], movesLeft - 2, self.REMOVE) else: if lastAction ! = self.CHANGE and lastAction ! = self.REMOVE: suggestions += next.recommend(query, movesLeft - 1, self.ADD) return suggestions def buildTrieFromFile(): Trie = trie () rows = [(' China jin west park, 2), (' China west park, 24), (' zhongnanhai, 4)] for the row in rows: trie.insert(row[0], int(row[1])) return trie def suggestor(trie, s, maxDistance): if ' ' in s: s = [x for x in s.split(' ')] suggestions = trie.findAll(s, maxDistance) return [str(x.word) for x in suggestions] if __name__ == "__main__": Trie = buildTrieFromFile() r = suggestor(trie, 'zhongzhongxiyuan ', 1) print(r) suggestorCopy the code

Analysis of the

Results print:

[' Zhonghai Jin West Garden ', 'Zhonghai West Garden ']Copy the code

It can be seen that “Zhonghai Jin Xiyuan” is exactly the same as the input string, and the editing distance is 0, so it meets the requirement that the maximum editing distance is 1.

“Zhonghai West Garden” is the result of removing the word “Jin” from “Zhonghai Jin West Garden”, the editing distance is 1, so it meets the requirement of maximum editing distance of 1, and returns directly.

In addition, the editing distance of “Zhongnanhai” and “Zhonghai Jin Xiyuan” is 4, which does not meet the requirement of the maximum editing distance of 1, so it does not appear in the result.

reference

Github.com/leoRoss/Aut…