前缀树,用来快速匹配一段文本中是否包含某些指定的文本数据。
算法类 Trie
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
/**
* 前缀树
*/
public class Trie {
private final TrieNode root;
public Trie() {
root = new TrieNode();
}
public void insert(String keyWord) {
TrieNode node = root;
for (int i = 0; i < keyWord.length(); i++) {
char c = keyWord.charAt(i);
if (!node.containsKey(c)) {
node.put(c, new TrieNode());
}
node = node.get(c);
}
node.setEnd();
}
public boolean search(String text) {
return search(text, 0);
}
public boolean search(String text, int startIndex) {
if (startIndex < 0) {
startIndex = 0;
} else if (startIndex >= text.length()) {
return false;
}
TrieNode node = root;
for (int i = startIndex; i < text.length(); i++) {
char c = text.charAt(i);
TrieNode childNode = node.get(c);
if (childNode == null) {
node = root;
} else if (childNode.isEnd()) {
return true;
} else {
node = childNode;
}
}
return node.isEnd();
}
static class TrieNode {
private final Map<Character, TrieNode> children = new HashMap<>();
private boolean isEndOfWord;
public TrieNode() {
isEndOfWord = false;
}
public boolean containsKey(char ch) {
return children.containsKey(ch);
}
public TrieNode get(char ch) {
return children.get(ch);
}
public void put(char ch, TrieNode node) {
children.put(ch, node);
}
public boolean isEnd() {
return isEndOfWord;
}
public void setEnd() {
isEndOfWord = true;
}
}
}
|
使用场景案例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
/**
* 敏感词前缀树(字典树)
*/
@Slf4j
@Component
public class ChatSensitiveTrie {
/**
* 敏感词前缀树。
* 注意:敏感词更新频率很低,且对实时性没有要求。不要加任何同步机制。连volatile都不要加。
*/
private Trie prefixTrie = new Trie();
/**
* 初始化
*
* @param sensitiveWords 敏感词列表
*/
public synchronized void init(List<String> sensitiveWords) {
Trie trie = new Trie();
for (String sensitiveWord : sensitiveWords) {
// 忽略大小写
String lowerWord = sensitiveWord.toLowerCase();
trie.insert(lowerWord);
}
prefixTrie = trie;
log.info("敏感词初始化成功,共初始化敏感词{}个", sensitiveWords.size());
log.debug("敏感词内容:{}", sensitiveWords);
}
/**
* 是否涉及敏感词
*
* @param chatMessage 聊天信息
* @return 是否涉及敏感词 true/false
*/
public boolean isSensitive(String chatMessage) {
String lowerCaseMsg = chatMessage.toLowerCase();
return prefixTrie.search(lowerCaseMsg);
}
}
|