一个OOP的AC自动机代码
网上代码很多,但是大多ACMer的风格,呃不是我说,代码可读性和封装性是比较欠缺的…也许Java出身的码农也就这点还有些优势了吧…纯自己手动敲的,build_ac函数(建立fail指针的过程)学习了网上的教程后模仿着写的,而且带clear()释放内存。
AC自动机基于Trie树,Trie树基于字符表,宏定义SIZE表明字符集大小,宏定义MINCHAR表明字符集中最小的字符,这里的定义只适合26个英文小写字母。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | #define SIZE 26 #define MINCHAR ('a') struct TrieNode{ int tail; //表明此节点为某一字符串结尾 TrieNode* fail; //失败指针 TrieNode* nodes[SIZE]; //节点指针的数组 TrieNode(){ for(int i = 0;i < SIZE;i++) nodes[i] = NULL; tail = 0, fail = NULL; } void clear(){ for(int i = 0;i < SIZE;i++){ if(nodes[i] != NULL){ nodes[i]->clear(); delete nodes[i]; nodes[i] = NULL; } } } }; struct Trie{ TrieNode* root; Trie(){ root = new TrieNode(); } //清空Trie树,释放内存 void clear(){ root->clear(); } //插入一个字符串 void insert(char *s, int len){ TrieNode* N = root; int idx; for(int i = 0;i < len;i++){ idx = (unsigned char)s[i] - MINCHAR; if(N->nodes[idx] == NULL){ N->nodes[idx] = new TrieNode(); } N = N->nodes[idx]; } N->tail = 1; } //插入字符串完毕后,建立fail指针 void build_ac(){ queue<TrieNode*> q; TrieNode *nd, *child, *pt; root->fail = NULL; q.push(root); while(!q.empty()){ nd = q.front(); q.pop(); pt = NULL; for(int i = 0;i < SIZE;i++){ child = nd->nodes[i]; if(child != NULL){ if(nd == root) child->fail = root; else{ pt = nd->fail; while(pt != NULL){ if(pt->nodes[i] != NULL){ child->fail = pt->nodes[i]; break; } pt = pt->fail; } if(pt == NULL) child->fail = root; } q.push(child); } } } } //建立fail指针完毕后,在指定的字符串文本数据中来搜索匹配串 void process(char *s, int len){ TrieNode *nd = root, *nd2; int idx; for(int i = 0;i < len;i++){ idx = (unsigned char)s[i] - MINCHAR; while(nd != root && nd->nodes[idx] == NULL) nd = nd->fail; if(nd->nodes[idx] != NULL) { nd = nd->nodes[idx]; nd2 = nd; while(nd2 != root){ if(nd2->tail == 1){ //找到一个匹配 } nd2 = nd2->fail; } } } } }; |
评论 (0)
留下评论