JavaScript上万关键字瞬间匹配
既然考虑的是那种极端的关键字搜索,通常的逐个遍历搜索显然是行不通的。如今用的是JavaScript,若不使用Hash表实在是太对不起这门语言了。有着对表特天独厚的支持,不妨就拿出少量的空间来换取大量的时间吧。
代码在下面:
var treeSearch = { makeTree: function(strKeys) { 'use strict'; var tblCur = {}, tblRoot, key, str_key, Length, j, i; tblRoot = tblCur; for (j = strKeys.length - 1; j >= 0; j -= 1) { str_key = strKeys[j]; Length = str_key.length; for (i = 0; i < Length; i += 1) { key = str_key.charAt(i); if (tblCur.hasOwnProperty(key)) { //生成子节点 tblCur = tblCur[key]; } else { tblCur = tblCur[key] = {}; } } tblCur.end = true; //最后一个关键字没有分割符 tblCur = tblRoot; } return tblRoot; }, search: function(content, tblRoot) { 'use strict'; var tblCur, p_star = 0, n = content.length, p_end, match, //是否找到匹配 match_key, match_str, arrMatch = [], //存储结果 arrLength = 0; //arrMatch的长度索引 while (p_star < n) { tblCur = tblRoot; //回溯至根部 p_end = p_star; match_str = ''; match = false; do { match_key = content.charAt(p_end); if (!(tblCur = tblCur[match_key])) { //本次匹配结束 p_star += 1; break; } else { match_str += match_key; } p_end += 1; if (tblCur.end === true) { //是否匹配到尾部 //找到匹配关键字 match = true; } } while (true); if (match === true) { //最大匹配 arrMatch[arrLength] = { //增强可读性 key: match_str, begin: p_star - 1, end: p_end }; arrLength += 1; p_star = p_end; } } return arrMatch; } };
测试代码:
function test(strContent, strKeys) { var arrMatch, tblRoot = treeSearch.makeTree(strKeys), t = new Date(); arrMatch = treeSearch.search(strContent, tblRoot); console.log('time is: ' + (new Date() - t) + 'mm'); console.log(arrMatch); } var s = (function() { var Things = [' ', '\n', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']; var s = ''; for (var i = 1000000; i >= 0; i--) { s += Things[parseInt(Math.random() * Things.length) % Things.length]; } return s; })(); test(s, ['abc', 'efge', 'fun', 'tree']);