免费小程序模板/windows优化大师要钱
字符串哈希
传送门
核心思想:将字符串看成P进制数,P的经验值是131或13331,取这两个值的冲突概率低
小技巧:取模的数用2^64,这样直接用unsigned long long存储,溢出的结果就是取模的结果typedef unsigned long long ULL;
ULL h[N], p[N]; // h[k]存储字符串前k个字母的哈希值, p[k]存储 P^k mod 2^64// 初始化
p[0] = 1;
for (int i = 1; i <= n; i ++ )
{h[i] = h[i - 1] * P + str[i];p[i] = p[i - 1] * P;
}// 计算子串 str[l ~ r] 的哈希值
ULL get(int l, int r)
{return h[r] - h[l - 1] * p[r - l + 1];
}
力扣题目
1044. 最长重复子串 - 力扣(LeetCode)
class Solution {static const int N = 4e4;
public:typedef unsigned long long ull;ull h[N], p[N];int start;int n;int P = 131;ull get_hash(int l, int r){return h[r] - h[l - 1] * p[r - l + 1];}string longestDupSubstring(string s) {//很容易想到的思路是求解每段区间的最长前后缀//但是kmp算只能求开头到任意位置的next,解决这个问题是n^2的复杂度//看到数据范围很容易想到二分,当时只想怎么用kmp实现了,忘了字符串匹配的另一种方式,hashstringn = s.size();p[0] = 1;for(int i = 1; i <= n; i ++ ){h[i] = h[i - 1] * P + s[i - 1];p[i] = p[i - 1] * P;}int l = 0, r = N;string res;while(l <= r)//二分长度,复杂度为nlogn{int mid = (l + r) / 2;if(check(mid)){l = mid + 1;res = s.substr(start - 1, mid);}else r = mid - 1;}return res;}bool check(int len){unordered_map<ull, bool> mp;//记录出现过的字符串,若有相同长度相同值得字符串,那么就是重复子串for(int i = 1; i + len - 1 <= n; i ++ ){ull cnt = get_hash(i, i + len - 1);if(mp[cnt]){start = i;return true;}mp[cnt] = true;}return false;}
};
686. 重复叠加字符串匹配 - 力扣(LeetCode)
class Solution {
public:typedef unsigned long long ull;ull P = 131;string s;ull h[40010], p[40010];ull get(int l, int r){return h[r] - h[l - 1] * p[r - l + 1];}int repeatedStringMatch(string a, string b) {string s;int res = 0;int n = a.size(), m = b.size();while(s.size() < b.size()){s.append(a);res ++;}s.append(a);s.append(b);int len = s.size();cout << len;p[0] = 1;for(int i = 1; i <= len; i ++ ){h[i] = h[i - 1] * P + s[i - 1];p[i] = p[i - 1] * P;}int index = -1;ull target = get(len - m + 1, len);for(int i = 1; i + m - 1 <= len - m; i ++ ){if(target == get(i, i + m - 1)){index = i;break;}}if(index == -1) return -1;return index <= len - m - n - m + 1 ? res : res + 1;}
};