Post 70

昔日我曾苍老,如今风华正茂

[HAOI2016]找相同字符

题目描述

给定两个字符串,求出在两个字符串中各取出一个子串使得这两个子串相同的方案数。两个方案不同当且仅当这两个子串中有一个位置不同。

输入输出格式

输入格式:

两行,两个字符串$s_1$,$s_2$,长度分别为$n_1,n_2$。$1 <=n_1, n_2<= 200000$,字符串中只有小写字母

输出格式:

输出一个整数表示答案

输入输出样例

输入样例#1:

1
2
aabb
bbaa

输出样例#1:

1
10

题解

首先想到最后的答案就是$h$数组所有区间的最小值之和减去后缀在相同字符串的所有区间之和。

这样问题转化成求三个序列的$h$所有子区间最小值之和。

再转化成考虑每个元素是哪些区间的最小值,因为最小值求并有单调性(单调不增),所以每个元素所控制的那些区间必定是一个连续区间的所有子区间。这是可以用两次单调栈完成的,具体如下。

从左到右维护一个单调上升的栈,如果栈顶$p$比当前位置$pos$的元素$q$要小,就让$p$出栈,记录$R_p = pos - 1$(因为不包括)。
从右向左同样,不过为了避免重复统计,如果第一次是小于才出栈,那么第二次小于等于就应该出栈,并同样记录​$L_p = pos+1$。这样对于两个相等元素同样是一个区间的最小值时只有一个会计入答案。

其实ST表+二分也是可以做的,原理还是最小值取并有单调性。。对于每个位置$i$,我们二分长度,到左边的位置为$p$,查ST表看看$[p,i)$最小值是不是大于等于$h_i$,如果是那就增加长度直到最小值小于当前元素。注意边界细节同单调栈做法相同,向右二分长度,位置为$p$时,应当查看$(i,p]$的最小值是否大于$h_i$,只有大于才能继续增大二分长度。

看上去单调栈更好写,实现使用单调栈。
时间复杂度都是$O(nlogn)$

然后因为一个sb细节又坑了我一上午。。。。。

好久没认真写题就是状态不行。

其实一开始想到了这种出错的可能性,但是由于答案是容斥结果看不出是那部分错的,正好wzx和我一样是容斥就去输出一下发现真的是那个特殊字符导致的错误,$h$数组里直接去掉那个元素就好了。

思考5分钟,实现4小时.jpg

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <iostream>
#include <stack>
#define maxn 500005
int sa[maxn] , rk[maxn] , pos[maxn] , c[maxn] , h[maxn] , n , m , ct , s[maxn] , s1[maxn] , s2[maxn] , n1 , n2 , L[maxn] , R[maxn];
char tmp[maxn] , tmp2[maxn];
long long ans;
std::stack<int> st;
inline void sort()
{
for(int i = 1 ; i <= m ; ++i) c[i] = 0;
for(int i = 1 ; i <= ct ; ++i) ++c[rk[pos[i]]] ;
for(int i = 1 ; i <= m ; ++i) c[i] += c[i-1];
for(int i = ct ; i >= 1 ; --i) sa[c[rk[pos[i]]]--] = pos[i];
}

inline void SA(int *s , int n)
{
std::memset(rk,0,sizeof(rk));
std::memset(pos,0,sizeof(pos));
std::memset(sa,0,sizeof(sa));
std::memset(h,0,sizeof(h));
for(int i = 1 ; i <= n ; ++i)
pos[i] = i , rk[i] = s[i];
m = 10000 , ct = n;
sort();
for(int k = 1 ; k <= n ; k <<= 1)
{
ct = 0 ;// !!!!!
for(int i = 1 ; i <= k ; ++i) pos[++ct] = n - k + i;
for(int i = 1 ; i <= n ; ++i) if(sa[i] > k) pos[++ct] = sa[i] - k;
sort();
std::swap(rk , pos);
rk[sa[1]] = ct = 1;
for(int i = 2 ; i <= n ; ++i)
rk[sa[i]] = (pos[sa[i]] == pos[sa[i-1]] && pos[sa[i] + k] == pos[sa[i-1] + k]) ? ct : ++ct;
if((m = ct) == n) break;
}
int k = 0;
for(int i = 1 ; i <= n ; ++i)
{
if(k) --k;
int cur = sa[rk[i]-1];
for( ; s[i + k] == s[cur + k] ; ++k);
h[rk[i]] = k;
}
}

inline void PRE(int n)
{
std::memset(L,0,sizeof(L));
std::memset(R,0,sizeof(R));
for(int i = 1 ; i <= n ; ++i)
{
// if(i == n1 + 1 && n > n1) continue;
while((int)st.size() && h[st.top()] > h[i]) R[st.top()] = i - 1 , st.pop();
st.push(i);
}
while((int)st.size()) R[st.top()] = n , st.pop();
for(int i = n ; i >= 1 ; --i)
{
// if(i == n1 + 1 && n > n1) continue;
while((int)st.size() && h[st.top()] >= h[i]) L[st.top()] = i + 1, st.pop();
st.push(i);
}
while((int)st.size()) L[st.top()] = 1 , st.pop();
}


inline void solve()
{
// printf("%d %d %d\n",n,n1,n2);
SA(s , n);
int dam = h[rk[n1+1]];
for(int i = dam + 1 ; i <= n ; ++i) h[i-1] = h[i];
h[n] = 0;
PRE(n);
for(int i = 1 ; i < n ; ++i)
{

// printf("DEBUG : : %lld\n",1ll*(n1+1-L[n1+1]+1)*(R[n1+1]-(n1+1)+1));
long long num = 1ll * (i - L[i] + 1) * (R[i] - i + 1);
// if(R[i] >= n1 + 1 && L[i] <= n1 + 1) num -= 1ll * (R[i] - (n1 + 1) + 1) * (i - L[i]);
ans += 1ll * num * h[i];
}
// printf("%lld\n",ans);
// puts("PART 1");
// for(int i = 1 ; i <= n ; ++i)
// printf("%d ",h[i]);
// putchar(10);
// for(int i = 1 ; i <= n ; ++i)
// printf("%d ",sa[i]);
// putchar(10);
// for(int i = 1 ; i <= n ; ++i)
// printf("%d %d\n",L[i],R[i]);
// printf("%lld\n",ans);

SA(s1 , n1);
PRE(n1);
long long tp1 = 0;
for(int i = 1 ; i <= n1 ; ++i)
{
long long num = 1ll * (i - L[i] + 1) * (R[i] - i + 1);
// if(R[i] >= n1 + 1 && L[i] <= n1 + 1) num -= 1ll * (R[i] - (n1 + 1) + 1) * (i - L[i]);
ans -= 1ll * num * h[i] , tp1 += 1ll * num * h[i];
}
// printf("%lld\n",tp1);
/* puts("PART 2");
for(int i = 1 ; i <= n1 ; ++i)
printf("%d ",h[i]);
putchar(10);
for(int i = 1 ; i <= n1 ; ++i)
printf("%d ",sa[i]);
putchar(10);
for(int i = 1 ; i <= n1 ; ++i)
printf("%d %d\n",L[i],R[i]);
printf("%lld\n",ans);
*/
SA(s2 , n2);
PRE(n2);
long long tp2 = 0;
for(int i = 1 ; i <= n2 ; ++i)
{
long long num = 1ll * (i - L[i] + 1) * (R[i] - i + 1);
// if(R[i] >= n1 + 1 && L[i] <= n1 + 1) num -= 1ll * (R[i] - (n1 + 1) + 1) * (i - L[i]);
ans -= 1ll * num * h[i] , tp2 += 1ll * num * h[i];
}
// printf("%lld\n",tp2);
/* puts("PART 3");
for(int i = 1 ; i <= n2 ; ++i)
printf("%d ",h[i]);
putchar(10);
for(int i = 1 ; i <= n2 ; ++i)
printf("%d ",sa[i]);
putchar(10);
for(int i = 1 ; i <= n2 ; ++i)
printf("%d %d\n",L[i],R[i]);
printf("%lld\n",ans);
*/
}

inline void read()
{
scanf("%s\n",tmp+1);
int nl = std::strlen(tmp+1);
for(int i = 1 ; i <= nl ; ++i)
s[i] = s1[i] = tmp[i];
n = n1 = nl;
s[++n] = 'z' + 412;
scanf("%s",tmp2+1);
nl = std::strlen(tmp2+1);
for(int i = 1 ; i <= nl ; ++i)
s[++n] = s2[++n2] = tmp2[i];

}

int main()
{
// freopen("data.in","r",stdin);
// freopen("my.out","w",stdout);
read();
solve();
printf("%lld",ans);
}

没删调试代码是正常代码长度3倍


SP8222 NSUBSTR - Substrings

题意翻译

你得到一个字符串,最多由25万个小写拉丁字母组成。我们将 F(x)定义为某些长度X的字符串在s中出现的最大次数,例如字符串’ababaf’- F(x),因为有一个字符串’ABA’出现两次。你的任务是输出 F(x)每一个I,以使1<=i<=|S|.

题目描述

You are given a string S which consists of 250000 lowercase latin letters at most. We define F(x) as the maximal number of times that some string with length x appears in S. For example for string ‘ababa’ F(3) will be 2 because there is a string ‘aba’ that occurs twice. Your task is to output F(i) for every i so that 1<=i<=|S|.

输入输出格式

输入格式:

String S consists of at most 250000 lowercase latin letters.

输出格式

Output |S| lines. On the i-th line output F(i).

输入输出样例

输入样例#1:

1
ababa

输出样例#1:

1
2
3
4
5
3
2
2
1
1

题解

很容易想出SAM的做法。
构建串$S$的后缀自动机,求出每个状态的出现次数(SAM笔记中有),然后就可以直接更新。。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <cstring>
#define maxn 500005

int ch[maxn][26] , ln[maxn] , lk[maxn] , tot = 1, c[maxn] , tp[maxn] , la = 1 , ocr[maxn] , f[maxn] , n;

inline void ins(int c)
{
int cur = ++tot ; ln[cur] = ln[la] + 1 , ocr[cur] = 1;
int p = la;
for( ; !ch[p][c] && p ; ch[p][c] = cur , p = lk[p]);
if(!p) lk[cur] = 1;
else
{
int q = ch[p][c];
if(ln[p] + 1 == ln[q]) lk[cur] = q;
else
{
int nq = ++tot;
//copy transfer edges
for(int i = 0 ; i < 26 ; ++i) ch[nq][i] = ch[q][i];
//set up status
ln[nq] = ln[p] + 1 , lk[nq] = lk[q] , lk[q] = lk[cur] = nq;
//modify suffix transfer edge
for( ; ch[p][c] == q ; ch[p][c] = nq , p = lk[p]);
}
}
la = cur;
}

inline void solve()
{
for(int i = 1 ; i <= tot ; ++i) ++c[ln[i]];
for(int i = 1 ; i <= n ; ++i) c[i] += c[i-1];
for(int i = 1 ; i <= tot ; ++i) tp[c[ln[i]]--] = i;
for(int i = tot ; i >= 1 ; --i)
ocr[lk[tp[i]]] += ocr[tp[i]];
for(int i = 1 ; i <= tot ; ++i)
f[ln[i]] = std::max(f[ln[i]] , ocr[i]);
for(int i = 1 ; i <= n ; ++i)
printf("%d\n",f[i]);
}

char s[maxn];
int main()
{
scanf("%s",s+1);
n = std::strlen(s+1);
for(int i = 1 ; i <= n ; ++i) ins(s[i] - 'a');
solve();
}

[AHOI2013]差异

题目描述

给定一个长度为 $n$的字符串 $S$,令 $T_i$ 表示它从第 $i$个字符开始的后缀。求

$\displaystyle \sum_{1\leqslant i<j\leqslant n}\text{len}(T_i)+\text{len}(T_j)-2\times\text{lcp}$

其中,$\text{len}(a)$表示字符串$a$ 的长度,$\text{lcp}(a,b)$表示字符串 $a$ 和字符串 $b$ 的最长公共前缀。

输入输出格式

输入格式:

一行,一个字符串 SS

输出格式:

一行,一个整数,表示所求值。

输入输出样例

输入样例#1:

1
cacao

输出样例#1:

1
54

说明

对于 100% 的数据,保证 $2\leqslant n\leqslant 500000$,且均为小写字母。

题解

由简单的数学感觉可知,$\sum{i=1}{n}\sum{j=i+1}{n} (n-i+1) + (n-j+1)$是一个$O(n^3)$的多项式
把它算出来是$\frac{n(n-1)(n+1)}{2}$
然后后面那部分就是$h$数组所有子区间最小值之和,由上午刚想的单调栈计算每个元素的影响区间即可。
时间复杂度$O(nlogn)$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// luogu-judger-enable-o2
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <iostream>
#include <stack>
#define maxn 500005
int sa[maxn] , rk[maxn] , pos[maxn] , c[maxn] , h[maxn] , n , m , ct , s[maxn] , s1[maxn] , s2[maxn] , n1 , n2 , L[maxn] , R[maxn];
char tmp[maxn] , tmp2[maxn];
long long ans;
std::stack<int> st;
inline void sort()
{
for(int i = 1 ; i <= m ; ++i) c[i] = 0;
for(int i = 1 ; i <= ct ; ++i) ++c[rk[pos[i]]] ;
for(int i = 1 ; i <= m ; ++i) c[i] += c[i-1];
for(int i = ct ; i >= 1 ; --i) sa[c[rk[pos[i]]]--] = pos[i];
}

inline void SA(int *s , int n)
{
std::memset(rk,0,sizeof(rk));
std::memset(pos,0,sizeof(pos));
std::memset(sa,0,sizeof(sa));
std::memset(h,0,sizeof(h));
for(int i = 1 ; i <= n ; ++i)
pos[i] = i , rk[i] = s[i];
m = 10000 , ct = n;
sort();
for(int k = 1 ; k <= n ; k <<= 1)
{
ct = 0 ;// !!!!!
for(int i = 1 ; i <= k ; ++i) pos[++ct] = n - k + i;
for(int i = 1 ; i <= n ; ++i) if(sa[i] > k) pos[++ct] = sa[i] - k;
sort();
std::swap(rk , pos);
rk[sa[1]] = ct = 1;
for(int i = 2 ; i <= n ; ++i)
rk[sa[i]] = (pos[sa[i]] == pos[sa[i-1]] && pos[sa[i] + k] == pos[sa[i-1] + k]) ? ct : ++ct;
if((m = ct) == n) break;
}
int k = 0;
for(int i = 1 ; i <= n ; ++i)
{
if(k) --k;
int cur = sa[rk[i]-1];
for( ; s[i + k] == s[cur + k] ; ++k);
h[rk[i]] = k;
}
}

inline void PRE(int n)
{
std::memset(L,0,sizeof(L));
std::memset(R,0,sizeof(R));
for(int i = 1 ; i <= n ; ++i)
{
// if(i == n1 + 1 && n > n1) continue;
while((int)st.size() && h[st.top()] > h[i]) R[st.top()] = i - 1 , st.pop();
st.push(i);
}
while((int)st.size()) R[st.top()] = n , st.pop();
for(int i = n ; i >= 1 ; --i)
{
// if(i == n1 + 1 && n > n1) continue;
while((int)st.size() && h[st.top()] >= h[i]) L[st.top()] = i + 1, st.pop();
st.push(i);
}
while((int)st.size()) L[st.top()] = 1 , st.pop();
}


inline void solve()
{
// printf("%d %d %d\n",n,n1,n2);
SA(s , n);
int dam = h[rk[n1+1]];
for(int i = dam + 1 ; i <= n ; ++i) h[i-1] = h[i];
h[n] = 0;
PRE(n);
for(int i = 1 ; i < n ; ++i)
{

// printf("DEBUG : : %lld\n",1ll*(n1+1-L[n1+1]+1)*(R[n1+1]-(n1+1)+1));
long long num = 1ll * (i - L[i] + 1) * (R[i] - i + 1);
// if(R[i] >= n1 + 1 && L[i] <= n1 + 1) num -= 1ll * (R[i] - (n1 + 1) + 1) * (i - L[i]);
ans += 1ll * num * h[i];
}
}

inline void read()
{
scanf("%s\n",tmp+1);
int nl = std::strlen(tmp+1);
for(int i = 1 ; i <= nl ; ++i)
s[i] = s1[i] = tmp[i];
n = n1 = nl;

}

int main()
{
// freopen("data.in","r",stdin);
// freopen("my.out","w",stdout);
read();
solve();
printf("%lld",1ll * n * (n-1) * (n + 1) / 2 - 2 * ans);
}

SP7258 SUBLEX - Lexicographical Substring Search

题意翻译

给定一个字符串,求本质不同排名第k小的子串

输入格式:

第一行给定主串$(len<=90000)$

第二行给定询问个数$T \leq 500$

随后给出$T$行$T$个询问,每次询问排名第$k$小的串,范围在$int$内

输出格式:

对于每一个询问,输出T行,每行为排名第k小的串

感谢@Creeper_LKF 提供的翻译

题目描述

Little Daniel loves to play with strings! He always finds different ways to have fun with strings! Knowing that, his friend Kinan decided to test his skills so he gave him a string S and asked him Q questions of the form:

If all distinct substrings of string S were sorted lexicographically, which one will be the K-th smallest?

After knowing the huge number of questions Kinan will ask, Daniel figured out that he can’t do this alone. Daniel, of course, knows your exceptional programming skills, so he asked you to write him a program which given S will answer Kinan’s questions.
Example:

S = “aaa” (without quotes)
substrings of S are “a” , “a” , “a” , “aa” , “aa” , “aaa”. The sorted list of substrings will be:
“a”, “aa”, “aaa”.

Input

In the first line there is Kinan’s string S (with length no more than 90000 characters). It contains only small letters of English alphabet. The second line contains a single integer Q (Q <= 500) , the number of questions Daniel will be asked. In the next Q lines a single integer K is given (0 < K < 2^31).

Output

Output consists of Q lines, the i-th contains a string which is the answer to the i-th asked question.

输入输出格式

输入格式:

输出格式:

输入输出样例

输入样例#1:

1
2
3
4
aaa
2
2
3

输出样例#1:

1
2
aa
aaa

题解

后缀数组和后缀自动机都能做

后缀数组只需要每次按照排好序的后缀枚举,记得用$h$数组去重即可不断分治缩小$k$。
如果想提高效率不妨对$n - sa_i + 1 - h_i$做一个前缀和后二分出第一个大于等于$k$的$i$,就能确定子串。

时间复杂度$O(nlogn+Tlogn)$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <cstring>
#define maxn 100055
int sa[maxn] , rk[maxn] , c[maxn] , pos[maxn] , h[maxn] , n , m , ct;
char s[maxn];
inline void sort()
{
for(int i = 1 ; i <= m ; ++i) c[i] = 0;
for(int i = 1 ; i <= n ; ++i) ++c[rk[pos[i]]];
for(int i = 1 ; i <= m ; ++i) c[i] += c[i-1];
for(int i = ct ; i >= 1 ; --i) sa[c[rk[pos[i]]]--] = pos[i];
}

inline void SA()
{
for(int i = 1 ; i <= n ; ++i)
pos[i] = i , rk[i] = s[i];
ct = n , m = 10000;
sort();
for(int k = 1 ; k <= n ; k <<= 1)
{
ct = 0;
for(int i = 1 ; i <= k ; ++i) pos[++ct] = n - k + i;
for(int i = 1 ; i <= n ; ++i) if(sa[i] > k) pos[++ct] = sa[i] - k;
sort();
std::swap(rk , pos);
rk[sa[1]] = ct = 1;
for(int i = 2 ; i <= n ; ++i)
rk[sa[i]] = (pos[sa[i]] == pos[sa[i-1]] && pos[sa[i]+k] == pos[sa[i-1]+k]) ? ct : ++ct;
if((m = ct) == n) break;
}
int k = 0;
for(int i = 1 ; i <= n ; ++i)
{
if(k) --k;
for(int p = sa[rk[i]-1] ; s[i+k] == s[p+k] ; ++k);
h[rk[i]] = k;
}
}

inline void query(int k)
{
int l = 0 , r = 0;
for(int i = 1 ; i <= n ; ++i)
{
if(n - sa[i] + 1 - h[i] < k) k -= n - sa[i] + 1 - h[i];
else{
l = sa[i] , r = sa[i] + k - 1 + h[i];
break;
}
}
for(int i = l ; i <= r ; ++i) putchar(s[i]);
putchar(10);
}

int main()
{
scanf("%s",s+1);
n = std::strlen(s+1);
SA();
int t;
scanf("%d",&t);
while(t--)
{
int k;
scanf("%d",&k);
query(k);
}
}

后缀自动机则需要根据$Parent~Tree$的平凡性质:一个状态的$Endpos$是它子树$Endpos$的并。
然后树上递推出子树和后和SA一样的分治求第$K$大策略。
不过后缀自动机求第$k$大没有优化空间(暂时没想出来),每次$O(answer)=O(n)$的。

CF25E Test

题意翻译

给定3个字符串s1,s2,s3,试求一个字符串,使s1,s2,s3都是这个字符串的子串,并使这个字符串最短。输出最短字符串的长度l。 s1,s2,s3长度<=100000

Translated by 稀神探女

题目描述

Sometimes it is hard to prepare tests for programming problems. Now Bob is preparing tests to new problem about strings — input data to his problem is one string. Bob has 3 wrong solutions to this problem. The first gives the wrong answer if the input data contains the substring s{1}s1 , the second enters an infinite loop if the input data contains the substring s{2}s2 , and the third requires too much memory if the input data contains the substring s_{3}s3 . Bob wants these solutions to fail single test. What is the minimal length of test, which couldn’t be passed by all three Bob’s solutions?

输入输出格式

输入格式:

There are exactly 3 lines in the input data. The ii -th line contains string s_{i}s**i . All the strings are non-empty, consists of lowercase Latin letters, the length of each string doesn’t exceed 10^{5}105 .

输出格式:

Output one number — what is minimal length of the string, containing s{1}s1 , s{2}s2 and s_{3}s3 as substrings.

输入输出样例

输入样例#1:

1
2
3
ab
bc
cd

输出样例#1:

1
4

输入样例#2:

1
2
3
abacaba
abaaba
x

输出样例#2:

1
11

题解

本来以为是道和SAM有关的难题,结果发现是道sb题。

只需要处理两两间收尾重合长度(用Hash),然后枚举各种情况去最小就好了。

(不过被傻逼细节卡了弃了


SP1812 LCS2 - Longest Common Substring II

题意翻译

题面描述 给定一些字符串,求出它们的最长公共子串 输入格式 输入至多$10$ 行,每行包含不超过$100000$ 个的小写字母,表示一个字符串 输出格式 一个数,最长公共子串的长度 若不存在最长公共子串,请输出$0$ 。

题目描述

A string is finite sequence of characters over a non-empty finite set Σ.

In this problem, $\sum$ is the set of lowercase letters.

Substring, also called factor, is a consecutive sequence of characters occurrences at least once in a string.

Now your task is a bit harder, for some given strings, find the length of the longest common substring of them.

Here common substring means a substring of two or more strings.

输入输出格式

输入格式:

The input contains at most 10 lines, each line consists of no more than 100000 lowercase letters, representing a string.

输出格式:

The length of the longest common substring. If such string doesn’t exist, print “0” instead.

输入输出样例

输入样例#1:

1
2
3
alsdfkjfjkdsal
fdjskalajfkdsla
aaaajfaaaa

输出样例#1:

1
2

题解

一道有些扩展的$SAM$应用题。

由于最长公共子串一定存在于任意一个串,所以我们对任意一个串建后缀自动机。

然后考虑如何匹配所有的串。

多个串的公共子串对于每一个等价类,应该是每个串匹配最大长度取最小。

然后用每个状态的所有串最大匹配最小值更新答案,就做完了这道题。

(我实在不知道该怎么证明这种题做法的正确性,大概归纳证明?)

代码又调了一年,不要思维混乱的初始化错值,取最大变取最小,取最小变取最大,etc..

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <cstring>
#define maxn 400005
int ln[maxn] , lk[maxn] , ch[maxn][26] , n , m , tot = 1 , la = 1 , ml[maxn] , mxl[maxn] , c[maxn] , tp[maxn] , cur = 1, curans;
char s[maxn];

inline void ins(int c)
{
int cur = ++tot;
ln[cur] = ln[la] + 1 ;
int p = la;
for( ; !ch[p][c] && p ; ch[p][c] = cur , p = lk[p]);
if(!p) lk[cur] = 1;
else
{
int q = ch[p][c];
if(ln[p] + 1 == ln[q]) lk[cur] = q;
else
{
int nq = ++tot;
//copy transfer edges
for(int i = 0 ; i < 26 ; ++i) ch[nq][i] = ch[q][i];
// set up new status
ln[nq] = ln[p] + 1 , lk[nq] = lk[q] , lk[cur] = lk[q] = nq;
//modify DAWG
for( ; ch[p][c] == q && p ; ch[p][c] = nq , p = lk[p]);
}
}
// printf("SAM:%d\n",lk[cur]);
la = cur;
}


inline void sort()
{
for(int i = 1 ; i <= tot ; ++i) ++c[ln[i]];
for(int i = 1 ; i <= n ; ++i) c[i] += c[i-1];
for(int i = 1 ; i <= tot ; ++i) tp[c[ln[i]]--] = i;
}

inline void upd(int c)
{
// puts("OK");
while(!ch[cur][c] && cur) cur = lk[cur] , curans = ln[cur] , mxl[cur] = std::max(mxl[cur] , curans);
if(!cur) { cur = 1 , curans = 0 ; return ; }
else cur = ch[cur][c] , ++curans , mxl[cur] = std::max(mxl[cur] , curans);
// printf("DEBUG :%d\n",curans);
}
inline void work(char* s)
{
cur = 1 , curans = 0;
for(int i = 1 ; i <= n ; ++i)
upd(s[i] - 'a');
for(int i = tot ; i >= 1 ; --i)
mxl[lk[tp[i]]] = std::max(mxl[lk[tp[i]]] , std::min(ln[lk[tp[i]]] , mxl[tp[i]])) ,
ml[tp[i]] = std::min(ml[tp[i]] , mxl[tp[i]]) , mxl[tp[i]] = 0;
}


int main()
{
// freopen("data.in","r",stdin);
// freopen("my.out","w",stdout);
std::memset(ml,0x3f,sizeof(ml));
int asfasd = 0;
while(scanf("%s",s+1) != EOF)
{
++asfasd;
n = std::strlen(s+1);
if(asfasd == 1)
{
for(int i = 1 ; i <= n ; ++i)
ins(s[i] - 'a');
sort();
}
else work(s);
}
int ans = 0;
for(int i = 1 ; i <= tot ; ++i)
ans = std::max(ans , ml[i]);
printf("%d\n",ans);
}