一、内容
Great! Your new software is almost finished! The only thing left to do is archiving all your n resource files into a big one.
Wait a minute… you realized that it isn’t as easy as you thought. Think about the virus killers. They’ll find your software suspicious, if your software contains one of the m predefined virus codes. You absolutely don’t want this to happen.
Technically, resource files and virus codes are merely 01 strings. You’ve already convinced yourself that none of the resource strings contain a virus code, but if you make the archive arbitrarily, virus codes can still be found somewhere.
Here comes your task (formally): design a 01 string that contains all your resources (their occurrences can overlap), but none of the virus codes. To make your software smaller in size, the string should be as short as possible.
Input
There will be at most 10 test cases, each begins with two integers in a single line: n and m (2 <= n <= 10, 1 <= m <= 1000). The next n lines contain the resources, one in each line. The next m lines contain the virus codes, one in each line. The resources and virus codes are all non-empty 01 strings without spaces inside. Each resource is at most 1000 characters long. The total length of all virus codes is at most 50000. The input ends with n = m = 0.
Output
For each test case, print the length of shortest string.
Sample Input
2 2
1110
0111
101
1001
0 0
Sample Output
5
二、思路
- 求出不包含任何病毒串、但包含所有资源串的最短 01 串的长度。
- 先把所有资源串和病毒串插入同一个 AC 自动机,并沿 fail 指针传递病毒标记和资源串标记。然后可以用 bfs(不能经过病毒节点)求出每个资源串结尾的节点(AC 自动机上)到另外一个资源串节点的最短距离。那么我们只需要求从 AC 自动机的根节点出发、遍历所有资源串节点的最短总距离,它就是答案。这就是一个 TSP 问题,用状态压缩 dp 求一下即可。
三、代码
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <queue>
#include <vector>
using namespace std;
// Capacity limits.
//   N   - size of the small fixed tables below (at most 10 resources plus the
//         root, with some slack).
//   M   - upper bound on trie nodes: 10 resources * 1000 characters plus
//         50000 virus characters plus the root.
//   INF - "not reached yet" marker for the shortest-length DP.
const int N = 15, M = 60005, INF = 0x3f3f3f3f;

// n, m  - number of resources / virus codes of the current test case
// len   - index of the most recently created trie node (node 0 is the root)
// cnt   - number of entries in id[]
// tr    - trie children; build() completes them into automaton transitions
// fail  - failure links of the automaton
// ed    - per node: -1 marks a virus state, otherwise a bitmask of the
//         resources that end at this node
// dis   - scratch array for BFS distances
// id    - automaton node of each key node (entry 0 is the root)
// g     - g[a][b]: shortest distance from key node a to key node b, -1 if
//         b cannot be reached
// dp    - table for the subset DP, indexed [visited-resource mask][key node]
int n, m, len, cnt, tr[M][2], fail[M], ed[M], dis[M], id[N], g[N][N], dp[1 << 12][N];

// Input line buffer. Only the TOTAL length of the virus codes is bounded
// (50000), so a single virus code may be that long; 1005 bytes were only
// enough for resource strings.
char s[50005];
/**
 * Inserts the 0/1 string currently stored in the global buffer s into the trie.
 *
 * @param id  index of the resource; its bit (1 << id) is recorded on the
 *            final node when the string is a resource
 * @param st  -1 when the string is a virus code, any other value for a resource
 *
 * A node that is (or already was) the end of a virus code is marked with -1
 * and never receives resource bits.
 */
void add(int id, int st) {
    int node = 0;
    for (const char *c = s; *c != '\0'; ++c) {
        int &child = tr[node][*c - '0'];
        if (child == 0) child = ++len;  // allocate a fresh trie node on demand
        node = child;
    }
    if (st != -1 && ed[node] != -1) {
        ed[node] |= (1 << id);
    } else {
        ed[node] = -1;
    }
}
void build() {
queue<int> q;
if (tr[0][0]) q.push(tr[0][0]);
if (tr[0][1]) q.push(tr[0][1]);
while (!q.empty()) {
int p = q.front(); q.pop();
for (int j = 0; j < 2; j++) {
int c = tr[p][j];
if (!c) tr[p][j] = tr[fail[p]][j];
else {
if (ed[fail[c]] == -1) ed[c] = -1; //若包含了virus 自然变成-1
else ed[c] |= ed[fail[c]]; //若子串有资源串,包含上
q.push(c);
fail[c] = tr[fail[p]][j];
}
}
}
}
void bfs(int s) {
memset(dis, -1, sizeof(dis));
dis[id[s]] = 0;
queue<int> q; q.push(id[s]);
while (!q.empty()) {
int p = q.front(); q.pop();
for (int j = 0; j < 2; j++) {
int c = tr[p][j];
if (ed[c] == -1 || dis[c] >= 0) continue;
dis[c] = dis[p] + 1;
q.push(c);
}
}
//更新s 到其他个串的距离
for (int i = 0; i < cnt; i++) {
g[s][i] = dis[id[i]];
}
}
void solve() {
id[0] = 0; cnt = 1; //ac自动机的根节点也算一个点
for (int j = 0; j <= len; j++) {
if (ed[j] > 0) id[cnt++] = j; //代表这个串 在trie上的节点编号
}
for (int i = 0; i < cnt; i++) bfs(i); //求该串到其他串的最短距离
memset(dp, 0x3f, sizeof(dp));
dp[0][0] = 0; //以0为起点
for (int i = 0; i < (1 << n); i++) {
for (int j = 0; j < cnt; j++) {
if (dp[i][j] == INF) continue;
for (int k = 0; k < cnt; k++) {
int st = (i | ed[id[k]]);
if (g[j][k] == -1 || i == st) continue;
dp[st][k] = min(dp[st][k], dp[i][j] + g[j][k]);
}
}
}
//找一个最小的距离
int ans = INF;
for (int j = 0; j < cnt; j++) ans = min(ans, dp[(1 << n) - 1][j]);
printf("%d\n", ans);
}
/**
 * Reads test cases until a case with n == 0 (the "0 0" terminator) or until
 * the input ends, and prints one answer line per test case.
 *
 * For every case the trie is reset, the n resources and then the m virus
 * codes are inserted, the automaton is built and the answer is computed.
 */
int main() {
    // Checking the scanf result also stops the loop on end of input or on
    // malformed input; otherwise a missing terminator would loop forever on
    // the stale value of n.
    while (scanf("%d%d", &n, &m) == 2 && n) {
        memset(tr, 0, sizeof(tr)); len = 0;
        memset(ed, 0, sizeof(ed));
        memset(fail, 0, sizeof(fail));
        for (int i = 0; i < n; i++) {
            if (scanf("%s", s) != 1) return 0;  // truncated input
            add(i, 1);
        }
        for (int i = 0; i < m; i++) {
            if (scanf("%s", s) != 1) return 0;  // truncated input
            add(i, -1);
        }
        build();
        solve();
    }
    return 0;
}