Longest Common Substring
核心精神
Recurrence formula 會是這種形式:
if s1[i] == s2[j]
dp[i][j] = 1 + dp[i-1][j-1]
else
dp[i][j] = 0
dp[i][j] 存的是 s1[0]-s1[i] 跟 s2[0]-s2[j],有包含 s1[i] 跟 s2[j] 這兩個 character 的 longest common substring 長度。
如果兩邊的 character 一樣,那就可以看兩邊各往前一個 character,最大的 common substring 是多長(也就是 dp[i-1][j-1])。而如果兩邊的 character 不一樣,那 dp[i][j] 就得是 0(如果是 longest common subsequence,dp[i][j] 就會是 max(dp[i-1][j], dp[i][j-1]) )。
簡介
暴力法的程式碼如下:
using namespace std;
#include <iostream>
#include <string>
class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
return findLCSLengthRecursive(s1, s2, 0, 0, 0);
}
private:
int findLCSLengthRecursive(const string &s1, const string &s2, int i1, int i2, int count) {
if (i1 == s1.length() || i2 == s2.length()) {
return count;
}
if (s1[i1] == s2[i2]) {
count = findLCSLengthRecursive(s1, s2, i1 + 1, i2 + 1, count + 1);
}
int c1 = findLCSLengthRecursive(s1, s2, i1, i2 + 1, 0);
int c2 = findLCSLengthRecursive(s1, s2, i1 + 1, i2, 0);
return max(count, max(c1, c2));
}
};
int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;
delete lcs;
}
Memoization 的程式碼如下:
using namespace std;
#include <iostream>
#include <string>
#include <vector>
class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
int maxLength = max(s1.length(), s2.length());
vector<vector<vector<int>>> dp(s1.length(),
vector<vector<int>>(s2.length(), vector<int>(maxLength, -1)));
return findLCSLengthRecursive(dp, s1, s2, 0, 0, 0);
}
private:
int findLCSLengthRecursive(vector<vector<vector<int>>> &dp, const string &s1, const string &s2,
int i1, int i2, int count) {
if (i1 == s1.length() || i2 == s2.length()) {
return count;
}
if (dp[i1][i2][count] == -1) {
int c1 = count;
if (s1[i1] == s2[i2]) {
c1 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2 + 1, count + 1);
}
int c2 = findLCSLengthRecursive(dp, s1, s2, i1, i2 + 1, 0);
int c3 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2, 0);
dp[i1][i2][count] = max(c1, max(c2, c3));
}
return dp[i1][i2][count];
}
};
int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;
delete lcs;
}
初始化的方式是 i == 0 跟 j == 0 時,Longest common substring 的長度都是 0:
最後算出來的 DP table 是:
DP 解的程式碼是:
using namespace std;
#include <iostream>
#include <string>
#include <vector>
class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
vector<vector<int>> dp(s1.length() + 1, vector<int>(s2.length() + 1));
int maxLength = 0;
for (int i = 1; i <= s1.length(); i++) {
for (int j = 1; j <= s2.length(); j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = 1 + dp[i - 1][j - 1];
maxLength = max(maxLength, dp[i][j]);
}
}
}
return maxLength;
}
};
int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;
delete lcs;
}
DP 解還可以進一步優化,只用兩個 row 就好:
using namespace std;
#include <iostream>
#include <string>
#include <vector>
class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
vector<vector<int>> dp(2, vector<int>(s2.length() + 1));
int maxLength = 0;
for (int i = 1; i <= s1.length(); i++) {
for (int j = 1; j <= s2.length(); j++) {
// Have to set dp[i%2][j] = 0
// Because s1[i-1] might not equal to s2[j-1]
// But previous result in dp[i%2][j] might not be 0
dp[i%2][j] = 0;
if (s1[i - 1] == s2[j - 1]) {
dp[i%2][j] = 1 + dp[(i - 1)%2][j - 1];
maxLength = max(maxLength, dp[i%2][j]);
}
}
}
return maxLength;
}
};
Last updated