Longest Common Substring

核心精神

Recurrence formula 會是這種形式:

if s1[i] == s2[j] 
  dp[i][j] = 1 + dp[i-1][j-1]
else 
  dp[i][j] = 0

dp[i][j] 存的是 s1[0]-s1[i] 跟 s2[0]-s2[j],有包含 s1[i] 跟 s2[j] 這兩個 character 的 longest common substring 長度。

如果兩邊的 character 一樣,那就可以看兩邊各往前一個 character,最大的 common substring 是多長(也就是 dp[i-1][j-1])。而如果兩邊的 character 不一樣,那 dp[i][j] 就得是 0(如果是 longest common subsequence,dp[i][j] 就會是 max(dp[i-1][j], dp[i][j-1]) )。

簡介

暴力法的程式碼如下:

using namespace std;

#include <iostream>
#include <string>

class LCS {
public:
  int findLCSLength(const string &s1, const string &s2) {
    return findLCSLengthRecursive(s1, s2, 0, 0, 0);
  }

private:
  int findLCSLengthRecursive(const string &s1, const string &s2, int i1, int i2, int count) {
    if (i1 == s1.length() || i2 == s2.length()) {
      return count;
    }
    if (s1[i1] == s2[i2]) {
      count = findLCSLengthRecursive(s1, s2, i1 + 1, i2 + 1, count + 1);
    }
    int c1 = findLCSLengthRecursive(s1, s2, i1, i2 + 1, 0);
    int c2 = findLCSLengthRecursive(s1, s2, i1 + 1, i2, 0);
    return max(count, max(c1, c2));
  }
};

int main(int argc, char *argv[]) {
  LCS *lcs = new LCS();
  cout << lcs->findLCSLength("abdca", "cbda") << endl;
  cout << lcs->findLCSLength("passport", "ppsspt") << endl;

  delete lcs;
}

Memoization 的程式碼如下:

using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {

public:
  int findLCSLength(const string &s1, const string &s2) {
    int maxLength = max(s1.length(), s2.length());
    vector<vector<vector<int>>> dp(s1.length(),
                                   vector<vector<int>>(s2.length(), vector<int>(maxLength, -1)));
    return findLCSLengthRecursive(dp, s1, s2, 0, 0, 0);
  }

private:
  int findLCSLengthRecursive(vector<vector<vector<int>>> &dp, const string &s1, const string &s2,
                             int i1, int i2, int count) {
    if (i1 == s1.length() || i2 == s2.length()) {
      return count;
    }

    if (dp[i1][i2][count] == -1) {
      int c1 = count;
      if (s1[i1] == s2[i2]) {
        c1 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2 + 1, count + 1);
      }
      int c2 = findLCSLengthRecursive(dp, s1, s2, i1, i2 + 1, 0);
      int c3 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2, 0);
      dp[i1][i2][count] = max(c1, max(c2, c3));
    }

    return dp[i1][i2][count];
  }
};

int main(int argc, char *argv[]) {
  LCS *lcs = new LCS();
  cout << lcs->findLCSLength("abdca", "cbda") << endl;
  cout << lcs->findLCSLength("passport", "ppsspt") << endl;

  delete lcs;
}

初始化的方式是 i == 0 跟 j == 0 時,Longest common substring 的長度都是 0:

最後算出來的 DP table 是:

DP 解的程式碼是:

using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {

public:
  int findLCSLength(const string &s1, const string &s2) {
    vector<vector<int>> dp(s1.length() + 1, vector<int>(s2.length() + 1));
    int maxLength = 0;
    for (int i = 1; i <= s1.length(); i++) {
      for (int j = 1; j <= s2.length(); j++) {
        if (s1[i - 1] == s2[j - 1]) {
          dp[i][j] = 1 + dp[i - 1][j - 1];
          maxLength = max(maxLength, dp[i][j]);
        }
      }
    }
    return maxLength;
  }
};

int main(int argc, char *argv[]) {
  LCS *lcs = new LCS();
  cout << lcs->findLCSLength("abdca", "cbda") << endl;
  cout << lcs->findLCSLength("passport", "ppsspt") << endl;

  delete lcs;
}

DP 解還可以進一步優化,只用兩個 row 就好:

using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {
public:
  int findLCSLength(const string &s1, const string &s2) {
    vector<vector<int>> dp(2, vector<int>(s2.length() + 1));

    int maxLength = 0;
    for (int i = 1; i <= s1.length(); i++) {
      for (int j = 1; j <= s2.length(); j++) {
        // Have to set dp[i%2][j] = 0
        // Because s1[i-1] might not equal to s2[j-1]
        // But previous result in dp[i%2][j] might not be 0
        dp[i%2][j] = 0;

        if (s1[i - 1] == s2[j - 1]) {
          dp[i%2][j] = 1 + dp[(i - 1)%2][j - 1];
          maxLength = max(maxLength, dp[i%2][j]);
        }
      }
    }

    return maxLength;
  }
};

Last updated