Longest Common Substring

核心精神

Recurrence formula 會是這種形式：

``````if s1[i] == s2[j]
dp[i][j] = 1 + dp[i-1][j-1]
else
dp[i][j] = 0
``````

dp[i][j] 存的是 s1[0]-s1[i] 跟 s2[0]-s2[j]，有包含 s1[i] 跟 s2[j] 這兩個 character 的 longest common substring 長度。

簡介

``````using namespace std;

#include <iostream>
#include <string>

class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
return findLCSLengthRecursive(s1, s2, 0, 0, 0);
}

private:
int findLCSLengthRecursive(const string &s1, const string &s2, int i1, int i2, int count) {
if (i1 == s1.length() || i2 == s2.length()) {
return count;
}
if (s1[i1] == s2[i2]) {
count = findLCSLengthRecursive(s1, s2, i1 + 1, i2 + 1, count + 1);
}
int c1 = findLCSLengthRecursive(s1, s2, i1, i2 + 1, 0);
int c2 = findLCSLengthRecursive(s1, s2, i1 + 1, i2, 0);
return max(count, max(c1, c2));
}
};

int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;

delete lcs;
}
``````

Memoization 的程式碼如下：

``````using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {

public:
int findLCSLength(const string &s1, const string &s2) {
int maxLength = max(s1.length(), s2.length());
vector<vector<vector<int>>> dp(s1.length(),
vector<vector<int>>(s2.length(), vector<int>(maxLength, -1)));
return findLCSLengthRecursive(dp, s1, s2, 0, 0, 0);
}

private:
int findLCSLengthRecursive(vector<vector<vector<int>>> &dp, const string &s1, const string &s2,
int i1, int i2, int count) {
if (i1 == s1.length() || i2 == s2.length()) {
return count;
}

if (dp[i1][i2][count] == -1) {
int c1 = count;
if (s1[i1] == s2[i2]) {
c1 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2 + 1, count + 1);
}
int c2 = findLCSLengthRecursive(dp, s1, s2, i1, i2 + 1, 0);
int c3 = findLCSLengthRecursive(dp, s1, s2, i1 + 1, i2, 0);
dp[i1][i2][count] = max(c1, max(c2, c3));
}

return dp[i1][i2][count];
}
};

int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;

delete lcs;
}
``````

DP 解的程式碼是：

``````using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {

public:
int findLCSLength(const string &s1, const string &s2) {
vector<vector<int>> dp(s1.length() + 1, vector<int>(s2.length() + 1));
int maxLength = 0;
for (int i = 1; i <= s1.length(); i++) {
for (int j = 1; j <= s2.length(); j++) {
if (s1[i - 1] == s2[j - 1]) {
dp[i][j] = 1 + dp[i - 1][j - 1];
maxLength = max(maxLength, dp[i][j]);
}
}
}
return maxLength;
}
};

int main(int argc, char *argv[]) {
LCS *lcs = new LCS();
cout << lcs->findLCSLength("abdca", "cbda") << endl;
cout << lcs->findLCSLength("passport", "ppsspt") << endl;

delete lcs;
}
``````

DP 解還可以進一步優化，只用兩個 row 就好：

``````using namespace std;

#include <iostream>
#include <string>
#include <vector>

class LCS {
public:
int findLCSLength(const string &s1, const string &s2) {
vector<vector<int>> dp(2, vector<int>(s2.length() + 1));

int maxLength = 0;
for (int i = 1; i <= s1.length(); i++) {
for (int j = 1; j <= s2.length(); j++) {
// Have to set dp[i%2][j] = 0
// Because s1[i-1] might not equal to s2[j-1]
// But previous result in dp[i%2][j] might not be 0
dp[i%2][j] = 0;

if (s1[i - 1] == s2[j - 1]) {
dp[i%2][j] = 1 + dp[(i - 1)%2][j - 1];
maxLength = max(maxLength, dp[i%2][j]);
}
}
}

return maxLength;
}
};
``````