Longest Common Subsequence

"""
1143. Longest Common Subsequence

https://leetcode.com/problems/longest-common-subsequence

NOTES
  * Use dynamic programming (2D) or recursion.

  * A common subsequence is a sequence of letters that appears in both strings.
    Not every letter in the strings has to be used, but letters cannot be
    rearranged. In essence, a subsequence of a string 's' is a string we get by
    deleting some letters in 's'.

  * The most obvious approach would be to iterate through each subsequence of
    the first string and check whether or not it is also a subsequence of the
    second string. This, however, will require exponential time to run. The
    number of subsequences in a string is up to 2^L, where L is the length of
    the string.

  * There are a couple of strategies we use to design a tractable
    (non-exponential) algorithm for an optimization problem:

      1. Identifying a greedy algorithm
      2. Dynamic programming

  * There is no guarantee that either is possible. Additionally, greedy
    algorithms are strictly less common than dynamic programming algorithms and
    are often more difficult to identify. However, if a greedy algorithm
    exists, then it will almost always be better than a dynamic programming
    one. You should, therefore, at least give some thought to the potential
    existence of a greedy algorithm before jumping straight into dynamic
    programming.

  * Recall that there are two different techniques we can use to implement a
    dynamic programming solution; memoization and tabulation.

      * Memoization is where we add caching to a function (that has no side
        effects). In dynamic programming, it is typically used on recursive
        functions for a top-down solution that starts with the initial problem
        and then recursively calls itself to solve smaller problems.

      * Tabulation uses a table to keep track of subproblem results and works
        in a bottom-up manner: solving the smallest subproblems before the
        large ones, in an iterative manner. Often, people use the words
        "tabulation" and "dynamic programming" interchangeably.
"""

from typing import List


class Solution:
    """
    Observe that the subproblems have a natural "size" ordering; the largest
    subproblem is the one we start with, and the smallest subproblems are the
    ones with just one letter left in each word. The answer for each subproblem
    depends on the answers to some of the smaller subproblems.

    Remembering too that each subproblem is represented as a pair of indexes,
    and that there are text1.length() * text2.length() such possible
    subproblems, we can iterate through the subproblems, starting from the
    smallest ones, and storing the answer for each. When we get to the larger
    subproblems, the smaller ones that they depend on will already have been
    solved. The best way to do this is to use a 2D array.

    Remembering back to the memoization solution, there were two cases.

      1. The first letter of both strings are the same.
      2. The first letter of both strings are *not* the same.
    """

    def longestCommonSubsequence(self, text1: str, text2: str) -> int:
        # Initializing the table to 0 allows us to calculate the current
        # subproblem from previous subproblems.
        #
        #     a b c d e -    i →
        #   a 0 0 0 0 0 0  j
        #   c 0 0 0 0 0 0  ↓
        #   e 0 0 0 0 0 0
        #   - 0 0 0 0 0 0
        #
        #     a b c d e -    i →
        #   a 3 2 2 1 1 0  j
        #   c 2 2 2 1 1 0  ↓
        #   e 1 1 1 1 1 0
        #   - 0 0 0 0 0 0
        #
        # where a,a is (0,0) and e,e is (5,3) (for i,j).
        col, row = len(text1) + 1, len(text2) + 1
        dp: List[List[int]] = [[0 for _ in range(col)] for _ in range(row)]

        # Iterate over the table in reverse (first by column, then by row).
        for i in reversed(range(len(text2))):
            for j in reversed(range(len(text1))):
                # 1. The first letter of both strings are the same.
                if text1[j] == text2[i]:
                    dp[i][j] = 1 + dp[i + 1][j + 1]
                # 2. The first letter of both strings are *not* the same.
                else:
                    dp[i][j] = max(dp[i][j + 1], dp[i + 1][j])
        # NOTE: Uncomment to print the result of the table.
        # for r in dp:
        #     print(r)
        return dp[0][0]


class MemoizationSolution:
    """
    Some subproblems may be visited multiple times. As such, we should memoize
    the results of `lcs()` calls so that the answers of previously computed
    subproblems can immediately be returned without the need for
    re-computation.
    """

    def longestCommonSubsequence(self, text1: str, text2: str) -> int:
        # Initializing the memoization table to -1 allows us to determine
        # whether or not the value has been calculated.
        #
        #     a b c d e    i →
        #   a . . . . .  j
        #   c . . . . .  ↓
        #   e . . . . .
        #
        # where a,a is (0,0) and e,e is (5,3) (for i,j).
        col, row = len(text1), len(text2)
        memo: List[List[int]] = [[-1 for _ in range(col)] for _ in range(row)]

        def lcs(s1: str, s2: str, memo: List[List[int]]) -> int:
            col, row = len(memo[0]), len(memo)
            if s1 == "" or s2 == "":
                return 0
            # Check whether we've already solved the given subproblem.
            i, j = row - len(s2), col - len(s1)
            if memo[i][j] != -1:
                return memo[i][j]
            if s1[0] == s2[0]:
                memo[i][j] = 1 + lcs(s1[1:], s2[1:], memo)
            else:
                memo[i][j] = max(lcs(s1[0:], s2[1:], memo), lcs(s1[1:], s2[0:], memo))
            return memo[i][j]

        return lcs(text1, text2, memo)


class RecursiveSolution:
    """
    For problems like this, it is helpful to first break the problem down into
    its smallest subproblems and build up a solution from there.

    For example:

      Given two strings "abcde" (text1) and "ace" (text2), we can first break
      the problem down into the following base cases:

        1. If text1 = "" and text2 = "", return 0
        2. If text1 = "a" and text2 = "", return 0
        3. If text1 = "" and text2 = "a", return 0

      Next, we build upon the base cases with simplified cases for the general
      case:

        1. If text1 = "a" and text2 = "a", return 1
        2. If text1 = "ab" and text2 = "ac", return 1 + lcs("b", "c")
        3. If text1 = "bc" and text2 = "ac", return
           max(lcs("bc", "c"), lcs("c", "ac"))

      The third case is the most tricky to intuit, however, we are simply
      determining the longest subsquence using the next character in both
      strings.

      Finally, we formalize the above cases in code.

    This solution is O(M x N), where where `M` is the length of the first
    string and `N` is the length of the second string.

    NOTE: This solution exceeds the time limit.
    """

    def longestCommonSubsequence(self, text1: str, text2: str) -> int:
        def lcs(s1: str, s2: str) -> int:
            if s1 == "" or s2 == "":
                return 0
            if s1[0] == s2[0]:
                return 1 + lcs(s1[1:], s2[1:])
            return max(lcs(s1[0:], s2[1:]), lcs(s1[1:], s2[0:]))

        return lcs(text1, text2)
grind.rip