From 002215303a0fda40e81c459bf5ce8323304f9ca9 Mon Sep 17 00:00:00 2001 From: robonen Date: Thu, 11 Apr 2024 00:11:29 +0700 Subject: [PATCH] feat(packages/stdlib): levenshtein distance util --- packages/stdlib/src/text/index.ts | 1 + .../text/levenshtein-distance/index.test.ts | 32 ++++++++++++++ .../src/text/levenshtein-distance/index.ts | 44 +++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 packages/stdlib/src/text/index.ts create mode 100644 packages/stdlib/src/text/levenshtein-distance/index.test.ts create mode 100644 packages/stdlib/src/text/levenshtein-distance/index.ts diff --git a/packages/stdlib/src/text/index.ts b/packages/stdlib/src/text/index.ts new file mode 100644 index 0000000..106ddd8 --- /dev/null +++ b/packages/stdlib/src/text/index.ts @@ -0,0 +1 @@ +export * from './levenshtein-distance'; \ No newline at end of file diff --git a/packages/stdlib/src/text/levenshtein-distance/index.test.ts b/packages/stdlib/src/text/levenshtein-distance/index.test.ts new file mode 100644 index 0000000..4989e5b --- /dev/null +++ b/packages/stdlib/src/text/levenshtein-distance/index.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from 'vitest'; +import levenshteinDistance from '.'; + +describe('levenshteinDistance', () => { + it('should calculate edit distance between two strings', () => { + // just one deletion of I at the beginning + expect(levenshteinDistance('islander', 'slander')).toBe(1); + + // substitution M->K, T->M and add an A to the end + expect(levenshteinDistance('mart', 'karma')).toBe(3); + + // substitution K->S, E->I and insert G at the end + expect(levenshteinDistance('kitten', 'sitting')).toBe(3); + + // should add 4 letters FOOT at the beginning + expect(levenshteinDistance('ball', 'football')).toBe(4); + + // should delete 4 letters BALL at the end + expect(levenshteinDistance('football', 'foot')).toBe(4); + + // needs to substitute the first 5 chars INTEN->EXECU + expect(levenshteinDistance('intention', 
/**
 * Calculate the Levenshtein (edit) distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions required to
 * turn `a` into `b`.
 *
 * Strings are compared per UTF-16 code unit (the same units `.length` counts),
 * so a character outside the Basic Multilingual Plane contributes two units.
 *
 * Runs in O(a.length * b.length) time and keeps a single row sized to the
 * shorter input instead of a full matrix.
 *
 * Available both as a named export and as the default export: `export * from`
 * does not forward default exports, so the named export is what makes the
 * function reachable through an `export *` barrel such as `text/index.ts`.
 *
 * @param a First string
 * @param b Second string
 * @returns The Levenshtein distance between the two strings
 *
 * @example
 * levenshteinDistance('kitten', 'sitting'); // 3
 */
export function levenshteinDistance(a: string, b: string): number {
  // Identical strings need no edits
  if (a === b) return 0;

  // Turning an empty string into the other one takes one insertion per unit
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // The distance is symmetric, so lay the row out over the shorter string
  const swap = a.length > b.length;
  const shorter = swap ? b : a;
  const longer = swap ? a : b;

  // row[k] holds the distance between shorter.slice(0, k + 1) and the prefix of
  // `longer` consumed so far. Column 0 (the empty prefix of `shorter`) is never
  // stored: its value is simply the number of `longer` units consumed.
  const row = Array.from({ length: shorter.length }, (_, k) => k + 1);
  let distance = shorter.length;

  for (let consumed = 1; consumed <= longer.length; consumed++) {
    const target = longer.charCodeAt(consumed - 1);

    // Values for the implicit column 0 of the previous and the current row
    let diagonal = consumed - 1;
    let left = consumed;

    // `above` is read before row[k] is overwritten, so it is still the value
    // from the previous row
    for (const [k, above] of row.entries()) {
      const cost = shorter.charCodeAt(k) === target ? 0 : 1;

      left = Math.min(
        left + 1, // extend the cell to the left by one edit
        above + 1, // extend the cell above by one edit
        diagonal + cost, // substitution, free when the units match
      );

      diagonal = above;
      row[k] = left;
    }

    // After the inner loop `left` is the last cell of the finished row
    distance = left;
  }

  return distance;
}

export default levenshteinDistance;