/**
 * Trigram profile of a text: maps every three-character substring to the
 * number of times it occurs.
 */
export type Trigrams = Map<string, number>;

/**
 * Extracts trigrams from a text and returns a map of trigram to count.
 *
 * The text is wrapped in two newline characters on each side before the
 * sliding window runs, so a text of length `n` always yields `n + 2` trigrams
 * in total (an empty string yields the single trigram of three newlines,
 * counted twice).
 *
 * Trigrams are taken over UTF-16 code units (`String.prototype.slice`).
 *
 * @param {string} text The text to extract trigrams from
 * @returns {Trigrams} A map of trigram to count
 */
export function trigramProfile(text: string): Trigrams {
  const padded = '\n\n' + text + '\n\n';
  const trigrams: Trigrams = new Map<string, number>();

  for (let start = 0; start + 3 <= padded.length; start++) {
    const trigram = padded.slice(start, start + 3);
    trigrams.set(trigram, (trigrams.get(trigram) ?? 0) + 1);
  }

  return trigrams;
}

/**
 * Calculates the trigram distance between two trigram profiles.
 *
 * The distance is the sum of absolute count differences over every trigram
 * present in either profile, divided by the total number of trigrams in both
 * profiles. Both sums are offset by -4, which has two visible effects:
 * - profiles whose counts differ by 4 or less in total are treated as
 *   identical (the result is clamped to 0);
 * - profiles that share no trigram at all produce exactly 1.
 *
 * Each trigram contributes to the difference exactly once, so for profiles
 * with non-negative counts the result always lies in the range [0, 1].
 *
 * @param {Trigrams} left First text trigram profile
 * @param {Trigrams} right Second text trigram profile
 * @returns {number} The trigram distance between the two profiles, from 0 to 1
 */
export function trigramDistance(left: Trigrams, right: Trigrams): number {
  // NOTE(review): the -4 offset presumably discounts the trigrams introduced
  // by the newline padding in trigramProfile — confirm with the author.
  let distance = -4;
  let total = -4;

  // Every trigram of the left profile, compared against the right one
  // (a trigram missing on the right counts as 0 there).
  for (const [trigram, leftCount] of left) {
    total += leftCount;
    distance += Math.abs(leftCount - (right.get(trigram) ?? 0));
  }

  // Trigrams shared by both profiles were already compared above; only the
  // ones that exist solely in the right profile still add to the difference.
  for (const [trigram, rightCount] of right) {
    total += rightCount;
    if (!left.has(trigram)) {
      distance += rightCount;
    }
  }

  // distance never exceeds total, so a positive distance implies a positive
  // divisor; everything else (including the 0 / 0 case) is reported as 0.
  if (distance <= 0) return 0;

  return distance / total;
}