1
0
mirror of https://github.com/robonen/tools.git synced 2026-03-20 10:54:44 +00:00

refactor: change separate tools by category

This commit is contained in:
2025-05-19 17:43:42 +07:00
parent d55737df2f
commit 78fb4da82a
158 changed files with 32 additions and 24 deletions

View File

@@ -0,0 +1,93 @@
import { describe, it, expect } from 'vitest';
import { trigramDistance, trigramProfile } from '.';
describe('trigramProfile', () => {
  // Expected profiles include the trigrams produced by the '\n\n' padding
  // that surrounds the input on both sides.
  it('trigram profile of a text with different trigrams', () => {
    const expected = new Map([
      ['\n\nh', 1],
      ['\nhe', 1],
      ['hel', 1],
      ['ell', 1],
      ['llo', 1],
      ['lo ', 1],
      ['o w', 1],
      [' wo', 1],
      ['wor', 1],
      ['orl', 1],
      ['rld', 1],
      ['ld\n', 1],
      ['d\n\n', 1]
    ]);

    expect(trigramProfile('hello world')).toEqual(expected);
  });

  it('trigram profile of a text with repeated trigrams', () => {
    // 'hel', 'ell' and 'llo' occur once per 'hello', hence the count of 2
    const expected = new Map([
      ['\n\nh', 1],
      ['\nhe', 1],
      ['hel', 2],
      ['ell', 2],
      ['llo', 2],
      ['lo ', 1],
      ['o h', 1],
      [' he', 1],
      ['lo\n', 1],
      ['o\n\n', 1]
    ]);

    expect(trigramProfile('hello hello')).toEqual(expected);
  });

  it('trigram profile of an empty text', () => {
    // Only the padding remains: '\n\n\n\n' holds the same trigram twice
    const expected = new Map([
      ['\n\n\n', 2],
    ]);

    expect(trigramProfile('')).toEqual(expected);
  });
});
describe('trigramDistance', () => {
  // Profiles both texts (first, then second) and returns their trigram distance
  const distanceBetween = (first: string, second: string): number => {
    const firstProfile = trigramProfile(first);
    const secondProfile = trigramProfile(second);

    return trigramDistance(firstProfile, secondProfile);
  };

  it('zero when comparing the same text', () => {
    expect(distanceBetween('hello world', 'hello world')).toBe(0);
  });

  it('one for completely different text', () => {
    expect(distanceBetween('hello world', 'lorem ipsum')).toBe(1);
  });

  it('one for empty text and non-empty text', () => {
    expect(distanceBetween('hello world', '')).toBe(1);
  });

  it('approximately 0.5 for similar text', () => {
    const distance = distanceBetween('hello world', 'hello lorem');

    expect(distance).toBeGreaterThan(0.45);
    expect(distance).toBeLessThan(0.55);
  });

  it('triangle inequality', () => {
    // Going through the intermediate text must not be shorter than the direct path
    const sideAB = distanceBetween('metric', '123ric');
    const sideBC = distanceBetween('123ric', '123456');
    const sideAC = distanceBetween('metric', '123456');

    expect(sideAB + sideBC).toBeGreaterThanOrEqual(sideAC);
  });
});

View File

@@ -0,0 +1,57 @@
/**
 * Trigram profile of a text: maps a trigram (three-character substring)
 * to the number of times it occurs.
 */
export type Trigrams = Map<string, number>;
/**
 * @name trigramProfile
 * @category Text
 * @description Builds the trigram profile of a text: every three-character window of the text, padded with two newlines on each side, mapped to its number of occurrences
 *
 * @param {string} text The text to profile
 * @returns {Trigrams} A map of trigram to occurrence count
 *
 * @since 0.0.1
 */
export function trigramProfile(text: string): Trigrams {
  // The padding makes the first and last characters take part in as many
  // trigrams as the inner ones (an empty text still yields '\n\n\n' twice).
  const padded = '\n\n' + text + '\n\n';
  const profile = new Map<string, number>();
  const lastStart = padded.length - 3;

  for (let start = 0; start <= lastStart; start += 1) {
    const key = padded.substring(start, start + 3);

    profile.set(key, (profile.get(key) ?? 0) + 1);
  }

  return profile;
}
/**
 * @name trigramDistance
 * @category Text
 * @description Calculates the trigram distance between two trigram profiles: 0 for (nearly) identical profiles, 1 for profiles that share no trigram
 *
 * @param {Trigrams} left First text trigram profile
 * @param {Trigrams} right Second text trigram profile
 * @returns {number} The trigram distance, within [0, 1] for profiles with non-negative counts
 *
 * @since 0.0.1
 */
export function trigramDistance(left: Trigrams, right: Trigrams): number {
  // Both accumulators start at -4, so profiles whose summed count difference
  // is 4 or less are reported as identical.
  // NOTE(review): the offset presumably relates to the '\n\n' padding added
  // by trigramProfile — confirm.
  let distance = -4;
  let total = -4;

  for (const [trigram, leftCount] of left) {
    total += leftCount;
    distance += Math.abs(leftCount - (right.get(trigram) ?? 0));
  }

  for (const [trigram, rightCount] of right) {
    total += rightCount;

    // Trigrams present in both profiles were already compared in the first
    // pass; counting them again would let the result exceed 1.
    if (!left.has(trigram)) distance += rightCount;
  }

  // Also covers `0 / 0`, which would otherwise produce NaN
  if (distance <= 0) return 0;

  return distance / total;
}