webd/language

A library for language processing. Includes string distance functions (Levenshtein, Jaro-Winkler, ...), stemming, etc.

Maintainers

Package info

github.com/tdebatty/php-language-processing

pkg:composer/webd/language

Statistics

Installs: 175 952

Dependents: 2

Suggesters: 0

Stars: 28

Open Issues: 0

0.4 2026-03-26 14:31 UTC

This package is auto-updated.

Last update: 2026-03-26 14:32:10 UTC


README

GitHub Workflows Packagist Version Packagist Downloads

A PHP library for language processing. Includes string distance functions (Levenshtein, Jaro-Winkler, LCS-distance, ...), stemming, hashing, etc.

Installation

composer require webd/language

Usage

// ------------ n-gram string similarity

use webd\language\StringSimilarity\DiceCoefficient;
use webd\language\StringSimilarity\JaccardSimilarity;


$dice = new DiceCoefficient(2);
$jaccard = new JaccardSimilarity(2);

$a = "context";
$b = "contact";

// 0.5
echo $dice->similarity($a, $b) . PHP_EOL;

// context : ["co", "on", "nt", "te", "ex", "xt"]
// contact : ["co", "on", "nt", "ta", "ac", "ct"]
// Jaccard similarity : 3 / 9 (3 shared 2-grams out of 9 distinct 2-grams)
// 0.33333
echo $jaccard->similarity($a, $b) . PHP_EOL;

// ------------ string distance

use webd\language\StringDistance;

$string1 = "You won 10000$";
$string2 = "You won 15500$";

// 2
echo "Edit distance : " . StringDistance::editDistance($string1, $string2) . PHP_EOL;

// 2
echo "Levenshtein : " . StringDistance::levenshtein($string1, $string2) . PHP_EOL;


$lcs = new \webd\language\LCS($string1, $string2);
// You won 100$
echo $lcs->value() . PHP_EOL;

// 12
echo $lcs->length() . PHP_EOL;

// 4
echo $lcs->distance() . PHP_EOL;

// -------------- jaro-winkler string similarity

// 0.96428571428571
echo "Jaro-Winkler : " . StringDistance::jaroWinkler($string1, $string2) . PHP_EOL;

// 0.98809523809524
echo "Jaro-Winkler (prefix scale = 0.2) : " . StringDistance::jaroWinkler($string1, $string2, 0.2) . PHP_EOL;

// -------------- stemming

use webd\language\PorterStemmer;

// analyz
echo "analyzing => " . PorterStemmer::stem("analyzing") . PHP_EOL;

// abandon
echo "abandoned => " . PorterStemmer::stem("abandoned") . PHP_EOL;

// inclin
echo "inclination => " . PorterStemmer::stem("inclination") . PHP_EOL;

// ------------- SpamSum, aka ssdeep, aka Context-Triggered Piecewise Hashing (CTPH)
$s = new \webd\language\SpamSum;
// 192:x+cMdRiWqk2YODjCoG4OU88/ffcQ+lsCYDIlp6+TF244htoJFUjw:krovCLA9byp6+52jhtnjw
echo $s->HashString(file_get_contents(__DIR__ . "/SpamSum.php")) . PHP_EOL;