diff --git a/composer.json b/composer.json index 486292c..7d89ffa 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,8 @@ "symfony/http-client": "^7.1", "symfony/serializer": "^7.1", "symfony/property-access": "^7.1", - "webmozart/assert": "^1.11" + "webmozart/assert": "^1.11", + "league/commonmark": "^2.4" }, "require-dev": { "phpunit/phpunit": "^11.1", diff --git a/composer.lock b/composer.lock index 743112e..5fcee88 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,419 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "b591aec30c2312c773d1b52569f35fc7", + "content-hash": "1a1fc0bc0e899a1c9197cbd2c6d17d37", "packages": [ + { + "name": "dflydev/dot-access-data", + "version": "v3.0.3", + "source": { + "type": "git", + "url": "https://github.com/dflydev/dflydev-dot-access-data.git", + "reference": "a23a2bf4f31d3518f3ecb38660c95715dfead60f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/dflydev/dflydev-dot-access-data/zipball/a23a2bf4f31d3518f3ecb38660c95715dfead60f", + "reference": "a23a2bf4f31d3518f3ecb38660c95715dfead60f", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0" + }, + "require-dev": { + "phpstan/phpstan": "^0.12.42", + "phpunit/phpunit": "^7.5 || ^8.5 || ^9.3", + "scrutinizer/ocular": "1.6.0", + "squizlabs/php_codesniffer": "^3.5", + "vimeo/psalm": "^4.0.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Dflydev\\DotAccessData\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Dragonfly Development Inc.", + "email": "info@dflydev.com", + "homepage": "http://dflydev.com" + }, + { + "name": "Beau Simensen", + "email": "beau@dflydev.com", + "homepage": "http://beausimensen.com" + }, + { + "name": "Carlos Frutos", + "email": 
"carlos@kiwing.it", + "homepage": "https://github.com/cfrutos" + }, + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com" + } + ], + "description": "Given a deep data structure, access data by dot notation.", + "homepage": "https://github.com/dflydev/dflydev-dot-access-data", + "keywords": [ + "access", + "data", + "dot", + "notation" + ], + "support": { + "issues": "https://github.com/dflydev/dflydev-dot-access-data/issues", + "source": "https://github.com/dflydev/dflydev-dot-access-data/tree/v3.0.3" + }, + "time": "2024-07-08T12:26:09+00:00" + }, + { + "name": "league/commonmark", + "version": "2.4.2", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/commonmark.git", + "reference": "91c24291965bd6d7c46c46a12ba7492f83b1cadf" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/commonmark/zipball/91c24291965bd6d7c46c46a12ba7492f83b1cadf", + "reference": "91c24291965bd6d7c46c46a12ba7492f83b1cadf", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "league/config": "^1.1.1", + "php": "^7.4 || ^8.0", + "psr/event-dispatcher": "^1.0", + "symfony/deprecation-contracts": "^2.1 || ^3.0", + "symfony/polyfill-php80": "^1.16" + }, + "require-dev": { + "cebe/markdown": "^1.0", + "commonmark/cmark": "0.30.3", + "commonmark/commonmark.js": "0.30.0", + "composer/package-versions-deprecated": "^1.8", + "embed/embed": "^4.4", + "erusev/parsedown": "^1.0", + "ext-json": "*", + "github/gfm": "0.29.0", + "michelf/php-markdown": "^1.4 || ^2.0", + "nyholm/psr7": "^1.5", + "phpstan/phpstan": "^1.8.2", + "phpunit/phpunit": "^9.5.21 || ^10.5.9 || ^11.0.0", + "scrutinizer/ocular": "^1.8.1", + "symfony/finder": "^5.3 | ^6.0 || ^7.0", + "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0 || ^7.0", + "unleashedtech/php-coding-standard": "^3.1.1", + "vimeo/psalm": "^4.24.0 || ^5.0.0" + }, + "suggest": { + "symfony/yaml": "v2.3+ required if using the Front Matter extension" + }, 
+ "type": "library", + "extra": { + "branch-alias": { + "dev-main": "2.5-dev" + } + }, + "autoload": { + "psr-4": { + "League\\CommonMark\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com", + "role": "Lead Developer" + } + ], + "description": "Highly-extensible PHP Markdown parser which fully supports the CommonMark spec and GitHub-Flavored Markdown (GFM)", + "homepage": "https://commonmark.thephpleague.com", + "keywords": [ + "commonmark", + "flavored", + "gfm", + "github", + "github-flavored", + "markdown", + "md", + "parser" + ], + "support": { + "docs": "https://commonmark.thephpleague.com/", + "forum": "https://github.com/thephpleague/commonmark/discussions", + "issues": "https://github.com/thephpleague/commonmark/issues", + "rss": "https://github.com/thephpleague/commonmark/releases.atom", + "source": "https://github.com/thephpleague/commonmark" + }, + "funding": [ + { + "url": "https://www.colinodell.com/sponsor", + "type": "custom" + }, + { + "url": "https://www.paypal.me/colinpodell/10.00", + "type": "custom" + }, + { + "url": "https://github.com/colinodell", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/league/commonmark", + "type": "tidelift" + } + ], + "time": "2024-02-02T11:59:32+00:00" + }, + { + "name": "league/config", + "version": "v1.2.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/config.git", + "reference": "754b3604fb2984c71f4af4a9cbe7b57f346ec1f3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/config/zipball/754b3604fb2984c71f4af4a9cbe7b57f346ec1f3", + "reference": "754b3604fb2984c71f4af4a9cbe7b57f346ec1f3", + "shasum": "" + }, + "require": { + "dflydev/dot-access-data": "^3.0.1", + "nette/schema": "^1.2", + "php": "^7.4 || ^8.0" + }, + "require-dev": { + 
"phpstan/phpstan": "^1.8.2", + "phpunit/phpunit": "^9.5.5", + "scrutinizer/ocular": "^1.8.1", + "unleashedtech/php-coding-standard": "^3.1", + "vimeo/psalm": "^4.7.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.2-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Config\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com", + "role": "Lead Developer" + } + ], + "description": "Define configuration arrays with strict schemas and access values with dot notation", + "homepage": "https://config.thephpleague.com", + "keywords": [ + "array", + "config", + "configuration", + "dot", + "dot-access", + "nested", + "schema" + ], + "support": { + "docs": "https://config.thephpleague.com/", + "issues": "https://github.com/thephpleague/config/issues", + "rss": "https://github.com/thephpleague/config/releases.atom", + "source": "https://github.com/thephpleague/config" + }, + "funding": [ + { + "url": "https://www.colinodell.com/sponsor", + "type": "custom" + }, + { + "url": "https://www.paypal.me/colinpodell/10.00", + "type": "custom" + }, + { + "url": "https://github.com/colinodell", + "type": "github" + } + ], + "time": "2022-12-11T20:36:23+00:00" + }, + { + "name": "nette/schema", + "version": "v1.3.0", + "source": { + "type": "git", + "url": "https://github.com/nette/schema.git", + "reference": "a6d3a6d1f545f01ef38e60f375d1cf1f4de98188" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/schema/zipball/a6d3a6d1f545f01ef38e60f375d1cf1f4de98188", + "reference": "a6d3a6d1f545f01ef38e60f375d1cf1f4de98188", + "shasum": "" + }, + "require": { + "nette/utils": "^4.0", + "php": "8.1 - 8.3" + }, + "require-dev": { + "nette/tester": "^2.4", + "phpstan/phpstan-nette": "^1.0", + "tracy/tracy": "^2.8" + }, + "type": "library", + "extra": { + 
"branch-alias": { + "dev-master": "1.3-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "📐 Nette Schema: validating data structures against a given Schema.", + "homepage": "https://nette.org", + "keywords": [ + "config", + "nette" + ], + "support": { + "issues": "https://github.com/nette/schema/issues", + "source": "https://github.com/nette/schema/tree/v1.3.0" + }, + "time": "2023-12-11T11:54:22+00:00" + }, + { + "name": "nette/utils", + "version": "v4.0.4", + "source": { + "type": "git", + "url": "https://github.com/nette/utils.git", + "reference": "d3ad0aa3b9f934602cb3e3902ebccf10be34d218" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/utils/zipball/d3ad0aa3b9f934602cb3e3902ebccf10be34d218", + "reference": "d3ad0aa3b9f934602cb3e3902ebccf10be34d218", + "shasum": "" + }, + "require": { + "php": ">=8.0 <8.4" + }, + "conflict": { + "nette/finder": "<3", + "nette/schema": "<1.2.2" + }, + "require-dev": { + "jetbrains/phpstorm-attributes": "dev-master", + "nette/tester": "^2.5", + "phpstan/phpstan": "^1.0", + "tracy/tracy": "^2.9" + }, + "suggest": { + "ext-gd": "to use Image", + "ext-iconv": "to use Strings::webalize(), toAscii(), chr() and reverse()", + "ext-intl": "to use Strings::webalize(), toAscii(), normalize() and compare()", + "ext-json": "to use Nette\\Utils\\Json", + "ext-mbstring": "to use Strings::lower() etc...", + "ext-tokenizer": "to use Nette\\Utils\\Reflection::getUseStatements()" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + 
"license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "🛠 Nette Utils: lightweight utilities for string & array manipulation, image handling, safe JSON encoding/decoding, validation, slug or strong password generating etc.", + "homepage": "https://nette.org", + "keywords": [ + "array", + "core", + "datetime", + "images", + "json", + "nette", + "paginator", + "password", + "slugify", + "string", + "unicode", + "utf-8", + "utility", + "validation" + ], + "support": { + "issues": "https://github.com/nette/utils/issues", + "source": "https://github.com/nette/utils/tree/v4.0.4" + }, + "time": "2024-01-17T16:50:36+00:00" + }, { "name": "psr/container", "version": "2.0.2", @@ -59,6 +470,56 @@ }, "time": "2021-11-05T16:47:00+00:00" }, + { + "name": "psr/event-dispatcher", + "version": "1.0.0", + "source": { + "type": "git", + "url": "https://github.com/php-fig/event-dispatcher.git", + "reference": "dbefd12671e8a14ec7f180cab83036ed26714bb0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/event-dispatcher/zipball/dbefd12671e8a14ec7f180cab83036ed26714bb0", + "reference": "dbefd12671e8a14ec7f180cab83036ed26714bb0", + "shasum": "" + }, + "require": { + "php": ">=7.2.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\EventDispatcher\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Standard interfaces for event handling.", + "keywords": [ + "events", + "psr", + "psr-14" + ], + "support": { + "issues": "https://github.com/php-fig/event-dispatcher/issues", + "source": 
"https://github.com/php-fig/event-dispatcher/tree/1.0.0" + }, + "time": "2019-01-08T18:20:26+00:00" + }, { "name": "psr/log", "version": "3.0.0", @@ -666,6 +1127,86 @@ ], "time": "2024-01-29T20:11:03+00:00" }, + { + "name": "symfony/polyfill-php80", + "version": "v1.30.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php80.git", + "reference": "77fa7995ac1b21ab60769b7323d600a991a90433" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/77fa7995ac1b21ab60769b7323d600a991a90433", + "reference": "77fa7995ac1b21ab60769b7323d600a991a90433", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "type": "library", + "extra": { + "thanks": { + "name": "symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "files": [ + "bootstrap.php" + ], + "psr-4": { + "Symfony\\Polyfill\\Php80\\": "" + }, + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ion Bazan", + "email": "ion.bazan@gmail.com" + }, + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill backporting some PHP 8.0+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "support": { + "source": "https://github.com/symfony/polyfill-php80/tree/v1.30.0" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2024-05-31T15:07:36+00:00" + }, { "name": "symfony/property-access", "version": "v7.1.1", @@ -828,16 +1369,16 @@ }, { "name": 
"symfony/serializer", - "version": "v7.1.1", + "version": "v7.1.2", "source": { "type": "git", "url": "https://github.com/symfony/serializer.git", - "reference": "74817ee48e37cce1a1b33c66ffdb750e7e048c3c" + "reference": "d2077674aaaff02a95f290de512aa358947e6bbe" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/serializer/zipball/74817ee48e37cce1a1b33c66ffdb750e7e048c3c", - "reference": "74817ee48e37cce1a1b33c66ffdb750e7e048c3c", + "url": "https://api.github.com/repos/symfony/serializer/zipball/d2077674aaaff02a95f290de512aa358947e6bbe", + "reference": "d2077674aaaff02a95f290de512aa358947e6bbe", "shasum": "" }, "require": { @@ -905,7 +1446,7 @@ "description": "Handles serializing and deserializing data structures, including object graphs, into array structures or other formats like XML and JSON.", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/serializer/tree/v7.1.1" + "source": "https://github.com/symfony/serializer/tree/v7.1.2" }, "funding": [ { @@ -921,7 +1462,7 @@ "type": "tidelift" } ], - "time": "2024-05-31T14:57:53+00:00" + "time": "2024-06-28T07:42:43+00:00" }, { "name": "symfony/service-contracts", diff --git a/ecs.php b/ecs.php index 9d9214c..1ec4e84 100644 --- a/ecs.php +++ b/ecs.php @@ -7,6 +7,7 @@ return ECSConfig::configure() ->withPaths([ __DIR__ . '/src', + __DIR__ . '/tests' ]) ->withSkip([ diff --git a/src/Model/Parser/MarkdownOutputParser.php b/src/Model/Parser/MarkdownOutputParser.php new file mode 100644 index 0000000..5e5c55f --- /dev/null +++ b/src/Model/Parser/MarkdownOutputParser.php @@ -0,0 +1,69 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace Devscast\Lugha\Model\Parser; + +use League\CommonMark\Environment\Environment; +use League\CommonMark\Exception\CommonMarkException; +use League\CommonMark\Extension\Autolink\AutolinkExtension; +use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension; +use League\CommonMark\Extension\DisallowedRawHtml\DisallowedRawHtmlExtension; +use League\CommonMark\Extension\Table\TableExtension; +use League\CommonMark\MarkdownConverter; + +/** + * Class MarkdownOutputParser. + * + * @see https://commonmark.thephpleague.com/ + * @author bernard-ng + */ +final readonly class MarkdownOutputParser implements OutputParserInterface +{ + private MarkdownConverter $converter; + + public function __construct(array $config = []) + { + $environment = new Environment([ + 'html_input' => 'strip', + 'allow_unsafe_links' => false, + 'disallowed_raw_html' => [ + 'disallowed_tags' => [ + 'title', + 'textarea', + 'style', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'script', + 'plaintext', + ], + ], + ...$config, + ]); + $environment + ->addExtension(new CommonMarkCoreExtension()) + ->addExtension(new AutolinkExtension()) + ->addExtension(new DisallowedRawHtmlExtension()) + ->addExtension(new TableExtension()); + $this->converter = new MarkdownConverter($environment); + } + + /** + * @throws CommonMarkException + */ + public function __invoke(string $output): string + { + return trim($this->converter->convert($output)->getContent()); + } +} diff --git a/src/Model/Parser/OutputParserInterface.php b/src/Model/Parser/OutputParserInterface.php index 81a50ac..01ff131 100644 --- a/src/Model/Parser/OutputParserInterface.php +++ b/src/Model/Parser/OutputParserInterface.php @@ -15,6 +15,11 @@ /** * Interface OutputParserInterface. + * Parses the output of a LLM model and returns the desired output. + * The output can be a string, an array, or an object. 
+ * + * e.g. you may want to parse the output of a model that returns a JSON string + * or Markdown text. * * @author bernard-ng */ diff --git a/src/Model/Prompt/PromptTemplate.php b/src/Model/Prompt/PromptTemplate.php index f6fbebc..3405ea3 100644 --- a/src/Model/Prompt/PromptTemplate.php +++ b/src/Model/Prompt/PromptTemplate.php @@ -17,6 +17,7 @@ /** * Class PromptTemplate. + * Lets you create a prompt template that can be formatted with values. * * @author bernard-ng */ diff --git a/src/Retrieval/Document.php b/src/Retrieval/Document.php index fa7149b..25173d1 100644 --- a/src/Retrieval/Document.php +++ b/src/Retrieval/Document.php @@ -15,6 +15,7 @@ /** * Class Document. + * Represents a document that can be indexed and searched. * * @author bernard-ng */ @@ -23,7 +24,7 @@ class Document implements \Stringable public function __construct( public string $content, public array $embeddings = [], - public ?DocumentMetadata $metadata = null, + public ?Metadata $metadata = null, ) { } diff --git a/src/Retrieval/Embedder/EmbedderInterface.php b/src/Retrieval/Embedder/EmbedderInterface.php index 79d51d3..3805d76 100644 --- a/src/Retrieval/Embedder/EmbedderInterface.php +++ b/src/Retrieval/Embedder/EmbedderInterface.php @@ -17,6 +17,7 @@ /** * Interface EmbedderInterface. + * Actually creates embeddings for documents using a model. * * @author bernard-ng */ diff --git a/src/Retrieval/Loader/DirectoryLoader.php b/src/Retrieval/Loader/DirectoryLoader.php index da86dae..ddad6fa 100644 --- a/src/Retrieval/Loader/DirectoryLoader.php +++ b/src/Retrieval/Loader/DirectoryLoader.php @@ -18,6 +18,7 @@ /** * Class DirectoryLoader. + * Allows loading documents from a local directory. 
* * @author bernard-ng */ diff --git a/src/Retrieval/DocumentMetadata.php b/src/Retrieval/Metadata.php similarity index 87% rename from src/Retrieval/DocumentMetadata.php rename to src/Retrieval/Metadata.php index 81a6814..1341aae 100644 --- a/src/Retrieval/DocumentMetadata.php +++ b/src/Retrieval/Metadata.php @@ -14,11 +14,13 @@ namespace Devscast\Lugha\Retrieval; /** - * Class DocumentMetadata. + * Class Metadata. + * You can use this class to store metadata about a document. + * add your own metadata fields as needed by extending this class. * * @author bernard-ng */ -class DocumentMetadata implements \Stringable +class Metadata implements \Stringable { public function __construct( public ?string $hash = null, diff --git a/src/Retrieval/Splitter/TextSplitter.php b/src/Retrieval/Splitter/TextSplitter.php index 58150ec..e91bf5e 100644 --- a/src/Retrieval/Splitter/TextSplitter.php +++ b/src/Retrieval/Splitter/TextSplitter.php @@ -14,14 +14,15 @@ namespace Devscast\Lugha\Retrieval\Splitter; use Devscast\Lugha\Retrieval\Document; -use Devscast\Lugha\Retrieval\DocumentMetadata; +use Devscast\Lugha\Retrieval\Metadata; /** * Class TextSplitter. + * Represents a text splitter that can be used to split text into chunks. 
* * @author bernard-ng */ -readonly class TextSplitter implements SplitterInterface +final readonly class TextSplitter implements SplitterInterface { public function __construct( public int $chunkSize = 200, @@ -77,7 +78,7 @@ public function createDocuments(string $text): iterable * @var int $index */ foreach ($this->splitText($text) as $index => $split) { - yield new Document($split, metadata: new DocumentMetadata( + yield new Document($split, metadata: new Metadata( hash: md5($split), chunkNumber: $index )); diff --git a/src/Retrieval/VectorStore/Distance.php b/src/Retrieval/VectorStore/Distance.php index 2385729..08b4669 100644 --- a/src/Retrieval/VectorStore/Distance.php +++ b/src/Retrieval/VectorStore/Distance.php @@ -15,13 +15,33 @@ /** * Class Distance. + * Represents the distance metric to use when comparing vectors. * * @author bernard-ng */ enum Distance { + /** + * L1 distance. + * @see https://en.wikipedia.org/wiki/Taxicab_geometry + */ case L1; + + /** + * L2 distance. + * @see https://en.wikipedia.org/wiki/Euclidean_distance + */ case L2; + + /** + * Cosine similarity. + * @see https://en.wikipedia.org/wiki/Cosine_similarity + */ case COSINE; + + /** + * Inner product. + * @see https://en.wikipedia.org/wiki/Dot_product + */ case INNER_PRODUCT; } diff --git a/src/Retrieval/VectorStore/VectorStoreInterface.php b/src/Retrieval/VectorStore/VectorStoreInterface.php index 3ea9cf6..9e79f4a 100644 --- a/src/Retrieval/VectorStore/VectorStoreInterface.php +++ b/src/Retrieval/VectorStore/VectorStoreInterface.php @@ -17,6 +17,7 @@ /** * Interface VectorStoreInterface. + * Represents a vector store that can be used to index and search documents. 
* * @author bernard-ng */ @@ -25,7 +26,7 @@ interface VectorStoreInterface public function addDocument(Document $document): void; /** - * @param array $documents + * @param iterable $documents */ public function addDocuments(iterable $documents): void; @@ -37,5 +38,5 @@ public function similaritySearch(string $query, int $k = 4, Distance $distance = /** * @return array $documents */ - public function similaritySearchByVector(array $embedding, int $k = 4, Distance $distance = Distance::L2): array; + public function similaritySearchByVector(array $embeddings, int $k = 4, Distance $distance = Distance::L2): array; } diff --git a/tests/Model/Parser/MarkdownOutputParserTest.php b/tests/Model/Parser/MarkdownOutputParserTest.php new file mode 100644 index 0000000..a487fd5 --- /dev/null +++ b/tests/Model/Parser/MarkdownOutputParserTest.php @@ -0,0 +1,80 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +declare(strict_types=1); + +namespace Devscast\Lugha\Tests\Model\Parser; + +use Devscast\Lugha\Model\Parser\MarkdownOutputParser; +use PHPUnit\Framework\TestCase; + +/** + * Class MarkdownOutputParserTest. + * + * @author bernard-ng + */ +final class MarkdownOutputParserTest extends TestCase +{ + public function testConvertsToHtml(): void + { + $parser = new MarkdownOutputParser(); + $this->assertSame('

Hello World

', $parser('# Hello World')); + } + + public function testConvertsToHtmlWithMultipleLines(): void + { + $parser = new MarkdownOutputParser(); + $this->assertSame( + expected: "

Hello World

\n

This is a test

", + actual: $parser("# Hello World\nThis is a test") + ); + } + + public function testConvertsLinksToHtml(): void + { + $parser = new MarkdownOutputParser(); + $this->assertSame( + expected: '

Example

', + actual: $parser('[Example](https://example.com)') + ); + } + + public function testConvertsAutoLinksToHtml(): void + { + $parser = new MarkdownOutputParser(); + $this->assertSame( + expected: '

https://example.com

', + actual: $parser('https://example.com') + ); + } + + public function testEscapeHtmlTags(): void + { + $parser = new MarkdownOutputParser([ + 'html_input' => 'escape', + ]); + $this->assertSame( + expected: '<title>Strong</title>', + actual: $parser('Strong') + ); + } + + public function testStripHtmlTags(): void + { + $parser = new MarkdownOutputParser([ + 'html_input' => 'strip', + ]); + $this->assertSame( + expected: '

Strong

', + actual: $parser('Strong') + ); + } +} diff --git a/tests/Model/Prompt/PromptTemplateTest.php b/tests/Model/Prompt/PromptTemplateTest.php index cef1cf6..9cb3267 100644 --- a/tests/Model/Prompt/PromptTemplateTest.php +++ b/tests/Model/Prompt/PromptTemplateTest.php @@ -13,8 +13,8 @@ namespace Devscast\Lugha\Tests\Model\Prompt; -use PHPUnit\Framework\TestCase; use Devscast\Lugha\Model\Prompt\PromptTemplate; +use PHPUnit\Framework\TestCase; /** * Class PromptTemplateTest. @@ -33,19 +33,24 @@ public function testItCanCreateAPromptTemplate(): void public function testItCanFormatAPromptTemplate(): void { $template = PromptTemplate::from('Hello {context}'); - $prompt = $template->format(['{context}' => 'some context...']); + $prompt = $template->format([ + '{context}' => 'some context...', + ]); $this->assertInstanceOf(PromptTemplate::class, $prompt); - $this->assertSame("Hello some context...", (string) $prompt); + $this->assertSame('Hello some context...', (string) $prompt); } public function testItCanFormatAPromptTemplateWithMultiplePlaceholders(): void { $template = PromptTemplate::from('Hello {context}, welcome to {place}'); - $prompt = $template->format(['{context}' => 'some context...', '{place}' => 'some place...']); + $prompt = $template->format([ + '{context}' => 'some context...', + '{place}' => 'some place...', + ]); $this->assertInstanceOf(PromptTemplate::class, $prompt); - $this->assertSame("Hello some context..., welcome to some place...", (string) $prompt); + $this->assertSame('Hello some context..., welcome to some place...', (string) $prompt); } public function testItFailsToCreateAPromptTemplateWithEmptyTemplate(): void diff --git a/tests/Retrieval/Splitter/TextSplitterTest.php b/tests/Retrieval/Splitter/TextSplitterTest.php index cacf4e9..40d728f 100644 --- a/tests/Retrieval/Splitter/TextSplitterTest.php +++ b/tests/Retrieval/Splitter/TextSplitterTest.php @@ -14,7 +14,6 @@ namespace Devscast\Lugha\Tests\Retrieval\Splitter; use 
Devscast\Lugha\Retrieval\Splitter\TextSplitter; - use PHPUnit\Framework\TestCase; /** @@ -43,7 +42,7 @@ public function testChunksWithOverlap(): void 'fghijklmno', 'klmnopqrst', 'pqrstuvwxy', - 'uvwxyz' + 'uvwxyz', ]; $chunks = iterator_to_array($splitter->splitText($text)); @@ -67,7 +66,7 @@ public function testVerySmallChunkSize(): void $expectedChunks = [ 'This ', 'is a ', - 'test.' + 'test.', ]; $chunks = iterator_to_array($splitter->splitText($text));