diff --git a/composer.json b/composer.json index 173fd01..4a027db 100644 --- a/composer.json +++ b/composer.json @@ -20,6 +20,7 @@ "ext-zip": "*", "aws/aws-sdk-php": "^3.281", "illuminate/contracts": "^10.0", + "jstewmc/rtf": "^0.5.2", "league/flysystem-aws-s3-v3": "^3.16", "openai-php/laravel": "^0.7.0", "prinsfrank/standards": "^2.1", diff --git a/src/TextLoader/Rtf.php b/src/TextLoader/Rtf.php new file mode 100644 index 0000000..6237cd4 --- /dev/null +++ b/src/TextLoader/Rtf.php @@ -0,0 +1,18 @@ +getRoot()->toText(); + + return new TextContent($text); + } +} diff --git a/src/TextLoaderFactory.php b/src/TextLoaderFactory.php index fbf955f..0d159a9 100644 --- a/src/TextLoaderFactory.php +++ b/src/TextLoaderFactory.php @@ -5,6 +5,7 @@ use HelgeSverre\ReceiptScanner\Contracts\TextLoader; use HelgeSverre\ReceiptScanner\TextLoader\Html; use HelgeSverre\ReceiptScanner\TextLoader\Pdf; +use HelgeSverre\ReceiptScanner\TextLoader\Rtf; use HelgeSverre\ReceiptScanner\TextLoader\Text; use HelgeSverre\ReceiptScanner\TextLoader\Textract; use HelgeSverre\ReceiptScanner\TextLoader\TextractUsingS3Upload; @@ -28,6 +29,7 @@ public function create(string $type): TextLoader 'html' => $this->container->make(Html::class), 'pdf' => $this->container->make(Pdf::class), 'text' => $this->container->make(Text::class), + 'rtf' => $this->container->make(Rtf::class), 'textract_s3' => $this->container->make(TextractUsingS3Upload::class), 'textract' => $this->container->make(Textract::class), 'web' => $this->container->make(Web::class), @@ -52,6 +54,11 @@ public function text(mixed $data): ?TextContent return $this->create('text')->load($data); } + public function rtf(mixed $data): ?TextContent + { + return $this->create('rtf')->load($data); + } + public function textractUsingS3Upload(mixed $data): ?TextContent { return $this->create('textract_s3')->load($data); diff --git a/tests/TextLoaderTest.php b/tests/TextLoaderTest.php index 6d72eee..ed3cb07 100644 --- a/tests/TextLoaderTest.php +++ b/tests/TextLoaderTest.php @@ -42,7 +42,7 @@ ); }); -it('Can load Word Documents', function () { +it('Can load a Word documents i found on the internet', function () { $text = Text::word(file_get_contents(__DIR__.'/samples/word-document.doc')); expect($text)->toBeInstanceOf(TextContent::class)->and($text->toString())->toContain( @@ -50,6 +50,15 @@ ); }); +it('Can load a Word document exported from google docs', function () { + $text = Text::word(file_get_contents(__DIR__.'/samples/contract.docx')); + + expect($text)->toBeInstanceOf(TextContent::class)->and($text->toString())->toContain( + 'Contract Agreement', + 'Termination of the Agreement', + ); +}); + it('Can load text from website', function () { $text = Text::web('https://sparksuite.github.io/simple-html-invoice-template/'); @@ -67,3 +76,12 @@ 'NOK 1,246.25', ); }); + +it('Can load rtf files', function () { + $text = Text::rtf(file_get_contents(__DIR__.'/samples/contract.rtf')); + + expect($text)->toBeInstanceOf(TextContent::class)->and($text->toString())->toContain( + 'Contract Agreement', + 'Termination of the Agreement', + ); +});