Skip to content

Commit

Permalink
implement rtf loader
Browse files Browse the repository at this point in the history
  • Loading branch information
HelgeSverre committed Oct 8, 2023
1 parent f6a4153 commit 228e741
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 1 deletion.
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"ext-zip": "*",
"aws/aws-sdk-php": "^3.281",
"illuminate/contracts": "^10.0",
"jstewmc/rtf": "^0.5.2",
"league/flysystem-aws-s3-v3": "^3.16",
"openai-php/laravel": "^0.7.0",
"prinsfrank/standards": "^2.1",
Expand Down
18 changes: 18 additions & 0 deletions src/TextLoader/Rtf.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

namespace HelgeSverre\ReceiptScanner\TextLoader;

use HelgeSverre\ReceiptScanner\Contracts\TextLoader;
use HelgeSverre\ReceiptScanner\TextContent;
use Jstewmc\Rtf\Document;

/**
 * Text loader backed by the Jstewmc\Rtf document parser.
 */
class Rtf implements TextLoader
{
    /**
     * Build an RTF document from the given data and wrap its text rendering.
     *
     * @param  mixed  $data  Forwarded unchanged to the Document constructor;
     *                       presumably the raw RTF source string — confirm against callers.
     * @return TextContent|null This implementation always returns a TextContent;
     *                          the nullable type comes from the method signature.
     */
    public function load(mixed $data): ?TextContent
    {
        $rtf = new Document($data);

        // Render the document's root element as text and hand it to TextContent.
        return new TextContent($rtf->getRoot()->toText());
    }
}
7 changes: 7 additions & 0 deletions src/TextLoaderFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use HelgeSverre\ReceiptScanner\Contracts\TextLoader;
use HelgeSverre\ReceiptScanner\TextLoader\Html;
use HelgeSverre\ReceiptScanner\TextLoader\Pdf;
use HelgeSverre\ReceiptScanner\TextLoader\Rtf;
use HelgeSverre\ReceiptScanner\TextLoader\Text;
use HelgeSverre\ReceiptScanner\TextLoader\Textract;
use HelgeSverre\ReceiptScanner\TextLoader\TextractUsingS3Upload;
Expand All @@ -28,6 +29,7 @@ public function create(string $type): TextLoader
'html' => $this->container->make(Html::class),
'pdf' => $this->container->make(Pdf::class),
'text' => $this->container->make(Text::class),
'rtf' => $this->container->make(Rtf::class),
'textract_s3' => $this->container->make(TextractUsingS3Upload::class),
'textract' => $this->container->make(Textract::class),
'web' => $this->container->make(Web::class),
Expand All @@ -52,6 +54,11 @@ public function text(mixed $data): ?TextContent
return $this->create('text')->load($data);
}

/**
 * Load text content using the loader registered under the 'rtf' key.
 *
 * @param  mixed  $data  Passed unchanged to the loader's load() method.
 */
public function rtf(mixed $data): ?TextContent
{
    $loader = $this->create('rtf');

    return $loader->load($data);
}

public function textractUsingS3Upload(mixed $data): ?TextContent
{
return $this->create('textract_s3')->load($data);
Expand Down
20 changes: 19 additions & 1 deletion tests/TextLoaderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,23 @@
);
});

it('Can load Word Documents', function () {
it('Can load a Word documents i found on the internet', function () {
    // Read the legacy .doc sample and run it through the Word loader.
    $contents = file_get_contents(__DIR__.'/samples/word-document.doc');
    $text = Text::word($contents);

    expect($text)->toBeInstanceOf(TextContent::class);
    expect($text->toString())->toContain('Mauris');
});

it('Can load a Word document exported from google docs', function () {
    // Read the .docx sample and run it through the Word loader.
    $contents = file_get_contents(__DIR__.'/samples/contract.docx');
    $text = Text::word($contents);

    expect($text)->toBeInstanceOf(TextContent::class);

    // Both headings must be present in the extracted text.
    expect($text->toString())->toContain(
        'Contract Agreement',
        'Termination of the Agreement',
    );
});

it('Can load text from website', function () {
$text = Text::web('https://sparksuite.github.io/simple-html-invoice-template/');

Expand All @@ -67,3 +76,12 @@
'NOK 1,246.25',
);
});

it('Can load rtf files', function () {
    // Read the .rtf sample and run it through the RTF loader.
    $contents = file_get_contents(__DIR__.'/samples/contract.rtf');
    $text = Text::rtf($contents);

    expect($text)->toBeInstanceOf(TextContent::class);

    // Both headings must be present in the extracted text.
    expect($text->toString())->toContain(
        'Contract Agreement',
        'Termination of the Agreement',
    );
});

0 comments on commit 228e741

Please sign in to comment.