diff --git a/src/EmailReplyParser/Parser/EmailParser.php b/src/EmailReplyParser/Parser/EmailParser.php index 5d8e780..6652a73 100644 --- a/src/EmailReplyParser/Parser/EmailParser.php +++ b/src/EmailReplyParser/Parser/EmailParser.php @@ -18,12 +18,14 @@ */ class EmailParser { + const QUOTE_REGEX = '/>+$/s'; + /** * Regex to match signatures + * + * @var string */ - const SIG_REGEX = '/(?:^\s*--|^\s*__|^-\w|^-- $)|(?:^Sent from my (?:\s*\w+){1,4}$)|(?:^={30,}$)$/s'; - - const QUOTE_REGEX = '/>+$/s'; + private $signatureRegex = '/(?:^\s*--|^\s*__|^-\w|^-- $)|(?:^Sent from my (?:\s*\w+){1,4}$)|(?:^={30,}$)$/s'; /** * @var string[] @@ -141,6 +143,28 @@ public function setQuoteHeadersRegex(array $quoteHeadersRegex) return $this; } + /** + * @return string + * @since 2.7.0 + */ + public function getSignatureRegex() + { + return $this->signatureRegex; + } + + /** + * @param string $signatureRegex + * + * @return EmailParser + * @since 2.7.0 + */ + public function setSignatureRegex($signatureRegex) + { + $this->signatureRegex = $signatureRegex; + + return $this; + } + /** * @param FragmentDTO[] $fragmentDTOs * @@ -174,11 +198,12 @@ private function isQuoteHeader($line) private function isSignature($line) { - return preg_match(static::SIG_REGEX, strrev($line)) ? true : false; + return preg_match($this->signatureRegex, strrev($line)) ? true : false; } /** * @param string $line + * @return bool */ private function isQuote($line) { @@ -191,8 +216,10 @@ private function isEmpty(FragmentDTO $fragment) } /** + * @param FragmentDTO $fragment * @param string $line * @param boolean $isQuoted + * @return bool */ private function isFragmentLine(FragmentDTO $fragment, $line, $isQuoted) { diff --git a/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php b/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php index ddf182f..698e3af 100644 --- a/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php +++ b/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php @@ -410,6 +410,24 @@ public function testEmailWithFairAmountOfContent() $this->assertRegexp('/^On Thursday/', (string) $fragments[0]); } + /** + * override regexp, not to match too greedy signature. + * + * See: https://github.com/willdurand/EmailReplyParser/pull/42 + */ + public function testCustomSignatureRegex() + { + $signatureRegex = '/(?:^\s*--|^\s*__|^-- $)|(?:^Sent from my (?:\s*\w+){1,3})$/s'; + $this->parser->setSignatureRegex($signatureRegex); + $email = $this->parser->parse($this->getFixtures('email_ls-l.txt')); + $fragments = $email->getFragments(); + + // this should match two blocks, body and a signature + $this->assertCount(2, $fragments); + $this->assertFalse($fragments[0]->isSignature()); + $this->assertTrue($fragments[1]->isSignature()); + } + /** * @dataProvider getDateFormats */ diff --git a/tests/Fixtures/email_ls-l.txt b/tests/Fixtures/email_ls-l.txt new file mode 100644 index 0000000..afd2270 --- /dev/null +++ b/tests/Fixtures/email_ls-l.txt @@ -0,0 +1,35 @@ +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8; format=flowed +Content-Transfer-Encoding: 7bit + +here's some funny one + + +$ LC_ALL=C ls -l /tmp|grep sess|head +-rw------- 1 http http 62 Feb 15 12:45 +sess_07ncrlhq50obbd5kp1vp02lp97 +-rw------- 1 http http 0 Feb 15 10:18 +sess_0g01akj9ccmq6r001p2klb55s0 +-rw------- 1 http http 0 Feb 14 23:26 +sess_0gcjo35c35f330p4qm31c1ovv1 +-rw------- 1 http http 4410 Feb 15 12:32 +sess_0i16be4lk5derhdfeas2uomnf4 +-rw------- 1 http http 172 Feb 15 11:59 +sess_0jv5f5i6eu7qfp41mc6hkfjpq3 +-rw------- 1 http http 0 Feb 15 10:12 +sess_0njep2fkt6v5j45t5r0hcfup77 +-rw------- 1 http http 0 Feb 15 09:57 +sess_1j44bltbjpkej984sfor5461u3 +-rw------- 1 http http 0 Feb 14 23:37 +sess_1r9r8a6kaqscq46psrcf1ssm24 +-rw------- 1 http http 0 Feb 15 10:17 +sess_25cvldfhk0nann15asctkrg3b2 +-rw------- 1 http http 59 Feb 14 23:43 +sess_25tni1suqgasqk8osnmk098sc6 + + + +-- +glen + +