From 2fd97d56667c52cbf3d566d968513e2f5cea82bf Mon Sep 17 00:00:00 2001 From: Sofiane SADOUD Date: Wed, 27 Sep 2017 10:22:49 +0200 Subject: [PATCH] feat(email-reply-parser): parse email with simplified chinese headers --- src/EmailReplyParser/Parser/EmailParser.php | 1 + .../Tests/Parser/EmailParserTest.php | 20 ++++++------------- tests/Fixtures/email_22.txt | 14 +++++++++++++ 3 files changed, 21 insertions(+), 14 deletions(-) create mode 100644 tests/Fixtures/email_22.txt diff --git a/src/EmailReplyParser/Parser/EmailParser.php b/src/EmailReplyParser/Parser/EmailParser.php index 89fcf65..5d8e780 100644 --- a/src/EmailReplyParser/Parser/EmailParser.php +++ b/src/EmailReplyParser/Parser/EmailParser.php @@ -42,6 +42,7 @@ class EmailParser '/^(20[0-9]{2}\/.+のメッセージ:)$/m', // DATE TIME、NAME のメッセージ: '/^(.+\s<.+>\sschrieb:)$/m', // NAME <EMAIL> schrieb: '/^\s*(From\s?:.+\s?(\[|<).+(\]|>))/mu', // "From: NAME <EMAIL>" OR "From : NAME <EMAIL>" OR "From : NAME<EMAIL>"(With support whitespace before start and before <) + '/^\s*(发件人\s?:.+\s?(\[|<).+(\]|>))/mu', // "发件人: NAME <EMAIL>" OR "发件人 : NAME <EMAIL>" OR "发件人 : NAME<EMAIL>"(With support whitespace before start and before <) '/^\s*(De\s?:.+\s?(\[|<).+(\]|>))/mu', // "De: NAME <EMAIL>" OR "De : NAME <EMAIL>" OR "De : NAME<EMAIL>" (With support whitespace before start and before <) '/^\s*(Van\s?:.+\s?(\[|<).+(\]|>))/mu', // "Van: NAME <EMAIL>" OR "Van : NAME <EMAIL>" OR "Van : NAME<EMAIL>" (With support whitespace before start and before <) '/^\s*(Da\s?:.+\s?(\[|<).+(\]|>))/mu', // "Da: NAME <EMAIL>" OR "Da : NAME <EMAIL>" OR "Da : NAME<EMAIL>" (With support whitespace before start and before <) diff --git a/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php b/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php index 127bca9..ddf182f 100644 --- a/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php +++ b/tests/EmailReplyParser/Tests/Parser/EmailParserTest.php @@ -148,7 +148,6 @@ public function testEmailItalian() $email = $this->parser->parse($this->getFixtures('email_7.txt')); $fragments = $email->getFragments(); 
$this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailDutch() @@ -156,7 +155,6 @@ public function testEmailDutch() $email = $this->parser->parse($this->getFixtures('email_8.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailSignatureWithEqual() @@ -164,7 +162,6 @@ public function testEmailSignatureWithEqual() $email = $this->parser->parse($this->getFixtures('email_9.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailHotmail() @@ -172,7 +169,6 @@ public function testEmailHotmail() $email = $this->parser->parse($this->getFixtures('email_10.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailWhitespaceBeforeHeader() @@ -180,7 +176,6 @@ public function testEmailWhitespaceBeforeHeader() $email = $this->parser->parse($this->getFixtures('email_11.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailWithSquareBrackets() @@ -188,7 +183,6 @@ public function testEmailWithSquareBrackets() $email = $this->parser->parse($this->getFixtures('email_12.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailDaIntoItalian() @@ -196,7 +190,6 @@ public function testEmailDaIntoItalian() $email = $this->parser->parse($this->getFixtures('email_13.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailHeaderPolish() @@ -204,7 +197,13 @@ public function testEmailHeaderPolish() $email = $this->parser->parse($this->getFixtures('email_14.txt')); $fragments = $email->getFragments(); 
$this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); + } + public function testEmailHeaderSimplifiedChinese() + { + $email = $this->parser->parse($this->getFixtures('email_22.txt')); + $fragments = $email->getFragments(); + $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); } public function testEmailSentFromMy() @@ -212,7 +211,6 @@ public function testEmailSentFromMy() $email = $this->parser->parse($this->getFixtures('email_15.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailHeaderPolishWithDniaAndNapisala() @@ -220,7 +218,6 @@ public function testEmailHeaderPolishWithDniaAndNapisala() $email = $this->parser->parse($this->getFixtures('email_16.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailHeaderPolishWithDateInIso8601() @@ -228,7 +225,6 @@ public function testEmailHeaderPolishWithDateInIso8601() $email = $this->parser->parse($this->getFixtures('email_17.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } public function testEmailOutlookEn() @@ -236,10 +232,8 @@ public function testEmailOutlookEn() $email = $this->parser->parse($this->getFixtures('email_18.txt')); $fragments = $email->getFragments(); $this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); - } - public function testGetVisibleTextReturnsOnlyVisibleFragments() { $email = $this->parser->parse($this->getFixtures('email_2_1.txt')); @@ -250,7 +244,6 @@ public function testGetVisibleTextReturnsOnlyVisibleFragments() $this->assertEquals(rtrim(implode("\n", $visibleFragments)), $email->getVisibleText()); } - public function testEmailGmailNo() { $email = $this->parser->parse($this->getFixtures('email_norwegian_gmail.txt')); @@ -258,7 +251,6 @@ public function testEmailGmailNo() 
$this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0])); } - public function testReadsEmailWithCorrectSignature() { $email = $this->parser->parse($this->getFixtures('correct_sig.txt')); diff --git a/tests/Fixtures/email_22.txt b/tests/Fixtures/email_22.txt new file mode 100644 index 0000000..276a89c --- /dev/null +++ b/tests/Fixtures/email_22.txt @@ -0,0 +1,14 @@ +Fusce bibendum, quam hendrerit sagittis tempor, dui turpis tempus erat, pharetra sodales ante sem sit amet metus. +Nulla malesuada, orci non vulputate lobortis, massa felis pharetra ex, convallis consectetur ex libero eget ante. +Nam vel turpis posuere, rhoncus ligula in, venenatis orci. Duis interdum venenatis ex a rutrum. +Duis ut libero eu lectus consequat consequat ut vel lorem. Vestibulum convallis lectus urna, +et mollis ligula rutrum quis. Fusce sed odio id arcu varius aliquet nec nec nibh. + +发件人: Company Questions +发送时间: 2017年8月2日 15:37 +收件人: Sylvie +主题: Company France - China + +Etiam non sagittis orci, non rutrum urna. Suspendisse ut sapien id dolor posuere placerat et vitae felis. +Fusce mollis condimentum nulla. Donec luctus justo eu purus placerat, non suscipit ex facilisis. +Sed risus lorem, porta eget imperdiet in, euismod eu nisl. Integer vel metus felis.