Skip to content

Commit

Permalink
feat(email-reply-parser): parse email with simplified chinese headers
Browse files Browse the repository at this point in the history
  • Loading branch information
kabylixx committed Sep 27, 2017
1 parent 747ed12 commit 2fd97d5
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
1 change: 1 addition & 0 deletions src/EmailReplyParser/Parser/EmailParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class EmailParser
'/^(20[0-9]{2}\/.+のメッセージ:)$/m', // DATE TIME、NAME のメッセージ:
'/^(.+\s<.+>\sschrieb:)$/m', // NAME <EMAIL> schrieb:
'/^\s*(From\s?:.+\s?(\[|<).+(\]|>))/mu', // "From: NAME <EMAIL>" OR "From : NAME <EMAIL>" OR "From : NAME<EMAIL>"(With support whitespace before start and before <)
'/^\s*(发件人\s?:.+\s?(\[|<).+(\]|>))/mu', // "发件人: NAME <EMAIL>" OR "发件人 : NAME <EMAIL>" OR "发件人 : NAME<EMAIL>"(With support whitespace before start and before <)
'/^\s*(De\s?:.+\s?(\[|<).+(\]|>))/mu', // "De: NAME <EMAIL>" OR "De : NAME <EMAIL>" OR "De : NAME<EMAIL>" (With support whitespace before start and before <)
'/^\s*(Van\s?:.+\s?(\[|<).+(\]|>))/mu', // "Van: NAME <EMAIL>" OR "Van : NAME <EMAIL>" OR "Van : NAME<EMAIL>" (With support whitespace before start and before <)
'/^\s*(Da\s?:.+\s?(\[|<).+(\]|>))/mu', // "Da: NAME <EMAIL>" OR "Da : NAME <EMAIL>" OR "Da : NAME<EMAIL>" (With support whitespace before start and before <)
Expand Down
20 changes: 6 additions & 14 deletions tests/EmailReplyParser/Tests/Parser/EmailParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,98 +148,92 @@ public function testEmailItalian()
$email = $this->parser->parse($this->getFixtures('email_7.txt'));
$fragments = $email->getFragments();
$this->assertEquals(static::COMMON_FIRST_FRAGMENT, trim($fragments[0]));

}

/**
 * Parses the fixture email_8.txt (the Dutch case, going by the test name)
 * and checks that the first fragment, once trimmed, equals the shared
 * COMMON_FIRST_FRAGMENT text.
 */
public function testEmailDutch()
{
    $rawMessage = $this->getFixtures('email_8.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_9.txt (per the test name, a message whose
 * signature involves an equal sign) and checks that the trimmed first
 * fragment equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailSignatureWithEqual()
{
    $rawMessage = $this->getFixtures('email_9.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_10.txt (the Hotmail case, going by the test
 * name) and checks that the trimmed first fragment equals
 * COMMON_FIRST_FRAGMENT.
 */
public function testEmailHotmail()
{
    $rawMessage = $this->getFixtures('email_10.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_11.txt (per the test name, a quoted header
 * preceded by whitespace) and checks that the trimmed first fragment
 * equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailWhitespaceBeforeHeader()
{
    $rawMessage = $this->getFixtures('email_11.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_12.txt (per the test name, a header that uses
 * square brackets) and checks that the trimmed first fragment equals
 * COMMON_FIRST_FRAGMENT.
 */
public function testEmailWithSquareBrackets()
{
    $rawMessage = $this->getFixtures('email_12.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_13.txt (per the test name, the Italian "Da"
 * header case) and checks that the trimmed first fragment equals
 * COMMON_FIRST_FRAGMENT.
 */
public function testEmailDaIntoItalian()
{
    $rawMessage = $this->getFixtures('email_13.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_14.txt (the Polish header case, going by the
 * test name) and checks that the trimmed first fragment equals
 * COMMON_FIRST_FRAGMENT.
 */
public function testEmailHeaderPolish()
{
    $rawMessage = $this->getFixtures('email_14.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_22.txt, whose quoted section is introduced by
 * Simplified Chinese headers (starting with a "发件人: NAME <EMAIL>" line),
 * and checks that the trimmed first fragment equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailHeaderSimplifiedChinese()
{
    $rawMessage = $this->getFixtures('email_22.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_15.txt (per the test name, a message with a
 * "Sent from my ..." line) and checks that the trimmed first fragment
 * equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailSentFromMy()
{
    $rawMessage = $this->getFixtures('email_15.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_16.txt (per the test name, a Polish header
 * using "Dnia" and "napisała") and checks that the trimmed first fragment
 * equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailHeaderPolishWithDniaAndNapisala()
{
    $rawMessage = $this->getFixtures('email_16.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_17.txt (per the test name, a Polish header
 * carrying an ISO 8601 date) and checks that the trimmed first fragment
 * equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailHeaderPolishWithDateInIso8601()
{
    $rawMessage = $this->getFixtures('email_17.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}

/**
 * Parses the fixture email_18.txt (the English Outlook case, going by the
 * test name) and checks that the trimmed first fragment equals
 * COMMON_FIRST_FRAGMENT.
 */
public function testEmailOutlookEn()
{
    $rawMessage = $this->getFixtures('email_18.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}


public function testGetVisibleTextReturnsOnlyVisibleFragments()
{
$email = $this->parser->parse($this->getFixtures('email_2_1.txt'));
Expand All @@ -250,15 +244,13 @@ public function testGetVisibleTextReturnsOnlyVisibleFragments()
$this->assertEquals(rtrim(implode("\n", $visibleFragments)), $email->getVisibleText());
}


/**
 * Parses the fixture email_norwegian_gmail.txt and checks that the
 * trimmed first fragment equals COMMON_FIRST_FRAGMENT.
 */
public function testEmailGmailNo()
{
    $rawMessage = $this->getFixtures('email_norwegian_gmail.txt');
    $parts = $this->parser->parse($rawMessage)->getFragments();
    $leadingText = trim($parts[0]);

    $this->assertEquals(static::COMMON_FIRST_FRAGMENT, $leadingText);
}


public function testReadsEmailWithCorrectSignature()
{
$email = $this->parser->parse($this->getFixtures('correct_sig.txt'));
Expand Down
14 changes: 14 additions & 0 deletions tests/Fixtures/email_22.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Fusce bibendum, quam hendrerit sagittis tempor, dui turpis tempus erat, pharetra sodales ante sem sit amet metus.
Nulla malesuada, orci non vulputate lobortis, massa felis pharetra ex, convallis consectetur ex libero eget ante.
Nam vel turpis posuere, rhoncus ligula in, venenatis orci. Duis interdum venenatis ex a rutrum.
Duis ut libero eu lectus consequat consequat ut vel lorem. Vestibulum convallis lectus urna,
et mollis ligula rutrum quis. Fusce sed odio id arcu varius aliquet nec nec nibh.

发件人: Company Questions <[email protected]>
发送时间: 2017年8月2日 15:37
收件人: Sylvie
主题: Company France - China

Etiam non sagittis orci, non rutrum urna. Suspendisse ut sapien id dolor posuere placerat et vitae felis.
Fusce mollis condimentum nulla. Donec luctus justo eu purus placerat, non suscipit ex facilisis.
Sed risus lorem, porta eget imperdiet in, euismod eu nisl. Integer vel metus felis.

0 comments on commit 2fd97d5

Please sign in to comment.