diff --git a/textminer/extractor.py b/textminer/extractor.py
index e69de29..c85d38e 100644
--- a/textminer/extractor.py
+++ b/textminer/extractor.py
@@ -0,0 +1,23 @@
+import re
+
+text = """Dear Mr. Davis,
+
+I got to know of your company through our mutual friend Fiona Williams and the
+training you offer to graduate students in Advertising.
+
+I am a graduate student of Mass Communications with specialization in
+Advertising. I am currently pursuing the last year of my course.
+I would very much like to see firsthand the work environment in an advertising
+agency.
+
+If you would like a reference, my advisor can be reached at (454) 999-1212.
+
+You can contact me at (919) 123-4569 at your convenience."""
+
+
+
+def phone_numbers(text):
+    """Return every phone number in text as an (area, prefix, line) tuple."""
+    return re.findall(r"\(?(\d{3})\)?[\.\-]?\s*(\d{3})[\.\-]?(\d{4})", text)
+
+print(phone_numbers(text))
diff --git a/textminer/tests/test_validator.py b/textminer/tests/test_validator.py
index b7e1364..2b3c07c 100644
--- a/textminer/tests/test_validator.py
+++ b/textminer/tests/test_validator.py
@@ -4,7 +4,7 @@ import textminer.validator as v
 
 
 
-@xfail
+# @xfail
 def test_binary_numbers():
     assert v.binary("0")
     assert v.binary("1")
@@ -15,7 +15,7 @@ def test_binary_numbers():
     assert not v.binary("911")
 
 
-@xfail
+# @xfail
 def test_binary_even():
     """String must be a binary number and be even."""
 
@@ -24,7 +24,7 @@ def test_binary_even():
     assert not v.binary_even("1011")
 
 
-@xfail
+# @xfail
 def test_hexadecimal():
     assert v.hex("CAFE")
     assert v.hex("9F9")
@@ -34,7 +34,7 @@ def test_hexadecimal():
     assert not v.hex("COFFEE")
 
 
-@xfail
+# @xfail
 def test_word():
     assert v.word("hello")
     assert v.word("wonderful")
@@ -47,7 +47,7 @@ def test_word():
     assert not v.word("bar*us")
 
 
-@xfail
+# @xfail
 def test_words():
     """words can take an optional count argument.
     In case it exists, the text must match that number of words."""
@@ -69,7 +69,7 @@ def test_words():
     assert not v.words("18-wheeler tarbox", count=3)
 
 
-@xfail
+# @xfail
 def test_phone_numbers():
     """US phone numbers only."""
 
diff --git a/textminer/validator.py b/textminer/validator.py
index e69de29..46e8125 100644
--- a/textminer/validator.py
+++ b/textminer/validator.py
@@ -0,0 +1,50 @@
+"""Validators that test whether an entire string matches a format."""
+
+import re
+import collections
+
+
+def binary(string):
+    """Return True if string is made up only of the digits 0 and 1."""
+    return re.fullmatch(r"[01]+", string) is not None
+
+
+def binary_even(string):
+    """Return True if string is a binary number whose last digit is 0."""
+    return re.fullmatch(r"[01]*0", string) is not None
+
+
+def hex(string):
+    """Return True if string is made up only of hexadecimal digits."""
+    # NOTE: shadows the builtin hex(); name kept for existing callers.
+    return re.fullmatch(r"[0-9A-Fa-f]+", string) is not None
+
+
+def word(string):
+    """Return True if string is a single word.
+
+    A word is a run of letters, digits or underscores, optionally joined
+    by single internal hyphens (for example "18-wheeler").
+    """
+    return re.fullmatch(r"\w+(?:-\w+)*", string) is not None
+
+
+def words(string, count=None):
+    """Return True if string is one or more whitespace-separated words.
+
+    When count is given, the number of words must equal it exactly.
+    """
+    tokens = string.split()
+    if not tokens or not all(word(token) for token in tokens):
+        return False
+    return count is None or len(tokens) == count
+
+
+def phone_number(string):
+    """Return True if string is a ten-digit US phone number.
+
+    The area code may be wrapped in parentheses, and the three groups may
+    be separated by a hyphen, a dot, or whitespace.
+    """
+    pattern = r"(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}"
+    return re.fullmatch(pattern, string) is not None