diff --git a/LICENCE.md b/LICENCE.md
new file mode 100644
index 0000000..8ea8fc5
--- /dev/null
+++ b/LICENCE.md
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+
+3. Grant of Patent License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution.
+
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+
+5. Submission of Contributions.
+
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+
+6. Trademarks.
+
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty.
+
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+
+8. Limitation of Liability.
+
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability.
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work
+
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "[]" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+
+ Copyright 2015 Interactive Intelligence
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 8f71f43..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,202 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright {yyyy} {name of copyright owner}
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
diff --git a/README.md b/README.md
index c1c6781..915f40c 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,79 @@
-# elasticsearch-phone
-An Elasticsearch Phone Number Analyzer Plugin
+# Elasticsearch-Phone
+
+Indexing phone numbers & SIP addresses in Lucene is complicated. Most people use ngram tokenizers. We did that for a while with ngram min=3 & max=35, but the result was often 100s of tokens per SIP address. Working in a call-center-focused company, we quickly figured out how wasteful that is on the storage front. For us, 6/7ths of our indexes were wasted on useless SIP address tokens.
+
+It's a hard problem to regex your way out of. An international phone number often includes a country code, but that can be 1, 2, or 3+ digits. A lot of people have requested that Elasticsearch integrate Google's libphonenumber library into a custom Lucene analyzer. It hasn't happened yet, so here's a plugin that attempts to do just that.
+
+Note: This is a young project that we're just starting to test as of 8/3/2015. We'll improve it as time goes on, but use at your own risk.
+
+# Building and installing the plugin
+mvn package
+./bin/plugin --url file:///....elasticsearch-phone/target/releases/elasticsearch-phone-1.0.0.zip --install elasticsearch-phone;
+
+## Example inputs
+
+Provide a telephone or sip address prefixed by "tel:" or "sip:" with no spaces or symbols.
+
+Your indexing template will need to specify the analyzer for the field, e.g.:
+ "dnis": {
+ "type": "string",
+ "analyzer": "phone"
+ },
+
+
+Sample allowed inputs (see PhoneIntegrationTest for more)
+tel:+441344840400
+tel:+498362930830
+sip:abc@autosbcpc
+sip:+13119310462;ext=2244@178.12.10.115:8060
+
+## Example tokenization
+
+INPUT (with country code derived by Google's libphonenumber)
+
+sip:+13169410766;ext=2233@172.17.10.117:8060
+
+TOKENS
+
++13169410766;ext=2233
+1
+2233
+3169410766
+3
+13
+31
+131
+316
+1316
+3169
+13169
+31694
+131694
+316941
+1316941
+3169410
+13169410
+31694107
+131694107
+316941076
+1316941076
+3169410766
+13169410766
+
+INPUT (without a country code)
+
+tel:8177148350
+
+TOKENS
+
+8177148350
+8
+81
+817
+8177
+81771
+817714
+8177148
+81771483
+817714835
+8177148350
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..44f155e
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,176 @@
+
+
+ 4.0.0
+
+ com.inin.analytics
+ elasticsearch-phone
+ jar
+ 1.0.0
+ elasticsearch-phone
+ Elasticsearch Plugin for Phone and SIP Analysis
+ https://github.com/MyPureCloud/elasticsearch-phone
+
+ UTF-8
+ 1.7
+ 4.11
+ /tmp
+
+
+
+
+ The Apache License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+
+
+
+
+ https://github.com/MyPureCloud/elasticsearch-phone.git
+ https://github.com/MyPureCloud/elasticsearch-phone.git
+ https://github.com/MyPureCloud/elasticsearch-phone.git
+
+
+
+
+
+ oss-sonatype
+ oss-sonatype
+ https://oss.sonatype.org/content/groups/public
+
+
+
+
+
+ com.googlecode.libphonenumber
+ libphonenumber
+ 7.0.7
+
+
+
+ org.apache.lucene
+ lucene-test-framework
+ 4.10.4
+ test
+
+
+
+ org.elasticsearch
+ elasticsearch
+ 1.6.0
+
+
+
+ com.carrotsearch.randomizedtesting
+ randomizedtesting-runner
+ 2.1.11
+ test
+
+
+ org.elasticsearch
+ elasticsearch
+ 1.6.0
+ test
+ test-jar
+
+
+
+ org.hamcrest
+ hamcrest-all
+ 1.3
+ test
+
+
+
+ org.apache.commons
+ commons-lang3
+ 3.4
+
+
+
+ commons-io
+ commons-io
+ 2.4
+
+
+
+
+ junit
+ junit
+ ${junit.version}
+ test
+
+
+
+
+
+ Drew Dahlke
+ justin.dahlke@gmail.com
+ Interactive Intelligence
+ http://www.inin.com
+
+
+ Michael Mulligan
+ Michael.Mulligan@inin.com
+ Interactive Intelligence
+ http://www.inin.com
+
+
+
+
+
+ ossrh
+ https://oss.sonatype.org/content/repositories/snapshots
+
+
+ ossrh
+ https://oss.sonatype.org/service/local/staging/deploy/maven2/
+
+
+
+
+
+
+ src/main/resources
+ true
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+
+ com.carrotsearch.randomizedtesting
+ junit4-maven-plugin
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ maven-assembly-plugin
+ 2.3
+
+ false
+ ${project.build.directory}/releases/
+
+ ${basedir}/src/main/assemblies/plugin.xml
+
+
+
+
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml
new file mode 100644
index 0000000..7d02d86
--- /dev/null
+++ b/src/main/assemblies/plugin.xml
@@ -0,0 +1,27 @@
+
+
+ plugin
+
+ zip
+
+ false
+
+
+ /
+ true
+ true
+
+ org.elasticsearch:elasticsearch
+
+
+
+ /
+ true
+ true
+
+ com.googlecode.libphonenumber:libphonenumber
+ org.apache.commons:commons-lang3
+
+
+
+
diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/PhoneAnalyzer.java
new file mode 100644
index 0000000..88a5ffb
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/PhoneAnalyzer.java
@@ -0,0 +1,15 @@
+package org.elasticsearch.index.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
+
+public class PhoneAnalyzer extends Analyzer {
+
+    /** Builds the components for {@code field}: a {@link PhoneTokenizer} over {@code reader} with no further filters. */
+    @Override
+    protected TokenStreamComponents createComponents(String field, Reader reader) {
+        return new TokenStreamComponents(new PhoneTokenizer(reader));
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneTokenizer.java b/src/main/java/org/elasticsearch/index/analysis/PhoneTokenizer.java
new file mode 100644
index 0000000..aa21d45
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/PhoneTokenizer.java
@@ -0,0 +1,157 @@
+package org.elasticsearch.index.analysis;
+
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.elasticsearch.common.lang3.StringUtils;
+
+import com.google.i18n.phonenumbers.NumberParseException;
+import com.google.i18n.phonenumbers.PhoneNumberUtil;
+import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+public class PhoneTokenizer extends Tokenizer {
+
+    // The raw input, read lazily from the reader the first time it is needed after a reset
+    private String stringToTokenize = null;
+
+    // Index into tokens of the next token that incrementToken() will emit
+    private int position = 0;
+
+    /**
+     * All tokens for the current input. They are built on the first incrementToken() call after
+     * a reset and then returned one at a time; null means they have not been generated yet.
+     */
+    private List<String> tokens = null;
+
+    // Carries each emitted token; filled in by incrementToken()
+    protected CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
+
+    public PhoneTokenizer(Reader reader) {
+        super(reader);
+    }
+
+    /**
+     * Emits the next token into {@link #charTermAttribute}, generating all tokens on the first call.
+     *
+     * @return true if a token was produced, false once every token has been returned
+     * @throws IOException required by the overridden signature; read failures surface as RuntimeException
+     */
+    @Override
+    public final boolean incrementToken() throws IOException {
+        // Start every token from a clean attribute state
+        clearAttributes();
+
+        if (tokens == null) {
+            // It's the 1st iteration, chop the input up into tokens.
+            generateTokens();
+        }
+
+        if (position >= tokens.size()) {
+            // No more tokens
+            return false;
+        }
+
+        charTermAttribute.append(tokens.get(position));
+        position += 1;
+        return true;
+    }
+
+    /**
+     * Builds every token for the current input: the raw string, the address before any "@", the
+     * parts libphonenumber recognises (country code, extension, national number) and the
+     * leading-edge ngrams of the number, each with and without the country code.
+     */
+    private void generateTokens() {
+        String uri = getStringToTokenize();
+
+        tokens = new ArrayList<String>();
+        // The raw input is always a token in its own right
+        tokens.add(uri);
+
+        // Only a leading "tel:" or "sip:" tells us how to treat the input. Without one the whole
+        // string stays a single token (a scheme found mid-string must not cut off 4 characters).
+        if (!uri.startsWith("tel:") && !uri.startsWith("sip:")) {
+            return;
+        }
+        uri = uri.substring(4);
+
+        // Drop the "@" and anything after it. Most likely there's nothing of interest
+        int at = uri.indexOf('@');
+        String number = at < 0 ? uri : uri.substring(0, at);
+        if (number.isEmpty()) {
+            // Only a scheme ("tel:") or only a host ("sip:@host"): nothing further to tokenize
+            return;
+        }
+
+        // Add a token for the unmanipulated address. Note this could be a username (sip)
+        // instead of a telephone number so take it as is
+        tokens.add(number);
+
+        // Let Google's libphonenumber try to parse it
+        PhoneNumberUtil phoneUtil = PhoneNumberUtil.getInstance();
+        String countryCode = null;
+        try {
+            // ZZ is the generic "I don't know the country code" region
+            PhoneNumber numberProto = phoneUtil.parse(number, "ZZ");
+            if (numberProto != null) {
+                // Parsed: add the country code, the extension (if any) and the national number
+                countryCode = String.valueOf(numberProto.getCountryCode());
+                number = String.valueOf(numberProto.getNationalNumber());
+                tokens.add(countryCode);
+                if (!StringUtils.isEmpty(numberProto.getExtension())) {
+                    tokens.add(numberProto.getExtension());
+                }
+                tokens.add(number);
+            }
+        } catch (NumberParseException ignored) {
+            // Not parseable as a phone number, no biggie. We'll just ngram the number as it is.
+        }
+
+        // Leading-edge ngrams of an all-digit number: 9198243333 produces 9, 91, 919, etc
+        if (NumberUtils.isDigits(number)) {
+            for (int count = 1; count <= number.length(); count++) {
+                String token = number.substring(0, count);
+                tokens.add(token);
+                if (countryCode != null) {
+                    // With country code 1 the same number also produces 19, 191, 1919, etc
+                    tokens.add(countryCode + token);
+                }
+            }
+        }
+    }
+
+    /**
+     * Reads the whole input into {@link #stringToTokenize} the first time it is called.
+     *
+     * @return the complete input as a string
+     */
+    private String getStringToTokenize() {
+        if (this.stringToTokenize == null) {
+            try {
+                this.stringToTokenize = IOUtils.toString(input);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        return this.stringToTokenize;
+    }
+
+    /**
+     * Nuke all state after each use (Lucene will re-use an instance of this tokenizer over and over again)
+     */
+    @Override
+    public final void reset() throws IOException {
+        super.reset();
+        this.position = 0;
+        tokens = null;
+        this.stringToTokenize = null;
+        clearAttributes();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneAnalyzerProvider.java b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneAnalyzerProvider.java
new file mode 100644
index 0000000..ff121d1
--- /dev/null
+++ b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneAnalyzerProvider.java
@@ -0,0 +1,64 @@
+package org.elasticsearch.plugins.analysis.phone;
+
+
+import java.io.IOException;
+
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.PhoneAnalyzer;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Analyzer provider that hands out a {@link PhoneAnalyzer} under the analyzer name {@link #NAME}.
+ *
+ * <p>One analyzer is created per provider instance and returned by every call to {@link #get()}.
+ */
+public class PhoneAnalyzerProvider extends AbstractIndexAnalyzerProvider<PhoneAnalyzer> {
+    /** Analyzer instance returned by {@link #get()} and {@link #getAnalyzer()}. */
+    protected PhoneAnalyzer analyzer = new PhoneAnalyzer();
+
+    /** Name used to register and reference this analyzer. */
+    public static final String NAME = "phone";
+
+    /**
+     * Injection constructor; forwards everything except {@code env} to the superclass.
+     *
+     * @param index the index, passed to the superclass
+     * @param indexSettings index-level settings, passed to the superclass
+     * @param env node environment (not used by this constructor)
+     * @param name analyzer name, passed to the superclass
+     * @param settings analyzer settings, passed to the superclass
+     * @throws IOException declared by the original signature; which call can raise it is not visible in this file
+     */
+    @Inject
+    public PhoneAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) throws IOException {
+        super(index, indexSettings, name, settings);
+    }
+
+    /**
+     * @return the analyzer held by this provider (same instance as {@link #get()})
+     */
+    public PhoneAnalyzer getAnalyzer() {
+        return analyzer;
+    }
+
+    /**
+     * @return the analyzer held by this provider
+     */
+    @Override
+    public PhoneAnalyzer get() {
+        return analyzer;
+    }
+
+    /**
+     * @return {@link #NAME}
+     */
+    public static String getName() {
+        return NAME;
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneBinderProcessor.java b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneBinderProcessor.java
new file mode 100644
index 0000000..57136c1
--- /dev/null
+++ b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhoneBinderProcessor.java
@@ -0,0 +1,20 @@
+package org.elasticsearch.plugins.analysis.phone;
+import org.elasticsearch.index.analysis.AnalysisModule;
+
+/**
+ * Analysis binder processor that contributes the phone analyzer to the analyzer bindings.
+ */
+public class PhoneBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
+
+    /**
+     * Binds {@link PhoneAnalyzerProvider} under the name {@link PhoneAnalyzerProvider#NAME}.
+     *
+     * @param analyzersBindings registry receiving the name-to-provider binding
+     */
+    @Override
+    public void processAnalyzers(AnalyzersBindings analyzersBindings) {
+        final String analyzerName = PhoneAnalyzerProvider.NAME;
+        final Class<PhoneAnalyzerProvider> providerType = PhoneAnalyzerProvider.class;
+        analyzersBindings.processAnalyzer(analyzerName, providerType);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/plugins/analysis/phone/PhonePlugin.java b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhonePlugin.java
new file mode 100644
index 0000000..7a49968
--- /dev/null
+++ b/src/main/java/org/elasticsearch/plugins/analysis/phone/PhonePlugin.java
@@ -0,0 +1,41 @@
+package org.elasticsearch.plugins.analysis.phone;
+
+import org.elasticsearch.common.inject.Module;
+import org.elasticsearch.index.analysis.AnalysisModule;
+import org.elasticsearch.plugins.AbstractPlugin;
+import org.elasticsearch.plugins.Plugin;
+
+/**
+ * Elasticsearch plugin that registers the phone analyzer through {@link PhoneBinderProcessor}.
+ *
+ * <p>Registration lives only in {@link #onModule(AnalysisModule)}. Elasticsearch 1.x calls both
+ * {@code processModule(Module)} and every matching {@code onModule(...)} method for each module, so
+ * also registering from {@code processModule} would add a second, redundant processor.
+ */
+public class PhonePlugin extends AbstractPlugin implements Plugin {
+
+    /**
+     * @return a short human-readable description of this plugin
+     */
+    @Override
+    public String description() {
+        return "Makes a best attempt at tokenizing a phone number or sip address";
+    }
+
+    /**
+     * Registers the phone analyzer with the analysis module.
+     *
+     * @param analysisModule module that collects analysis binder processors
+     */
+    public void onModule(AnalysisModule analysisModule) {
+        analysisModule.addProcessor(new PhoneBinderProcessor());
+    }
+
+    /**
+     * @return the plugin name, {@code "phone-plugin"}
+     */
+    @Override
+    public String name() {
+        return "phone-plugin";
+    }
+}
\ No newline at end of file
diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties
new file mode 100644
index 0000000..b061165
--- /dev/null
+++ b/src/main/resources/es-plugin.properties
@@ -0,0 +1,3 @@
+plugin=org.elasticsearch.plugins.analysis.phone.PhonePlugin
+version=.1
+lucene=4.10.4
diff --git a/src/test/java/tests/PhoneIntegrationTest.java b/src/test/java/tests/PhoneIntegrationTest.java
new file mode 100644
index 0000000..c36b94e
--- /dev/null
+++ b/src/test/java/tests/PhoneIntegrationTest.java
@@ -0,0 +1,182 @@
+package tests;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.hamcrest.CoreMatchers.is;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.ExecutionException;
+
+import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.lang3.StringUtils;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.plugins.PluginsService;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Before;
+import org.junit.Test;
+
+
+/**
+ * Integration tests for the {@code phone} analyzer: each case analyzes and indexes one address and
+ * checks that the expected tokens are both emitted by the analyzer and searchable in the index.
+ */
+@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE)
+public class PhoneIntegrationTest extends ElasticsearchIntegrationTest {
+
+    // Turn on the default assertion status of the system class loader before the tests run.
+    static {
+        ClassLoader.getSystemClassLoader().setDefaultAssertionStatus(true);
+    }
+
+    /**
+     * Creates the {@code test} index and maps field {@code foo} of type {@code type} to the {@code phone} analyzer.
+     */
+    @Before
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        createIndex("test");
+        ensureGreen("test");
+        final XContentBuilder mapping = jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("foo")
+                        .field("type", "string")
+                        .field("analyzer", "phone")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject();
+
+        client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get();
+        ensureGreen("test");
+        // Locale.US, not new Locale("en_US"): the one-argument constructor treats its argument as a
+        // bare language code, so "en_US" would not produce the English/United States locale.
+        Locale.setDefault(Locale.US);
+    }
+
+    /** Adds the load-plugins-from-classpath flag to the inherited node settings. */
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        org.elasticsearch.common.settings.ImmutableSettings.Builder builder = ImmutableSettings.builder()
+            .put(super.nodeSettings(nodeOrdinal))
+            .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true);
+        return builder.build();
+    }
+
+    @Test
+    public void testPluginIsLoaded() {
+        NodesInfoResponse infos = client().admin().cluster().prepareNodesInfo().setPlugins(true).execute().actionGet();
+        assertThat(infos.getNodes()[0].getPlugins().getInfos().get(0).getName(), is("phone-plugin"));
+    }
+
+    @Test
+    public void testEurope() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+441344840400", Arrays.asList("44", "1344", "1344840400", "441344840400"));
+    }
+
+    @Test
+    public void testGermanCastle() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+498362930830", Arrays.asList("49", "498362930830", "8362930830"));
+    }
+
+    @Test
+    public void testBMWofSydney() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+61293344555", Arrays.asList("61", "293344555", "61293344555"));
+    }
+
+    @Test
+    public void coffeeShopInIreland() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+442890319416", Arrays.asList("44", "289", "2890319416", "442890319416"));
+    }
+
+    @Test
+    public void testTelWithCountryCode() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+17177158163", Arrays.asList("1", "717", "7177", "17177158163"));
+    }
+
+    @Test
+    public void testTelWithCountryCode2() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:+12177148350", Arrays.asList("1", "217", "2177", "2177148350","12177148350"));
+    }
+
+    @Test
+    public void testMissingCountryCode() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:8177148350", Arrays.asList("817", "8177", "81771", "817714", "8177148350"));
+    }
+
+    @Test
+    public void testSipWithNumericUsername() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("sip:222@autosbcpc", Arrays.asList("222"));
+    }
+
+    @Test
+    public void testTruncatedNumber() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("tel:5551234", Arrays.asList("5551234"));
+    }
+
+    @Test
+    public void testSipWithAlphabeticUsername() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("sip:abc@autosbcpc", Arrays.asList("abc"));
+    }
+
+    @Test
+    public void testGarbageInGarbageOut() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("test", Arrays.asList("test"));
+    }
+
+    @Test
+    public void testSipWithCountryCode() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("sip:+14177141363@178.97.105.13;isup-oli=0;pstn-params=808481808882", Arrays.asList("417", "4177", "14177"));
+    }
+
+    @Test
+    public void testSipWithTelephoneExtension() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("sip:+13169410766;ext=2233@178.17.10.117:8060", Arrays.asList("316", "2233", "1316"));
+    }
+
+    @Test
+    public void testSipWithUsername() throws ExecutionException, InterruptedException, IOException {
+        assertIncludes("sip:JeffSIP@178.12.220.18", Arrays.asList("JeffSIP"));
+    }
+
+    /**
+     * Analyzes and indexes {@code ani}, then verifies every expected token was emitted and is searchable.
+     *
+     * @param ani the phone number or SIP address under test
+     * @param expectedTokens tokens that must appear in the analyzer output and each match the indexed document
+     */
+    private void assertIncludes(String ani, List<String> expectedTokens) throws ExecutionException, InterruptedException, IOException {
+        AnalyzeResponse response = client().admin().indices().prepareAnalyze(ani).setField("foo").setIndex("test").execute().get();
+        index("test", "type", "1", "foo", ani);
+
+        // Collect the emitted terms; the analyzer must never produce an empty token.
+        List<String> tokens = new ArrayList<String>();
+        for (AnalyzeToken token : response.getTokens()) {
+            assertFalse(StringUtils.isEmpty(token.getTerm()));
+            tokens.add(token.getTerm());
+        }
+
+        flush();
+        refresh();
+
+        for (String expectedToken : expectedTokens) {
+            assertTrue("missing token '" + expectedToken + "' in " + tokens, tokens.contains(expectedToken));
+            SearchResponse sr = client().prepareSearch("test").setQuery(QueryBuilders.termQuery("foo", expectedToken)).execute().actionGet();
+            assertThat(sr.getHits().getTotalHits(), is(1L));
+        }
+
+        // Negative control, run once because it does not depend on the expected token: an unrelated term must not match.
+        SearchResponse bogus = client().prepareSearch("test").setQuery(QueryBuilders.termQuery("foo", "bogussearchterm")).execute().actionGet();
+        assertThat(bogus.getHits().getTotalHits(), is(0L));
+    }
+}