From 317e0c1b0b549b2ee5e000fa4d698e167155a2eb Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Tue, 17 Nov 2015 23:35:20 +0100 Subject: [PATCH 1/6] #24 make tests reproduce bug, fix ranges --- .../rockymadden/stringmetric/transform.scala | 12 ++-- .../stringmetric/transformSpec.scala | 56 +++++++++---------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index c3c5afcc..7d4372a1 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -3,12 +3,12 @@ package com.rockymadden.stringmetric import scala.collection.immutable.NumericRange trait transform { - private val Ascii = NumericRange(0x00, 0x7F, 1) - private val ExtendedAscii = NumericRange(0x00, 0x7F, 1) - private val Latin = NumericRange(0x00, 0x24F, 1) - private val LowerCase = NumericRange(0x61, 0x7A, 1) - private val Numbers = NumericRange(0x30, 0x39, 1) - private val UpperCase = NumericRange(0x41, 0x5A, 1) + private val Ascii = NumericRange.inclusive(0x00, 0x7F, 1) + private val ExtendedAscii = NumericRange.inclusive(0x00, 0x7F, 1) + private val Latin = NumericRange.inclusive(0x00, 0x24F, 1) + private val LowerCase = NumericRange.inclusive(0x61, 0x7A, 1) + private val Numbers = NumericRange.inclusive(0x30, 0x39, 1) + private val UpperCase = NumericRange.inclusive(0x41, 0x5A, 1) private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => ca.filter(c => f(c)).mkString diff --git a/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala b/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala index 01fa3a33..f62d3ffa 100644 --- a/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala +++ b/core/src/test/scala/com/rockymadden/stringmetric/transformSpec.scala @@ -6,17 +6,17 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterAlpha()" should { "return transformed" in { filterAlpha( - ("aBc123" + 0x250.toChar).toCharArray - ) must beEqualTo("aBc".toCharArray) + ("aBc123zZ" + 0x5B.toChar + 0x7B.toChar).toCharArray + ) must beEqualTo("aBczZ".toCharArray) } } "filterNotAlpha()" should { "return transformed" in { filterNotAlpha( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc123zZ" + 0x5B.toChar + 0x7B.toChar).toCharArray ) must beEqualTo( - ("123" + 0x250.toChar).toCharArray + ("123" + 0x5B.toChar + 0x7B.toChar).toCharArray ) } } @@ -24,17 +24,17 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterAlphaNumeric()" should { "return transformed" in { filterAlphaNumeric( - ("aBc123" + 0x250.toChar).toCharArray - ) must beEqualTo("aBc123".toCharArray) + ("aBc123zZ9" + 0x3A.toChar + 0x5B.toChar + 0x7B.toChar).toCharArray + ) must beEqualTo("aBc123zZ9".toCharArray) } } "filterNotAlphaNumeric()" should { "return transformed" in { filterNotAlphaNumeric( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc123zZ9" + 0x3A.toChar + 0x5B.toChar + 0x7B.toChar).toCharArray ) must beEqualTo( - ("" + 0x250.toChar).toCharArray + ("" + 0x3A.toChar + 0x5B.toChar + 0x7B.toChar).toCharArray ) } } @@ -42,15 +42,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterAscii()" should { "return transformed" in { filterAscii( - ("aBc" + 0x80.toChar).toCharArray - ) must beEqualTo("aBc".toCharArray) + ("aBc" + 0x7F.toChar + 0x100.toChar).toCharArray + ) must beEqualTo(("aBc" + 0x7F.toChar).toCharArray) } } "filterNotAscii()" should { "return transformed" in { filterNotAscii( - ("aBc" + 0x100.toChar).toCharArray + ("aBc" + 0x7F.toChar + 0x100.toChar).toCharArray ) must beEqualTo( ("" + 0x100.toChar).toCharArray ) @@ -60,15 +60,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterExtendedAscii()" should { "return transformed" in { filterExtendedAscii( - ("aBc" + 0x100.toChar).toCharArray - ) must beEqualTo("aBc".toCharArray) + ("aBc" + 0x7F.toChar + 0x100.toChar).toCharArray + ) must beEqualTo(("aBc" + 0x7F.toChar).toCharArray) } } "filterNotExtendedAscii()" should { "return transformed" in { filterNotExtendedAscii( - ("aBc" + 0x250.toChar).toCharArray + ("aBc" + 0x7F.toChar + 0x250.toChar).toCharArray ) must beEqualTo( ("" + 0x250.toChar).toCharArray ) @@ -78,15 +78,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterLatin()" should { "return transformed" in { filterLatin( - ("aBc" + 0x250.toChar).toCharArray - ) must beEqualTo("aBc".toCharArray) + ("aBc" + 0x24F.toChar + 0x250.toChar).toCharArray + ) must beEqualTo(("aBc" + 0x24F.toChar).toCharArray) } } "filterNotLatin()" should { "return transformed" in { filterNotLatin( - ("aBc" + 0x300.toChar).toCharArray + ("aBc" + 0x24F.toChar + 0x300.toChar).toCharArray ) must beEqualTo( ("" + 0x300.toChar).toCharArray ) @@ -96,15 +96,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterLowerCase()" should { "return transformed" in { filterLowerCase( - "aBc123" + 0x250.toChar - ) must beEqualTo("ac".toCharArray) + "aBc123z" + 0x250.toChar + ) must beEqualTo("acz".toCharArray) } } "filterNotLowerCase()" should { "return transformed" in { filterNotLowerCase( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc123z" + 0x250.toChar).toCharArray ) must beEqualTo( ("B123" + 0x250.toChar).toCharArray ) @@ -114,15 +114,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterNumeric()" should { "return transformed" in { filterNumeric( - ("aBc123" + 0x250.toChar).toCharArray - ) must beEqualTo("123".toCharArray) + ("aBc1239" + 0x250.toChar).toCharArray + ) must beEqualTo("1239".toCharArray) } } "filterNotNumeric()" should { "return transformed" in { filterNotNumeric( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc1239" + 0x250.toChar).toCharArray ) must beEqualTo( ("aBc" + 0x250.toChar).toCharArray ) @@ -132,15 +132,15 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "filterUpperCase()" should { "return transformed" in { filterUpperCase( - ("aBc123" + 0x250.toChar).toCharArray - ) must beEqualTo("B".toCharArray) + ("aBc123Z" + 0x250.toChar).toCharArray + ) must beEqualTo("BZ".toCharArray) } } "filterNotUpperCase()" should { "return transformed" in { filterNotUpperCase( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc123Z" + 0x250.toChar).toCharArray ) must beEqualTo( ("ac123" + 0x250.toChar).toCharArray ) @@ -150,9 +150,9 @@ object transformSpec extends org.specs2.mutable.SpecificationWithJUnit { "ignoreAlphaCase()" should { "return transformed" in { ignoreAlphaCase( - ("aBc123" + 0x250.toChar).toCharArray + ("aBc123zZ" + 0x250.toChar).toCharArray ) must beEqualTo( - ("abc123" + 0x250.toChar).toCharArray + ("abc123zz" + 0x250.toChar).toCharArray ) } } From ec554e779dd825f182e217e0db41ab42e7c16ca5 Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Tue, 17 Nov 2015 23:48:52 +0100 Subject: [PATCH 2/6] #24 use more idiomatic expressions for ranges, simplify filters --- .../rockymadden/stringmetric/transform.scala | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index 7d4372a1..e0fc6332 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -1,14 +1,12 @@ package com.rockymadden.stringmetric -import scala.collection.immutable.NumericRange - trait transform { - private val Ascii = NumericRange.inclusive(0x00, 0x7F, 1) - private val ExtendedAscii = NumericRange.inclusive(0x00, 0x7F, 1) - private val Latin = NumericRange.inclusive(0x00, 0x24F, 1) - private val LowerCase = NumericRange.inclusive(0x61, 0x7A, 1) - private val Numbers = NumericRange.inclusive(0x30, 0x39, 1) - private val UpperCase = NumericRange.inclusive(0x41, 0x5A, 1) + private val Ascii = 0x00.toChar to 0x7F.toChar + private val ExtendedAscii = 0x00.toChar to 0x7F.toChar + private val Latin = 0x00.toChar to 0x24F.toChar + private val LowerCase = 'a' to 'z' + private val Numbers = '0' to '9' + private val UpperCase = 'A' to 'Z' private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => ca.filter(c => f(c)).mkString @@ -17,50 +15,46 @@ trait transform { ca.filterNot(c => f(c)).mkString val filterAlpha: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) + LowerCase.contains(c) || UpperCase.contains(c) }) val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) + LowerCase.contains(c) || UpperCase.contains(c) }) val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c) }) val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { - val ci = c.toInt - LowerCase.contains(ci) || UpperCase.contains(ci) || Numbers.contains(ci) + LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c) }) - val filterAscii: StringTransform = (ca) => filter(ca, c => Ascii.contains(c.toInt)) + val filterAscii: StringTransform = (ca) => filter(ca, Ascii.contains) - val filterNotAscii: StringTransform = (ca) => filterNot(ca, c => Ascii.contains(c.toInt)) + val filterNotAscii: StringTransform = (ca) => filterNot(ca, Ascii.contains) - val filterExtendedAscii: StringTransform = (ca) => filter(ca, c => ExtendedAscii.contains(c.toInt)) + val filterExtendedAscii: StringTransform = (ca) => filter(ca, ExtendedAscii.contains) - val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, c => ExtendedAscii.contains(c.toInt)) + val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, ExtendedAscii.contains) - val filterLatin: StringTransform = (ca) => filter(ca, c => Latin.contains(c.toInt)) + val filterLatin: StringTransform = (ca) => filter(ca, Latin.contains) - val filterNotLatin: StringTransform = (ca) => filterNot(ca, c => Latin.contains(c.toInt)) + val filterNotLatin: StringTransform = (ca) => filterNot(ca, Latin.contains) - val filterLowerCase: StringTransform = (ca) => filter(ca, c => LowerCase.contains(c.toInt)) + val filterLowerCase: StringTransform = (ca) => filter(ca, LowerCase.contains) - val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, c => LowerCase.contains(c.toInt)) + val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, LowerCase.contains) - val filterNumeric: StringTransform = (ca) => filter(ca, c => Numbers.contains(c.toInt)) + val filterNumeric: StringTransform = (ca) => filter(ca, Numbers.contains) - val filterNotNumeric: StringTransform = (ca) => filterNot(ca, c => Numbers.contains(c.toInt)) + val filterNotNumeric: StringTransform = (ca) => filterNot(ca, Numbers.contains) - val filterUpperCase: StringTransform = (ca) => filter(ca, c => UpperCase.contains(c.toInt)) + val filterUpperCase: StringTransform = (ca) => filter(ca, UpperCase.contains) - val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, c => UpperCase.contains(c.toInt)) + val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, UpperCase.contains) - val ignoreAlphaCase: StringTransform = (ca) => ca.map(c => if (c >= 65 && c <= 90) (c + 32).toChar else c) + val ignoreAlphaCase: StringTransform = (ca) => ca.map(_.toLower) } object transform extends transform From 438e1b8d13f9fe9245b64b0a068786433d7b7b6e Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Tue, 17 Nov 2015 23:51:17 +0100 Subject: [PATCH 3/6] #24 simplify filters --- .../rockymadden/stringmetric/transform.scala | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index e0fc6332..0195c10b 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -14,21 +14,17 @@ trait transform { private val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => ca.filterNot(c => f(c)).mkString - val filterAlpha: StringTransform = (ca) => filter(ca, c => { - LowerCase.contains(c) || UpperCase.contains(c) - }) + val filterAlpha: StringTransform = (ca) => + filter(ca, c => LowerCase.contains(c) || UpperCase.contains(c)) - val filterNotAlpha: StringTransform = (ca) => filterNot(ca, c => { - LowerCase.contains(c) || UpperCase.contains(c) - }) + val filterNotAlpha: StringTransform = (ca) => + filterNot(ca, c => LowerCase.contains(c) || UpperCase.contains(c)) - val filterAlphaNumeric: StringTransform = (ca) => filter(ca, c => { - LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c) - }) + val filterAlphaNumeric: StringTransform = (ca) => + filter(ca, c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) - val filterNotAlphaNumeric: StringTransform = (ca) => filterNot(ca, c => { - LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c) - }) + val filterNotAlphaNumeric: StringTransform = (ca) => + filterNot(ca, c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) val filterAscii: StringTransform = (ca) => filter(ca, Ascii.contains) From 6e69965b2cd45d3d1f70df5ff085f47753b04238 Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Wed, 18 Nov 2015 00:01:40 +0100 Subject: [PATCH 4/6] #24 simplify filters --- .../rockymadden/stringmetric/transform.scala | 46 ++++++++----------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index 0195c10b..32d59add 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -8,47 +8,41 @@ trait transform { private val Numbers = '0' to '9' private val UpperCase = 'A' to 'Z' - private val filter: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filter(c => f(c)).mkString + val filterAlpha: StringTransform = + _.filter(c => LowerCase.contains(c) || UpperCase.contains(c)) - private val filterNot: ((Array[Char], (Char => Boolean)) => String) = (ca, f) => - ca.filterNot(c => f(c)).mkString + val filterNotAlpha: StringTransform = + _.filterNot(c => LowerCase.contains(c) || UpperCase.contains(c)) - val filterAlpha: StringTransform = (ca) => - filter(ca, c => LowerCase.contains(c) || UpperCase.contains(c)) + val filterAlphaNumeric: StringTransform = + _.filter(c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) - val filterNotAlpha: StringTransform = (ca) => - filterNot(ca, c => LowerCase.contains(c) || UpperCase.contains(c)) + val filterNotAlphaNumeric: StringTransform = + _.filterNot(c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) - val filterAlphaNumeric: StringTransform = (ca) => - filter(ca, c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) + val filterAscii: StringTransform = _.filter(Ascii.contains) - val filterNotAlphaNumeric: StringTransform = (ca) => - filterNot(ca, c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) + val filterNotAscii: StringTransform = _.filterNot(Ascii.contains) - val filterAscii: StringTransform = (ca) => filter(ca, Ascii.contains) + val filterExtendedAscii: StringTransform = _.filter(ExtendedAscii.contains) - val filterNotAscii: StringTransform = (ca) => filterNot(ca, Ascii.contains) + val filterNotExtendedAscii: StringTransform = _.filterNot(ExtendedAscii.contains) - val filterExtendedAscii: StringTransform = (ca) => filter(ca, ExtendedAscii.contains) + val filterLatin: StringTransform = _.filter(Latin.contains) - val filterNotExtendedAscii: StringTransform = (ca) => filterNot(ca, ExtendedAscii.contains) + val filterNotLatin: StringTransform = _.filterNot(Latin.contains) - val filterLatin: StringTransform = (ca) => filter(ca, Latin.contains) + val filterLowerCase: StringTransform = _.filter(LowerCase.contains) - val filterNotLatin: StringTransform = (ca) => filterNot(ca, Latin.contains) + val filterNotLowerCase: StringTransform = _.filterNot(LowerCase.contains) - val filterLowerCase: StringTransform = (ca) => filter(ca, LowerCase.contains) + val filterNumeric: StringTransform = _.filter(Numbers.contains) - val filterNotLowerCase: StringTransform = (ca) => filterNot(ca, LowerCase.contains) + val filterNotNumeric: StringTransform = _.filterNot(Numbers.contains) - val filterNumeric: StringTransform = (ca) => filter(ca, Numbers.contains) + val filterUpperCase: StringTransform = _.filter(UpperCase.contains) - val filterNotNumeric: StringTransform = (ca) => filterNot(ca, Numbers.contains) - - val filterUpperCase: StringTransform = (ca) => filter(ca, UpperCase.contains) - - val filterNotUpperCase: StringTransform = (ca) => filterNot(ca, UpperCase.contains) + val filterNotUpperCase: StringTransform = _.filterNot(UpperCase.contains) val ignoreAlphaCase: StringTransform = (ca) => ca.map(_.toLower) } From 408b7e54cd93b3c296018448ce15ecba9e1289e9 Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Wed, 18 Nov 2015 00:19:12 +0100 Subject: [PATCH 5/6] #24 make `ignoreAlphaCase` backwards-compatible --- .../src/main/scala/com/rockymadden/stringmetric/transform.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index 32d59add..4cf11984 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -44,7 +44,7 @@ trait transform { val filterNotUpperCase: StringTransform = _.filterNot(UpperCase.contains) - val ignoreAlphaCase: StringTransform = (ca) => ca.map(_.toLower) + val ignoreAlphaCase: StringTransform = _.map(c => if (UpperCase contains c) c.toLower else c) } object transform extends transform From b605102d06c6da9a1a285c3389656b36cbec56ce Mon Sep 17 00:00:00 2001 From: zoltanmaric Date: Wed, 18 Nov 2015 00:58:15 +0100 Subject: [PATCH 6/6] #24 introduce aggregate character classes, simplify filters --- .../rockymadden/stringmetric/transform.scala | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala index 4cf11984..e851fb35 100644 --- a/core/src/main/scala/com/rockymadden/stringmetric/transform.scala +++ b/core/src/main/scala/com/rockymadden/stringmetric/transform.scala @@ -1,24 +1,22 @@ package com.rockymadden.stringmetric trait transform { - private val Ascii = 0x00.toChar to 0x7F.toChar - private val ExtendedAscii = 0x00.toChar to 0x7F.toChar - private val Latin = 0x00.toChar to 0x24F.toChar - private val LowerCase = 'a' to 'z' - private val Numbers = '0' to '9' - private val UpperCase = 'A' to 'Z' + private lazy val Ascii = 0x00.toChar to 0x7F.toChar + private lazy val ExtendedAscii = 0x00.toChar to 0x7F.toChar + private lazy val Latin = 0x00.toChar to 0x24F.toChar + private lazy val LowerCase = 'a' to 'z' + private lazy val Numbers = '0' to '9' + private lazy val UpperCase = 'A' to 'Z' + private lazy val Alpha = LowerCase ++ UpperCase + private lazy val AlphaNumeric = Alpha ++ Numbers - val filterAlpha: StringTransform = - _.filter(c => LowerCase.contains(c) || UpperCase.contains(c)) + val filterAlpha: StringTransform = _.filter(Alpha.contains) - val filterNotAlpha: StringTransform = - _.filterNot(c => LowerCase.contains(c) || UpperCase.contains(c)) + val filterNotAlpha: StringTransform = _.filterNot(Alpha.contains) - val filterAlphaNumeric: StringTransform = - _.filter(c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) + val filterAlphaNumeric: StringTransform = _.filter(AlphaNumeric.contains) - val filterNotAlphaNumeric: StringTransform = - _.filterNot(c => LowerCase.contains(c) || UpperCase.contains(c) || Numbers.contains(c)) + val filterNotAlphaNumeric: StringTransform = _.filterNot(AlphaNumeric.contains) val filterAscii: StringTransform = _.filter(Ascii.contains)