From 13fe106cd02759e958086eba766104e60d0d78ab Mon Sep 17 00:00:00 2001
From: Haozhun Jin <hjin@fb.com>
Date: Fri, 20 Mar 2015 12:01:05 -0700
Subject: [PATCH] bugfix: char class casefold for certain chars

When a code point fits in a single byte (<= 0xff) but takes more than
one byte in the current encoding, the case-folding code incorrectly
put it in the bitset instead of a code range. As a result, with the
UTF-8 encoding, case folding works incorrectly for characters in the
range \u0080 to \u00ff (Latin-1 Supplement).

Before the fix (input, then pattern; all three should return true):

* `"\u00c2"` `[\u00e0-\u00e5]` returns false
* `"\u00c2"` `[\u00e2]` returns false
* `"\u00c2"` `\u00e2` returns true
---
 src/org/joni/ApplyCaseFold.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/org/joni/ApplyCaseFold.java b/src/org/joni/ApplyCaseFold.java
index 7dd84ce1..6a8d1c35 100644
--- a/src/org/joni/ApplyCaseFold.java
+++ b/src/org/joni/ApplyCaseFold.java
@@ -41,7 +41,7 @@ public void apply(int from, int[]to, int length, Object o) {
 
             if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
                 if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
-                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) {
+                    if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE || enc.codeToMbcLength(to[0]) > 1) {
                         cc.addCodeRange(env, to[0], to[0]);
                     } else {
                         /* /(?i:[^A-C])/.match("a") ==> fail. */