Skip to content

Commit

Permalink
Fix utf8_check_validity (pytorch#6543)
Browse files Browse the repository at this point in the history
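The loop already increments the index on each iteration, so the multi-byte branches must advance it by one less than the sequence length (1, 2, and 3 for 2-, 3-, and 4-byte sequences) to avoid over-advancing.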
  • Loading branch information
kirklandsign authored Oct 29, 2024
1 parent a4d09bd commit 6b01b91
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions extension/android/jni/jni_layer_llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@ bool utf8_check_validity(const char* str, size_t length) {
uint8_t next_byte = static_cast<uint8_t>(str[i + 1]);
if ((byte & 0xE0) == 0xC0 &&
(next_byte & 0xC0) == 0x80) { // 2-byte sequence
i += 2;
i += 1;
} else if (
(byte & 0xF0) == 0xE0 && (next_byte & 0xC0) == 0x80 &&
(i + 2 < length) &&
(static_cast<uint8_t>(str[i + 2]) & 0xC0) ==
0x80) { // 3-byte sequence
i += 3;
i += 2;
} else if (
(byte & 0xF8) == 0xF0 && (next_byte & 0xC0) == 0x80 &&
(i + 2 < length) &&
(static_cast<uint8_t>(str[i + 2]) & 0xC0) == 0x80 &&
(i + 3 < length) &&
(static_cast<uint8_t>(str[i + 3]) & 0xC0) ==
0x80) { // 4-byte sequence
i += 4;
i += 3;
} else {
return false; // Invalid sequence
}
Expand Down

0 comments on commit 6b01b91

Please sign in to comment.