Skip to content

Commit

Permalink
CTable improvements. Fixes #738 (#739)
Browse files Browse the repository at this point in the history
  • Loading branch information
kamphaus authored and elliotchance committed Jun 4, 2018
1 parent a3ca656 commit 7aaf223
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 12 deletions.
24 changes: 14 additions & 10 deletions darwin/ctype.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,48 +42,52 @@ const (
// to handle this, so if you know one please consider putting in a PR :)
func IsType(_c CtRuneT, _f uint32) uint32 {
// These are the easy ones.
if _f&CtypeA != 0 && unicode.IsLetter(rune(_c)) {
if _f&CtypeA != 0 && unicode.IsLetter(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeC != 0 && unicode.IsControl(rune(_c)) {
if _f&CtypeC != 0 && unicode.IsControl(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeD != 0 && unicode.IsDigit(rune(_c)) {
if _f&CtypeD != 0 && unicode.IsDigit(rune(_c)) && rune(_c) < 0x80 {
return 1
}

// The IsSpace check is required because Go treats spaces as graphic
// characters, which C does not.
if _f&CtypeG != 0 && unicode.IsGraphic(rune(_c)) && !unicode.IsSpace(rune(_c)) {
if _f&CtypeG != 0 && unicode.IsGraphic(rune(_c)) && !unicode.IsSpace(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeL != 0 && unicode.IsLower(rune(_c)) {
if _f&CtypeL != 0 && unicode.IsLower(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeP != 0 && unicode.IsPunct(rune(_c)) {
// Need to check for 0x24, 0x2b, 0x3c-0x3e, 0x5e, 0x60, 0x7c, 0x7e
// because Go doesn't treat $+<=>^`|~ as punctuation.
if _f&CtypeP != 0 && rune(_c) < 0x80 && (unicode.IsPunct(rune(_c)) || rune(_c) == 0x24 || rune(_c) == 0x2b ||
(rune(_c) >= 0x3c && rune(_c) <= 0x3e) || rune(_c) == 0x5e || rune(_c) == 0x60 ||
rune(_c) == 0x7c || rune(_c) == 0x7e) {
return 1
}

if _f&CtypeS != 0 && unicode.IsSpace(rune(_c)) {
if _f&CtypeS != 0 && unicode.IsSpace(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeU != 0 && unicode.IsUpper(rune(_c)) {
if _f&CtypeU != 0 && unicode.IsUpper(rune(_c)) && rune(_c) < 0x80 {
return 1
}

if _f&CtypeR != 0 && unicode.IsPrint(rune(_c)) {
if _f&CtypeR != 0 && unicode.IsPrint(rune(_c)) && rune(_c) < 0x80 {
return 1
}

// TODO: Is this really the right way to do this?
if _f&CtypeX != 0 && (unicode.IsDigit(rune(_c)) ||
(_c >= 'a' && _c <= 'f') ||
(_c >= 'A' && _c <= 'F')) {
(_c >= 'A' && _c <= 'F')) && rune(_c) < 0x80 {
return 1
}

Expand Down
11 changes: 9 additions & 2 deletions linux/ctype.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
var characterTable []uint16

func generateCharacterTable() {
for i := 0; i < 255; i++ {
for i := 0; i < 0x80; i++ {
var c uint16

// Each of the bitwise expressions below was copied from the enum
Expand Down Expand Up @@ -60,7 +60,10 @@ func generateCharacterTable() {
c |= ((1 << (9)) >> 8)
}

if unicode.IsPunct(rune(i)) {
// Need to check for 0x24, 0x2b, 0x3c-0x3e, 0x5e, 0x60, 0x7c, 0x7e
// because Go doesn't treat $+<=>^`|~ as punctuation.
if unicode.IsPunct(rune(i)) || i == 0x24 || i == 0x2b || (i >= 0x3c && i <= 0x3e) || i == 0x5e || i == 0x60 ||
i == 0x7c || i == 0x7e {
c |= ((1 << (10)) >> 8)
}

Expand All @@ -72,6 +75,10 @@ func generateCharacterTable() {
// test if this works right now.
characterTable = append(characterTable, c)
}
for i := 0x80; i < 256; i++ {
// false for all characters > 0x7f
characterTable = append(characterTable, 0)
}
}

// CtypeLoc handles __ctype_b_loc(). It returns a character table.
Expand Down
19 changes: 19 additions & 0 deletions tests/ctype.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
char *strnul = "this string has a \0 NUL";
char arrnul[] = "this string has a \0 NUL";

/* Prints "T" to stdout when v is non-zero and "F" otherwise. The body is
 * wrapped in do { ... } while (0) so the macro expands to a single statement
 * and can be used safely in an unbraced if/else followed by a semicolon. */
#define PRINTF_BOOL(v) do { if (v) printf("T"); else printf("F"); } while (0)

int main()
{
plan(104);
Expand All @@ -49,6 +51,23 @@ int main()
_CTYPE(isupper, F, T, F, F, F, F, F, F);
CTYPE(isxdigit, T, T, T, F, F, F, F, F);

diag("char properties for characters 0-255:");
for(int i=0; i<256; i++) {
printf("%x: ", i);
PRINTF_BOOL(isalnum(i));
PRINTF_BOOL(isalpha(i));
PRINTF_BOOL(iscntrl(i));
PRINTF_BOOL(isdigit(i));
PRINTF_BOOL(isgraph(i));
PRINTF_BOOL(islower(i));
PRINTF_BOOL(isprint(i));
PRINTF_BOOL(ispunct(i));
PRINTF_BOOL(isspace(i));
PRINTF_BOOL(isupper(i));
PRINTF_BOOL(isxdigit(i));
printf("\n");
}

diag("tolower");
is_eq(tolower('a'), 'a');
is_eq(tolower('B'), 'b');
Expand Down

0 comments on commit 7aaf223

Please sign in to comment.