-
Notifications
You must be signed in to change notification settings - Fork 486
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix issue with non utf8 encodings and environment variable expansion (#5515)
* Fix issue with encoding not being utf8, since envar shoves utf8 into a utfX file. Now we convert all to utf8. * Fix linting and test on linux. * update changelog * update go.mod * update go.sum * Allow strict usage of utf8. * fix spelling * reorganize to prevent cyclic dependencies
- Loading branch information
1 parent
549dc33
commit 49b569e
Showing
14 changed files
with
119 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package encoder | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"unicode/utf8" | ||
|
||
"github.com/dimchansky/utfbom" | ||
"golang.org/x/text/encoding" | ||
uni "golang.org/x/text/encoding/unicode" | ||
"golang.org/x/text/encoding/unicode/utf32" | ||
) | ||
|
||
// EnsureUTF8 will convert from the most common encodings to UTF8. | ||
// If useStrictUTF8 is enabled then if the file is not already utf8 then an error will be returned. | ||
func EnsureUTF8(config []byte, useStrictUTF8 bool) ([]byte, error) { | ||
buffer := bytes.NewBuffer(config) | ||
src, enc := utfbom.Skip(buffer) | ||
var converted []byte | ||
skippedBytes, err := io.ReadAll(src) | ||
if err != nil { | ||
return nil, err | ||
} | ||
var encoder encoding.Encoding | ||
switch enc { | ||
case utfbom.UTF16BigEndian: | ||
encoder = uni.UTF16(uni.BigEndian, uni.IgnoreBOM) | ||
case utfbom.UTF16LittleEndian: | ||
encoder = uni.UTF16(uni.LittleEndian, uni.IgnoreBOM) | ||
case utfbom.UTF32BigEndian: | ||
encoder = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM) | ||
case utfbom.UTF32LittleEndian: | ||
encoder = utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM) | ||
case utfbom.UTF8: // This only checks utf8 bom | ||
return config, nil | ||
default: | ||
// If its utf8 valid then return. | ||
if utf8.Valid(config) { | ||
return config, nil | ||
} | ||
return nil, fmt.Errorf("unknown encoding for config") | ||
} | ||
if useStrictUTF8 { | ||
return nil, fmt.Errorf("configuration is encoded with %s but must be utf8", enc.String()) | ||
} | ||
decoder := encoder.NewDecoder() | ||
converted, err = decoder.Bytes(skippedBytes) | ||
return converted, err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ñ: | ||
log_level: ${TESTz�߭�߭�� |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
server: | ||
log_level: ${TEST} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
server: | ||
log_level: ${TEST} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters