diff options
| author | b5f0d6c3 <[email protected]> | 2022-05-06 14:29:39 +0800 |
|---|---|---|
| committer | b5f0d6c3 <[email protected]> | 2022-05-06 14:29:39 +0800 |
| commit | b9c36db2e76f56a339b6eee0ade65d64f50e32fe (patch) | |
| tree | cab103decb5824a49612fe8e56ff3fd338ac2499 /mkvlib/utils.go | |
| parent | 889a5ecdf99bdde2f021fda93d137f0fe61e1448 (diff) | |
update mkvlib:add chardets
Diffstat (limited to 'mkvlib/utils.go')
| -rw-r--r-- | mkvlib/utils.go | 19 |
1 files changed, 18 insertions, 1 deletions
```diff
diff --git a/mkvlib/utils.go b/mkvlib/utils.go
index 85ef657..a72c5bd 100644
--- a/mkvlib/utils.go
+++ b/mkvlib/utils.go
@@ -4,6 +4,8 @@ import (
 	"errors"
 	"fmt"
 	"github.com/gogs/chardet"
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/encoding/traditionalchinese"
 	"golang.org/x/text/encoding/unicode"
 	"golang.org/x/text/transform"
 	"io"
@@ -220,8 +222,23 @@ func randomStr(l int) string {
 func toUTF8(data []byte) string {
 	d := chardet.NewTextDetector()
 	if r, err := d.DetectBest(data); err == nil {
-		if r.Charset == "UTF-16LE" {
+		switch r.Charset {
+		case "UTF-16LE":
 			data, _, _ = transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), data)
+			break
+		case "UTF-16BE":
+			data, _, _ = transform.Bytes(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), data)
+			break
+		case "GBK", "GB2312":
+			data, _, _ = transform.Bytes(simplifiedchinese.GBK.NewDecoder(), data)
+			break
+		case "GB18030":
+			data, _, _ = transform.Bytes(simplifiedchinese.GB18030.NewDecoder(), data)
+			break
+		case "Big5":
+			data, _, _ = transform.Bytes(traditionalchinese.Big5.NewDecoder(), data)
+			break
+		}
 		}
 	}
 	return string(data)
```
