From 53c605f178d69a46ebaaffef58d9527c847fc31f Mon Sep 17 00:00:00 2001
From: aiclaudev <88221233+aiclaudev@users.noreply.github.com>
Date: Tue, 5 Oct 2021 15:42:39 +0900
Subject: [PATCH] Create UTF8Charset to support Korean language

Co-Authored-By: SangYeon Min
Co-Authored-By: JK Park
---
Notes (not part of the commit message):
  Include targets and template arguments that were missing from the
  submitted text are filled in from how the code uses them. The blob ids
  on the "index" lines are carried over from the original submission.

 euphony/src/main/cpp/CMakeLists.txt           |  1 +
 euphony/src/main/cpp/core/UTF8Charset.h       | 27 +++++++++
 .../src/main/cpp/core/source/UTF8Charset.cpp  | 41 +++++++++++++
 euphony/src/main/cpp/tests/CMakeLists.txt     |  1 +
 .../src/main/cpp/tests/utf8CharsetTest.cpp    | 57 +++++++++++++++++++
 5 files changed, 127 insertions(+)
 create mode 100644 euphony/src/main/cpp/core/UTF8Charset.h
 create mode 100644 euphony/src/main/cpp/core/source/UTF8Charset.cpp
 create mode 100644 euphony/src/main/cpp/tests/utf8CharsetTest.cpp

diff --git a/euphony/src/main/cpp/CMakeLists.txt b/euphony/src/main/cpp/CMakeLists.txt
index 5135069..fbb7be7 100644
--- a/euphony/src/main/cpp/CMakeLists.txt
+++ b/euphony/src/main/cpp/CMakeLists.txt
@@ -26,6 +26,7 @@ set(EUPHONY_SRC
         arms/kiss_fftr.c
         core/source/AudioStreamCallback.cpp
         core/source/ASCIICharset.cpp
+        core/source/UTF8Charset.cpp
         core/source/Base2.cpp
         core/source/Base16.cpp
         core/source/Base16Exception.cpp
diff --git a/euphony/src/main/cpp/core/UTF8Charset.h b/euphony/src/main/cpp/core/UTF8Charset.h
new file mode 100644
index 0000000..de55be4
--- /dev/null
+++ b/euphony/src/main/cpp/core/UTF8Charset.h
@@ -0,0 +1,27 @@
+#ifndef EUPHONY_UTF8CHARSET_H
+#define EUPHONY_UTF8CHARSET_H
+
+#include <string>
+
+#include "Charset.h"
+
+namespace Euphony {
+
+    /**
+     * Charset that converts between a string and hex data one byte at a
+     * time, so multi-byte UTF-8 text such as Korean is handled per byte.
+     */
+    class UTF8Charset : public Charset {
+    public:
+        UTF8Charset() = default;
+        ~UTF8Charset() = default;
+
+        /** Pushes every byte of src, in order, into a new HexVector. */
+        HexVector encode(std::string src) override;
+
+        /** Rebuilds a byte string from consecutive pairs of hex digits in src. */
+        std::string decode(const HexVector &src) override;
+    };
+}
+
+#endif //EUPHONY_UTF8CHARSET_H
diff --git a/euphony/src/main/cpp/core/source/UTF8Charset.cpp b/euphony/src/main/cpp/core/source/UTF8Charset.cpp
new file mode 100644
index 0000000..858c67c
--- /dev/null
+++ b/euphony/src/main/cpp/core/source/UTF8Charset.cpp
@@ -0,0 +1,41 @@
+#include "UTF8Charset.h"
+
+#include <cstddef>
+#include <string>
+
+using namespace Euphony;
+
+/*
+ * Hands each byte of src to HexVector::pushBack in its original order.
+ * Multi-byte UTF-8 sequences need no special handling because they are
+ * forwarded byte by byte.
+ */
+HexVector UTF8Charset::encode(std::string src) {
+    HexVector result = HexVector(src.size());
+
+    for (char byte : src)
+        result.pushBack(byte);
+
+    return result;
+}
+
+/*
+ * Combines consecutive pairs of hex-source entries into one byte each: the
+ * first entry of a pair is the high nibble, the second the low nibble.
+ * A trailing unpaired entry is ignored.
+ */
+std::string UTF8Charset::decode(const HexVector& src) {
+    const auto& hexSource = src.getHexSource();
+    const std::size_t byteCount = hexSource.size() / 2;
+
+    std::string result;
+    result.reserve(byteCount);
+
+    for (std::size_t i = 0; i < byteCount; ++i) {
+        const int high = hexSource[2 * i];
+        const int low = hexSource[2 * i + 1];
+        result += static_cast<char>((high << 4) | low);
+    }
+
+    return result;
+}
diff --git a/euphony/src/main/cpp/tests/CMakeLists.txt b/euphony/src/main/cpp/tests/CMakeLists.txt
index 7a780b4..4857af7 100644
--- a/euphony/src/main/cpp/tests/CMakeLists.txt
+++ b/euphony/src/main/cpp/tests/CMakeLists.txt
@@ -11,6 +11,7 @@ target_include_directories (gtest PUBLIC ${GOOGLETEST_ROOT}/include)
 add_executable(
         ${TEST_EUPHONY}
         asciiCharsetTest.cpp
+        utf8CharsetTest.cpp
         base2Test.cpp
         base16Test.cpp
         defaultCharsetTest.cpp
diff --git a/euphony/src/main/cpp/tests/utf8CharsetTest.cpp b/euphony/src/main/cpp/tests/utf8CharsetTest.cpp
new file mode 100644
index 0000000..8dd85c5
--- /dev/null
+++ b/euphony/src/main/cpp/tests/utf8CharsetTest.cpp
@@ -0,0 +1,57 @@
+#include <gtest/gtest.h>
+#include <UTF8Charset.h>
+
+#include <string>
+#include <tuple>
+
+using namespace Euphony;
+
+// Each parameter is (plain text, expected lowercase hex of its UTF-8 bytes).
+using TestParamType = std::tuple<std::string, std::string>;
+
+class UTF8CharsetTestFixture : public ::testing::TestWithParam<TestParamType> {
+protected:
+    // The charset lives inside the fixture so nothing is heap-allocated
+    // (and leaked) per test; tests still go through the Charset interface.
+    UTF8Charset utf8Charset;
+    Charset *charset = &utf8Charset;
+};
+
+TEST_P(UTF8CharsetTestFixture, EncodingTest) {
+    std::string source;
+    std::string expectedResult;
+
+    std::tie(source, expectedResult) = GetParam();
+
+    HexVector actualResult = charset->encode(source);
+    EXPECT_EQ(actualResult.toString(), expectedResult);
+}
+
+TEST_P(UTF8CharsetTestFixture, DecodingTest) {
+    std::string source;
+    std::string expectedResult;
+
+    // Same table as the encoding test, read in the opposite direction.
+    std::tie(expectedResult, source) = GetParam();
+    HexVector hv = HexVector(source);
+
+    std::string actualResult = charset->decode(hv);
+    EXPECT_EQ(actualResult, expectedResult);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    UTF8CharsetTestSuite,
+    UTF8CharsetTestFixture,
+    ::testing::Values(
+        TestParamType("a", "61"),
+        TestParamType("b", "62"),
+        TestParamType("가", "eab080"),
+        TestParamType("각", "eab081"),
+        TestParamType("나", "eb8298"),
+        TestParamType("홍길동", "ed998deab8b8eb8f99"),
+        TestParamType("@XYZ", "4058595a"),
+        TestParamType(".com", "2e636f6d"),
+        TestParamType("서울특별시", "ec849cec9ab8ed8ab9ebb384ec8b9c"),
+        TestParamType("010-1234-5678", "3031302d313233342d35363738"),
+        TestParamType("36.5℃", "33362e35e28483")
+));