Skip to content

Commit

Permalink
impl truncation
Browse files Browse the repository at this point in the history
Committed-by: xiaolei.zl from Dev container
  • Loading branch information
zhanglei1949 committed Dec 16, 2024
1 parent 94048ef commit 8a75b74
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
12 changes: 12 additions & 0 deletions flex/tests/hqps/trucate_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "flex/utils/property/column.h"

#include <iostream>
#include <string>
#include <string_view>

// Smoke test for gs::truncate_utf8.
//
// Prints the truncated strings for manual inspection and, unlike a print-only
// driver, returns a non-zero exit code when the result is not the expected
// prefix so that a regression is visible to the test runner.
int main(int argc, char** argv) {
  (void) argc;
  (void) argv;

  const std::string str = "abcdefO(1/ε^2)";
  std::cout << "str: " << str << ", size: " << str.size() << std::endl;

  // A 12-byte limit ends exactly after the two-byte 'ε', so the character
  // must be kept whole.
  const std::string_view sv = gs::truncate_utf8(str, 12);
  std::cout << sv << ", size:" << sv.size() << std::endl;

  // An 11-byte limit would fall between the two bytes of 'ε'; the whole
  // character has to be dropped instead of being split.
  const std::string_view split = gs::truncate_utf8(str, 11);
  std::cout << split << ", size:" << split.size() << std::endl;

  const bool ok = (sv == "abcdefO(1/ε") && (split == "abcdefO(1/");
  if (!ok) {
    std::cout << "truncate_utf8 returned an unexpected prefix" << std::endl;
  }
  return ok ? 0 : 1;
}
29 changes: 29 additions & 0 deletions flex/utils/property/column.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,35 @@

namespace gs {

/**
 * @brief Truncate a UTF-8 string to at most `length` bytes without cutting a
 *        multi-byte character in half.
 *
 * @param str    The bytes to truncate; expected to be UTF-8 encoded.
 * @param length Maximum size, in bytes, of the returned view.
 * @return `str` itself when it already fits. Otherwise the longest prefix of
 *         `str` that is no longer than `length` bytes and ends on a character
 *         boundary. The returned view refers to the same storage as `str`.
 *
 * If the bytes around the cut point are not well-formed UTF-8 (more than three
 * consecutive continuation bytes, or a continuation byte at the very start),
 * no character boundary can be determined and the function falls back to a
 * plain byte cut at `length`. It always terminates and never returns more than
 * `length` bytes. NUL bytes are treated as ordinary one-byte characters.
 */
std::string_view truncate_utf8(std::string_view str, size_t length) {
  if (str.size() <= length) {
    return str;
  }

  // A UTF-8 continuation byte has the bit pattern 10xxxxxx.
  const auto is_continuation = [](char c) {
    return (static_cast<unsigned char>(c) & 0xC0) == 0x80;
  };

  // str[cut] is the first byte that would be dropped. If it is a continuation
  // byte, the cut falls inside a character, so move the cut back to that
  // character's lead byte. A UTF-8 character has at most three continuation
  // bytes, hence at most three steps are ever needed for well-formed input.
  constexpr size_t kMaxContinuationBytes = 3;
  size_t cut = length;  // length < str.size(), so str[cut] is in range.
  size_t steps = 0;
  while (cut > 0 && steps < kMaxContinuationBytes &&
         is_continuation(str[cut])) {
    --cut;
    ++steps;
  }

  if (is_continuation(str[cut])) {
    // Still not on a lead byte: the input is malformed around the cut point.
    // Fall back to a raw byte cut rather than discarding arbitrary data.
    return str.substr(0, length);
  }
  return str.substr(0, cut);
}

template <typename T>
class TypedEmptyColumn : public ColumnBase {
public:
Expand Down
4 changes: 3 additions & 1 deletion flex/utils/property/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

namespace gs {

// Returns `str` unchanged when it is at most `length` bytes long. Otherwise
// returns a prefix of `str` that is at most `length` bytes; for well-formed
// UTF-8 input the prefix ends on a character boundary, so a multi-byte
// character is never split. The result is a view into the same storage as
// `str`; no copy is made.
std::string_view truncate_utf8(std::string_view str, size_t length);

class ColumnBase {
public:
virtual ~ColumnBase() {}
Expand Down Expand Up @@ -503,7 +505,7 @@ class TypedColumn<std::string_view> : public ColumnBase {
if (copied_val.size() >= width_) {
VLOG(1) << "String length" << copied_val.size()
<< " exceeds the maximum length: " << width_ << ", cut off.";
copied_val = copied_val.substr(0, width_);
copied_val = truncate_utf8(copied_val, width_);
}
if (idx >= basic_size_ && idx < basic_size_ + extra_size_) {
size_t offset = pos_.fetch_add(copied_val.size());
Expand Down

0 comments on commit 8a75b74

Please sign in to comment.