Skip to content

Commit

Permalink
Add utf8 string view range/iter (#1047)
Browse files Browse the repository at this point in the history
  • Loading branch information
wengxt authored May 10, 2024
1 parent 882f98c commit 2afbe74
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 14 deletions.
22 changes: 8 additions & 14 deletions src/lib/fcitx-utils/inputbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,33 +71,27 @@ const std::string &InputBuffer::userInput() const {

bool InputBuffer::typeImpl(const char *s, size_t length) {
FCITX_D();
auto utf8Length = fcitx::utf8::lengthValidated(s, s + length);
std::string_view view(s, length);
auto utf8Length = fcitx::utf8::lengthValidated(view);
if (utf8Length == fcitx::utf8::INVALID_LENGTH) {
throw std::invalid_argument("Invalid UTF-8 string");
}
if (d->isAsciiOnly() && utf8Length != length) {
if (d->isAsciiOnly() && utf8Length != view.size()) {
throw std::invalid_argument(
"ascii only buffer only accept ascii only string");
}
if (d->maxSize_ && (utf8Length + size() > d->maxSize_)) {
return false;
}
d->input_.insert(std::next(d->input_.begin(), cursorByChar()), s,
s + length);
d->input_.insert(std::next(d->input_.begin(), cursorByChar()), view.begin(),
view.end());
if (!d->isAsciiOnly()) {
const auto *iter = s;
auto func = [&iter]() {
const auto *next = fcitx::utf8::nextChar(iter);
auto diff = std::distance(iter, next);
iter = next;
return diff;
};

auto pos = d->cursor_;
while (iter < s + length) {
d->sz_.insert(std::next(d->sz_.begin(), pos), func());
for (auto chrView : utf8::MakeUTF8StringViewRange(view)) {
d->sz_.insert(std::next(d->sz_.begin(), pos), chrView.size());
pos++;
}

d->acc_.resize(d->sz_.size() + 1);
auto newDirty = d->cursor_ > 0 ? d->cursor_ - 1 : 0;
if (d->accDirty_ > newDirty) {
Expand Down
71 changes: 71 additions & 0 deletions src/lib/fcitx-utils/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
/// \file
/// \brief C++ Utility functions for handling utf8 strings.

#include <iterator>
#include <stdexcept>
#include <string>
#include <string_view>
#include <fcitx-utils/cutf8.h>
#include <fcitx-utils/misc.h>
#include "fcitxutils_export.h"
Expand Down Expand Up @@ -222,6 +224,75 @@ auto MakeUTF8CharRange(const T &str) {
MakeUTF8CharIterator(std::end(str), std::end(str)));
}

/// \brief Input iterator that walks a UTF-8 byte sequence one character at a
/// time and exposes each character as a std::string_view over its bytes.
///
/// The character at the current position is decoded on construction and after
/// every increment; std::runtime_error is thrown when decoding cannot advance
/// before reaching \c end. The views are built from the address of the
/// current element, so the underlying sequence has to be contiguous and must
/// stay alive for as long as the views are used.
template <typename Iter>
class UTF8StringViewIter {
public:
    using iterator_category = std::input_iterator_tag;
    using value_type = std::string_view;
    using difference_type = std::ptrdiff_t;
    using reference = const value_type &;
    using pointer = const value_type *;

    /// \param iter position of the first character to expose.
    /// \param end one-past-the-end of the underlying byte sequence.
    /// \throws std::runtime_error if the character at \p iter cannot be
    /// decoded.
    UTF8StringViewIter(Iter iter, Iter end) : iter_(iter), end_(end) {
        update();
    }
    FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPY(UTF8StringViewIter)

    /// \brief View over the bytes of the current character.
    reference operator*() const { return currentView_; }

    pointer operator->() const { return &currentView_; }

    /// \brief Number of bytes occupied by the current character.
    size_t charLength() const { return currentView_.size(); }

    /// \brief Decoded value of the current character as reported by
    /// getNextChar.
    uint32_t chr() const { return currentChar_; }

    /// \brief Advance to the next character.
    /// \throws std::runtime_error if the next character cannot be decoded.
    UTF8StringViewIter &operator++() {
        iter_ = next_;
        update();
        return *this;
    }

    UTF8StringViewIter operator++(int) {
        auto old = *this;
        ++(*this);
        return old;
    }

    /// Only the current position takes part in the comparison. The operators
    /// are const-qualified so that const iterators can be compared and so
    /// that C++20 reversed-operator rewriting stays unambiguous.
    bool operator==(const UTF8StringViewIter &other) const {
        return iter_ == other.iter_;
    }
    bool operator!=(const UTF8StringViewIter &other) const {
        return !operator==(other);
    }

private:
    /// Decode the character at iter_ and refresh next_, currentChar_ and
    /// currentView_ accordingly.
    void update() {
        // NOTE(review): getNextChar is still invoked when iter_ == end_ so
        // that next_ and currentChar_ keep their previous values; confirm it
        // tolerates an empty range.
        next_ = getNextChar(iter_, end_, &currentChar_);
        if (iter_ == end_) {
            // Never form &*iter_ at the end position: dereferencing an end
            // iterator is undefined behaviour and aborts under checked
            // iterators. The end iterator simply exposes an empty view.
            currentView_ = std::string_view();
            return;
        }
        if (iter_ == next_) {
            throw std::runtime_error("Invalid UTF8 character.");
        }
        currentView_ = std::string_view(
            &*iter_, static_cast<size_t>(std::distance(iter_, next_)));
    }

    std::string_view currentView_;
    uint32_t currentChar_ = 0;
    Iter iter_;
    Iter next_;
    Iter end_;
};

template <typename Iter>
auto MakeUTF8StringViewIterator(Iter iter, Iter end) {
return UTF8StringViewIter<Iter>(iter, end);
}

/// \brief Wrap \p str in a range whose elements are the string views produced
/// by UTF8StringViewIter, one per UTF-8 character.
///
/// The range starts at std::begin(str) and stops at std::end(str); the
/// iterators refer to \p str itself rather than to a copy.
template <typename T>
auto MakeUTF8StringViewRange(const T &str) {
    const auto first = std::begin(str);
    const auto last = std::end(str);
    return MakeIterRange(MakeUTF8StringViewIterator(first, last),
                         MakeUTF8StringViewIterator(last, last));
}

} // namespace fcitx::utf8

#endif // _FCITX_UTILS_UTF8_H_
8 changes: 8 additions & 0 deletions test/testutf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ int main() {
FCITX_ASSERT(iter.view() == expectCharStr[i]);
}

auto rangeView = fcitx::utf8::MakeUTF8StringViewRange(str);
i = 0;
for (auto iter = std::begin(rangeView), end = std::end(rangeView);
iter != end; ++iter, ++i) {
FCITX_ASSERT(iter->size() == expectLength[i]);
FCITX_ASSERT(*iter == expectCharStr[i]);
}

FCITX_ASSERT(fcitx::utf8::getLastChar(str) == 0xa);

std::string invalidStr = "\xe4\xff";
Expand Down

0 comments on commit 2afbe74

Please sign in to comment.