Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(windows): handle CRLF specific to windows #10697

Closed
wants to merge 8 commits into from
122 changes: 122 additions & 0 deletions windows/src/engine/keyman32/appcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,125 @@ ContextItemToAppContext(km_core_context_item *contextItems, PWSTR outBuf, DWORD
delete[] buf;
return TRUE;
}

LBType normalize_line_breaks(LPCWSTR windows_context, LPWSTR core_context, uint32_t core_context_size_in_chars) {

// Error Checking
if (windows_context == nullptr || core_context == nullptr || core_context_size_in_chars == 0) {
return lbERROR;
}
auto windows_context_length = wcsnlen_s(windows_context, MAXCONTEXT);

if (core_context_size_in_chars <= windows_context_length) {
return lbERROR;
}

// Replace all line breaks with LF and determine line break type
LBType match = lbNONE;
LPCWSTR win_ptr = windows_context;
LPWSTR core_ptr = core_context;
while (*win_ptr != L'\0') {
if (*win_ptr == L'\r' && *(win_ptr + 1) == L'\n') {
win_ptr++; // skip '\r'
*core_ptr++ = *win_ptr++;
match = lbCRLF;
} else if (*win_ptr == L'\r') {
*core_ptr++ = L'\n';
win_ptr++;
match = lbCR;
} else if (*win_ptr == L'\n') {
*core_ptr++ = *win_ptr++;
match = lbLF;
}
else {
*core_ptr++ = *win_ptr++;
}
}
*core_ptr = L'\0';

return match;
}

BOOL
restore_line_breaks(LPWSTR win_out_str, uint32_t win_out_size_in_chars, LBType line_break, LBType default_lb ){
if (win_out_str == nullptr) {
return rsERROR;
}

if (line_break == lbNONE){
line_break = default_lb;
}

// return early if doesn't contain any '\n';
if (wcsstr(win_out_str, L"\n") == nullptr) {
return rsNO_LB;
}

size_t buf_length = wcslen(win_out_str) * 2;
LPWSTR buf_string = new WCHAR[buf_length];
Comment on lines +221 to +222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't allow space for final terminating nul if every char is doubled


LPCWSTR in_ptr = win_out_str;
LPWSTR buf_ptr = buf_string;

while (*in_ptr != L'\0') {
if (*in_ptr == '\n') {
Comment on lines +227 to +228
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not consistently using L prefix, here and elsewhere in this PR

switch (line_break) {
case lbLF:
*buf_ptr++ = *in_ptr++;
break;
case lbCRLF:
*buf_ptr++ = '\r';
*buf_ptr++ = *in_ptr++;
break;
case lbCR:
*buf_ptr++ = '\r';
*in_ptr++;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
*in_ptr++;
in_ptr++;

break;
}
} else {
*buf_ptr++ = *in_ptr++;
}
}
*buf_ptr = '\0'; // Null terminate the modified string

// may now need to truncate the string preserving the end closest the caret.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of doing this, couldn't we just return the longer buffer?

auto final_length = wcsnlen_s(buf_string, buf_length);
if (final_length < win_out_size_in_chars) {
wcscpy_s(win_out_str, win_out_size_in_chars, buf_string);
} else {
auto diff = final_length + 1 - win_out_size_in_chars; // +1 for null termination
wcscpy_s(win_out_str, win_out_size_in_chars, buf_string + diff);
Comment on lines +253 to +254
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could break a surrogate pair

}
delete[] buf_string;
return rsSUCCESS;
}

BOOL
context_char32_char16(const km_core_usv *core_output, LPWSTR win_out_str, uint32_t win_output_size_in_char) {
if (core_output == nullptr || win_out_str == nullptr || win_output_size_in_char <= 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (core_output == nullptr || win_out_str == nullptr || win_output_size_in_char <= 0) {
if (core_output == nullptr || win_out_str == nullptr || win_output_size_in_char == 0) {

uint32_t cannot be <0

return FALSE;
}
uint8_t idx = 0;
size_t buf_length = (MAXCONTEXT * 2) + 1;
WCHAR *buf = new WCHAR[(MAXCONTEXT * 2) + 1]; // if every character is a surrogate
while (*core_output) {
if (Uni_IsSMP(*core_output)) {
buf[idx++] = static_cast<WCHAR> Uni_UTF32ToSurrogate1(*core_output);
buf[idx++] = static_cast<WCHAR> Uni_UTF32ToSurrogate2(*core_output);
} else {
buf[idx++] = static_cast<WCHAR>(*core_output);
}
core_output++;
}
buf[idx] = 0; // Null terminate character array

auto final_length = wcsnlen_s(buf, buf_length);
if (final_length < win_output_size_in_char) {
wcscpy_s(win_out_str, win_output_size_in_char, buf);
} else {
auto diff = final_length + 1 - win_output_size_in_char; // +1 for null termination
wcscpy_s(win_out_str, win_output_size_in_char, buf + diff);
}
Comment on lines +279 to +285
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same risk of splitting a surrogate pair

delete[] buf;
return TRUE;
}
58 changes: 58 additions & 0 deletions windows/src/engine/keyman32/appcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,64 @@ class AppContext {
*/
BOOL ContextItemToAppContext(km_core_context_item *contextItems, PWSTR outBuf, DWORD len);

/**
* lbNONE: No line break.
* lbLF: Line feed only .
* lbCRLF: Carriage return and line feed.
* lbCR: lbCR Carriage return only.
* lbERROR: lbERROR Error encountered during line break processing.
*/
enum LBType { lbNONE, lbLF, lbCRLF, lbCR, lbERROR };

enum RestoreLBResult { rsSUCCESS, rsNO_LB, rsERROR};

/**
* Normalizes line breaks in a wide-character string to LF (line feed) and
* identifies the original line break type. Testing has shown WORD and
* WordPad have just "\r" in the context. Whereas Firefox has just "\n"
*
* @param windows_context The wide-character string to be normalized.
* @param[in,out] core_context The buffer to store the normalized string.
* @param core_context_size_in_chars The size of the `core_context` buffer in
* wide characters.
* @return The type of line break originally found in
* `windows_context`.
*/
LBType normalize_line_breaks(LPCWSTR windows_context, LPWSTR core_context, uint32_t core_context_size_in_chars);

/**
* Restores line breaks in a wide-character string based on the provided line
* break type. It handles potential truncation of the string to fit within
* `win_out_size_in_chars` by preserving the end closest to the caret.
*
* @param[in,out] win_out_str The wide-character string to have its line
* breaks restored.
* @param win_out_size_in_chars The maximum length of the `win_out_str` buffer
* in wide characters.
* @param line_break The desired line break type to use for
* restoration.
* @param default_lb The default line break type to use if
* `line_break` is lbNONE.
* @return TRUE if successful, FALSE if errors occur
*/
BOOL restore_line_breaks(LPWSTR win_out_str, uint32_t win_out_size_in_chars, LBType line_break, LBType default_lb);

/**
* Converts a UTF-32 string from the Keyman core output to a UTF-16
* wide-character string. The function handles surrogate pairs for characters
* exceeding the Basic Multilingual Plane (BMP) in UTF-32. If the converted
* string does not fit within the provided `win_out_str` buffer size, the string
* is truncated preserving the end closest to the caret.
*
* @param core_output Pointer to the `km_core_usv` structure containing
* the UTF-32 string.
* @param[in,out] win_out_str The wide-character string buffer to store
* the converted UTF-16 string.
* @param win_output_size_in_char The size of the `win_out_str` buffer in wide
* characters.
* @return TRUE if successful, FALSE is not supported
*/
BOOL context_char32_char16(const km_core_usv *core_output, LPWSTR win_out_str, uint32_t win_output_size_in_char);


#endif
3 changes: 3 additions & 0 deletions windows/src/engine/keyman32/globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

#include "serialkeyeventclient.h"
#include "SharedBuffers.h"
#include "appcontext.h"

#include "..\..\..\include\kmtip_guids.h"

Expand Down Expand Up @@ -213,6 +214,8 @@ typedef struct tagKEYMAN64THREADDATA

BOOL CoreProcessEventRun; // True if core process event has been run

LBType line_break;

BOOL FInRefreshKeyboards;
BOOL RefreshRequired;
LONG RefreshTag_Thread; // TODO: we may be able to eliminate this with our delayed refresh pattern?
Expand Down
15 changes: 14 additions & 1 deletion windows/src/engine/keyman32/kmprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,24 @@ BOOL fOutputKeystroke;
static BOOL
Process_Event_Core(PKEYMAN64THREADDATA _td) {
WCHAR application_context[MAXCONTEXT];
WCHAR core_context[MAXCONTEXT];
LPWSTR context_ptr = application_context;
if (_td->app->ReadContext(application_context)) {
_td->line_break = normalize_line_breaks(application_context, core_context, MAXCONTEXT);
if (_td->line_break == lbERROR) {
SendDebugMessageFormat(0, sdmGlobal, 0, "Process_Event_Core: normalize_line_break failed.");
} else if (_td->line_break != lbNONE) { // line break found
context_ptr = core_context;
} else {
context_ptr = application_context;
}
km_core_context_status result;
result = km_core_state_context_set_if_needed(_td->lpActiveKeyboard->lpCoreKeyboardState, reinterpret_cast<const km_core_cp *>(application_context));
result = km_core_state_context_set_if_needed(
_td->lpActiveKeyboard->lpCoreKeyboardState, reinterpret_cast<const km_core_cp*>(context_ptr));
if (result == KM_CORE_CONTEXT_STATUS_ERROR || result == KM_CORE_CONTEXT_STATUS_INVALID_ARGUMENT) {
SendDebugMessageFormat(0, sdmGlobal, 0, "Process_Event_Core: km_core_state_context_set_if_needed returned [%d]", result);
}
SendDebugMessageFormatW(0, sdmGlobal, 0, L"Process_Event_Core: set_if_needed result: %d", result);
}

SendDebugMessageFormat(
Expand All @@ -98,6 +110,7 @@ Process_Event_Core(PKEYMAN64THREADDATA _td) {
SendDebugMessageFormat(0, sdmGlobal, 0, "ProcessEvent CoreProcessEvent Result:False %d ", FALSE);
return FALSE;
}

return TRUE;
}

Expand Down
28 changes: 16 additions & 12 deletions windows/src/engine/keyman32/kmprocessactions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,18 @@
#include "pch.h"

/**
* Adds characters to the queue to be inserted into the application
* Places the win_output characters in the AITIP queue
* in the queue.
* @param app A pointer to the AITIP instance.
* @param outputString A null-terminated string of characters to insert into the application
* @param core_output A null-terminated string of characters in UTF-32 format to insert into the application
*/


static void
processUnicodeChar(AITIP* app, const km_core_usv* outputString) {
while (*outputString) {
if (Uni_IsSMP(*outputString)) {
app->QueueAction(QIT_CHAR, (Uni_UTF32ToSurrogate1(*outputString)));
app->QueueAction(QIT_CHAR, (Uni_UTF32ToSurrogate2(*outputString)));
} else {
app->QueueAction(QIT_CHAR, *outputString);
}
outputString++;
process_output_string(AITIP* app, LPWSTR win_output) {
while (*win_output) {
app->QueueAction(QIT_CHAR, *win_output);
win_output++;
}
}

Expand Down Expand Up @@ -110,7 +107,14 @@ BOOL ProcessActions(BOOL* emitKeystroke)
_td->CoreProcessEventRun = FALSE;

processBack(_td->app, core_actions->code_points_to_delete, core_actions->deleted_context);
processUnicodeChar(_td->app, core_actions->output);
WCHAR win_out_str[MAXCONTEXT];
context_char32_char16(core_actions->output, win_out_str, MAXCONTEXT);
if (restore_line_breaks(win_out_str, MAXCONTEXT, _td->line_break, lbCRLF) == rsERROR) {
SendDebugMessageFormat(0, sdmGlobal, 0, "ProcessActions restore_line_breaks failed");
}

process_output_string(_td->app, win_out_str);

if (core_actions->persist_options != NULL) {
processPersistOpt(core_actions, _td->lpActiveKeyboard);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,58 @@ TEST(AppContext, AppContext_Delete) {
testContext.Delete();

}

// Test normalize and restore line break functions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need some tests for surrogate pair edge cases, especially boundary conditions


TEST(AppContext, normalize_restore_CRLF) {
wchar_t windows_context[] = L"Hello\r\nWorld\r\nYou Rock";
wchar_t core_context[MAXCONTEXT];
wchar_t expected_string[] = L"Hello\nWorld\nYou Rock";
LBType lb_type;
lb_type = normalize_line_breaks(windows_context, core_context, MAXCONTEXT);
EXPECT_TRUE(lb_type == lbCRLF);
EXPECT_STREQ(expected_string, core_context);
EXPECT_TRUE(restore_line_breaks(core_context, MAXCONTEXT, lb_type, lbCRLF) == rsSUCCESS);
// should be the same as the original windows_context.
EXPECT_STREQ(windows_context, core_context);
}

// No CRLF `\r\n` found
TEST(AppContext, normalize_restore_NONE) {
wchar_t windows_context[] = L"Hello World You Rock";
wchar_t core_context[MAXCONTEXT];
wchar_t expected_string[] = L"Hello World You Rock";
LBType lb_type;
lb_type = normalize_line_breaks(windows_context, core_context, MAXCONTEXT);
EXPECT_TRUE(lb_type == lbNONE);
EXPECT_STREQ(expected_string, core_context);
EXPECT_TRUE(restore_line_breaks(core_context, MAXCONTEXT, lb_type, lbCRLF) == rsNO_LB);
// should be the same as the original windows_context.
EXPECT_STREQ(windows_context, core_context);
}

TEST(AppContext, normalize_restore_CR) {
wchar_t windows_context[] = L"Hello\rWorld\rYou Rock";
wchar_t core_context[MAXCONTEXT];
wchar_t expected_string[] = L"Hello\nWorld\nYou Rock";
LBType lb_type;
lb_type = normalize_line_breaks(windows_context, core_context, MAXCONTEXT);
EXPECT_TRUE(lb_type == lbCR);
EXPECT_STREQ(expected_string, core_context);
EXPECT_TRUE(restore_line_breaks(core_context, MAXCONTEXT, lb_type, lbCRLF) == rsSUCCESS);
// should be the same as the original windows_context.
EXPECT_STREQ(windows_context, core_context);
}

TEST(AppContext, normalize_restore_LF) {
wchar_t windows_context[] = L"Hello\nWorld\nYou Rock";
wchar_t core_context[MAXCONTEXT];
wchar_t expected_string[] = L"Hello\nWorld\nYou Rock";
LBType lb_type;
lb_type = normalize_line_breaks(windows_context, core_context, MAXCONTEXT);
EXPECT_TRUE(lb_type == lbLF);
EXPECT_STREQ(expected_string, core_context);
EXPECT_TRUE(restore_line_breaks(core_context, MAXCONTEXT, lb_type, lbCRLF) == rsSUCCESS);
// should be the same as the original windows_context.
EXPECT_STREQ(windows_context, core_context);
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,46 @@ class KMPROCESSACTIONS : public ::testing::Test {
// Now that the AITIP no longer contains a context
// this test is just reduced to checking the size of the
// action queue
TEST_F(KMPROCESSACTIONS, processUnicodeChartest) {
TEST_F(KMPROCESSACTIONS, processOutputStringtest) {
// just check something has been added to the que
WCHAR callbuf[MAXCONTEXT];
AITIP testApp;
int expectedQueueSize = 3; // 1 + surrogate pair
km_core_usv output_string[] = {0x0041, 0x010000, 0};
processUnicodeChar(&testApp, &output_string[0]);
context_char32_char16(output_string, callbuf, MAXCONTEXT);
process_output_string(&testApp, callbuf);
int queueSize = testApp.GetQueueSize();
EXPECT_EQ(queueSize, expectedQueueSize);
}


// Test char32_char16_test
TEST_F(KMPROCESSACTIONS, context_char32_char16_test) {
WCHAR wchar16_string[MAXCONTEXT];
AITIP testApp;
// L"Hello\nWorld\nYou 𝄞Rock";
km_core_usv core_string[] = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x000A, 0x0057, 0x006F, 0x0072, 0x006C, 0x0064,
0x000A, 0x0059, 0x006F, 0x0075, 0x0020, 0x1D11E, 0x0052, 0x006F, 0x0063, 0x006B, 0x0000};

wchar_t expected_string[] = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x000A, 0x0057, 0x006F, 0x0072, 0x006C, 0x0064, 0x000A,
0x0059, 0x006F, 0x0075, 0x0020, 0xD834, 0xDD1E, 0x0052, 0x006F, 0x0063, 0x006B, 0x0000};

context_char32_char16(&core_string[0], wchar16_string, MAXCONTEXT);
EXPECT_STREQ(expected_string, wchar16_string);
}

// Test char32_char16_test
TEST_F(KMPROCESSACTIONS, context_char32_char16_small_context) {
const int small_context = 15;
WCHAR wchar16_string[small_context];
AITIP testApp;
// L"Hello\nWorld\nYou 𝄞Rock"
km_core_usv core_string[] = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x000A, 0x0057, 0x006F, 0x0072, 0x006C, 0x0064,
0x000A, 0x0059, 0x006F, 0x0075, 0x0020, 0x1D11E, 0x0052, 0x006F, 0x0063, 0x006B, 0x0000};
// L"rld\nYou 𝄞Rock"
wchar_t expected_string[] = {0x0072, 0x006C, 0x0064, 0x000A, 0x0059, 0x006F, 0x0075, 0x0020,
0xD834, 0xDD1E, 0x0052, 0x006F, 0x0063, 0x006B, 0x0000};

context_char32_char16(&core_string[0], wchar16_string, small_context);
EXPECT_STREQ(expected_string, wchar16_string);
}
Loading