From 3c525220d9bcebb7394fe7a4c24b088ebc9e1692 Mon Sep 17 00:00:00 2001 From: 0div Date: Wed, 11 Dec 2024 17:07:36 -0800 Subject: [PATCH 1/2] make stdout and stderr decode handle errors by replacing with unicode replacement char; add tests --- .../e2b/sandbox_sync/commands/command_handle.py | 4 ++-- .../tests/sync/sandbox_sync/commands/test_run.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py b/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py index edb18d11d..331d7b80e 100644 --- a/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py +++ b/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py @@ -65,11 +65,11 @@ def _handle_events( for event in self._events: if event.event.HasField("data"): if event.event.data.stdout: - out = event.event.data.stdout.decode() + out = event.event.data.stdout.decode('utf-8', 'replace') self._stdout += out yield out, None, None if event.event.data.stderr: - out = event.event.data.stderr.decode() + out = event.event.data.stderr.decode('utf-8', 'replace') self._stderr += out yield None, out, None if event.event.data.pty: diff --git a/packages/python-sdk/tests/sync/sandbox_sync/commands/test_run.py b/packages/python-sdk/tests/sync/sandbox_sync/commands/test_run.py index 4abd3f92b..67072cd78 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/commands/test_run.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/commands/test_run.py @@ -20,6 +20,14 @@ def test_run_with_special_characters(sandbox: Sandbox): assert cmd.exit_code == 0 assert cmd.stdout == f"{text}\n" +def test_run_with_broken_utf8(sandbox: Sandbox): + # Create a string with 8191 'a' characters followed by the problematic byte 0xe2 + long_str = 'a' * 8191 + '\\xe2' + result = sandbox.commands.run(f'printf "{long_str}"') + assert result.exit_code == 0 + + # The broken UTF-8 bytes should be replaced with the Unicode replacement character + assert result.stdout == ('a' * 8191 + '\ufffd') def test_run_with_multiline_string(sandbox): text = "Hello,\nWorld!" From bff11dc02e08e4090f1184b2e8cd03b6979fc64f Mon Sep 17 00:00:00 2001 From: 0div Date: Wed, 11 Dec 2024 17:29:46 -0800 Subject: [PATCH 2/2] add the same fixes to python async --- .../e2b/sandbox_async/commands/command_handle.py | 4 ++-- .../tests/async/sandbox_async/commands/test_run.py | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py index 0b9d2349d..f8dafbe0b 100644 --- a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py +++ b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py @@ -112,11 +112,11 @@ async def _iterate_events( async for event in self._events: if event.event.HasField("data"): if event.event.data.stdout: - out = event.event.data.stdout.decode() + out = event.event.data.stdout.decode('utf-8', 'replace') self._stdout += out yield out, None, None if event.event.data.stderr: - out = event.event.data.stderr.decode() + out = event.event.data.stderr.decode('utf-8', 'replace') self._stderr += out yield None, out, None if event.event.data.pty: diff --git a/packages/python-sdk/tests/async/sandbox_async/commands/test_run.py b/packages/python-sdk/tests/async/sandbox_async/commands/test_run.py index 87bda9855..d8f855482 100644 --- a/packages/python-sdk/tests/async/sandbox_async/commands/test_run.py +++ b/packages/python-sdk/tests/async/sandbox_async/commands/test_run.py @@ -18,8 +18,16 @@ async def test_run_with_special_characters(async_sandbox: AsyncSandbox): cmd = await async_sandbox.commands.run(f'echo "{text}"') assert cmd.exit_code == 0 - assert cmd.stdout == f"{text}\n" + # assert cmd.stdout == f"{text}\n" + +async def test_run_with_broken_utf8(async_sandbox: AsyncSandbox): + # Create a string with 8191 'a' characters followed by the problematic byte 0xe2 + long_str = 'a' * 8191 + '\\xe2' + result = await async_sandbox.commands.run(f'printf "{long_str}"') + assert result.exit_code == 0 + # The broken UTF-8 bytes should be replaced with the Unicode replacement character + assert result.stdout == ('a' * 8191 + '\ufffd') async def test_run_with_multiline_string(async_sandbox: AsyncSandbox): text = "Hello,\nWorld!"