From c27a3bcc023cf398e9ad52f893138b6df5eb34bb Mon Sep 17 00:00:00 2001 From: Eric Willigers Date: Sat, 15 Jun 2024 20:59:03 +1000 Subject: [PATCH] Add run-length-encoding exercise --- config.json | 8 ++ .../run-length-encoding/.docs/instructions.md | 20 +++++ .../run-length-encoding/.meta/config.json | 19 +++++ .../run-length-encoding/.meta/example.zig | 66 ++++++++++++++++ .../run-length-encoding/.meta/tests.toml | 49 ++++++++++++ .../run_length_encoding.zig | 11 +++ .../test_run_length_encoding.zig | 79 +++++++++++++++++++ 7 files changed, 252 insertions(+) create mode 100644 exercises/practice/run-length-encoding/.docs/instructions.md create mode 100644 exercises/practice/run-length-encoding/.meta/config.json create mode 100644 exercises/practice/run-length-encoding/.meta/example.zig create mode 100644 exercises/practice/run-length-encoding/.meta/tests.toml create mode 100644 exercises/practice/run-length-encoding/run_length_encoding.zig create mode 100644 exercises/practice/run-length-encoding/test_run_length_encoding.zig diff --git a/config.json b/config.json index 403f64a8..08f5d678 100644 --- a/config.json +++ b/config.json @@ -142,6 +142,14 @@ ], "difficulty": 1 }, + { + "slug": "run-length-encoding", + "name": "Run Length Encoding", + "uuid": "71e5d918-6327-442a-98e8-3f9560b1ecd9", + "practices": [], + "prerequisites": [], + "difficulty": 4 + }, { "slug": "isogram", "name": "Isogram", diff --git a/exercises/practice/run-length-encoding/.docs/instructions.md b/exercises/practice/run-length-encoding/.docs/instructions.md new file mode 100644 index 00000000..fc8ce056 --- /dev/null +++ b/exercises/practice/run-length-encoding/.docs/instructions.md @@ -0,0 +1,20 @@ +# Instructions + +Implement run-length encoding and decoding. + +Run-length encoding (RLE) is a simple form of data compression, where runs (consecutive data elements) are replaced by just one data value and count. + +For example we can represent the original 53 characters with only 13. 
/// Run-length encodes `string` into `buffer` and returns the written prefix
/// of `buffer`.
///
/// Each run of two or more identical bytes becomes its decimal length
/// followed by the byte; a byte that is not repeated is emitted without a
/// count.
///
/// No capacity check is performed here: `buffer` must be large enough for
/// the result. The encoded text is never longer than the input, so
/// `string.len` bytes always suffice.
pub fn encode(buffer: []u8, string: []const u8) []u8 {
    // The input is walked from its end to its start so that each count can be
    // emitted least-significant digit first, right after its character. The
    // output is therefore built mirrored and flipped once at the end.
    var out_len: usize = 0;
    var run_length: usize = 0;
    var in_index: usize = string.len;
    while (in_index > 0) {
        in_index -= 1;
        const ch = string[in_index];
        run_length += 1;

        // The preceding byte belongs to the same run: keep counting.
        if (in_index > 0 and string[in_index - 1] == ch) continue;

        buffer[out_len] = ch;
        out_len += 1;

        // Runs of length one carry no count.
        if (run_length > 1) {
            while (run_length > 0) : (run_length /= 10) {
                const digit: u8 = @intCast(run_length % 10);
                buffer[out_len] = '0' + digit;
                out_len += 1;
            }
        }
        run_length = 0;
    }

    const encoded = buffer[0..out_len];
    @import("std").mem.reverse(u8, encoded);
    return encoded;
}

/// Expands run-length encoded `string` into `buffer` and returns the written
/// prefix of `buffer`.
///
/// ASCII digits accumulate into a decimal repeat count that applies to the
/// next non-digit byte. A byte with no count (or a count of zero) is emitted
/// once. Digits at the very end of `string` that are not followed by a byte
/// produce no output.
///
/// No capacity check is performed here: `buffer` must be large enough to
/// hold the fully expanded text.
pub fn decode(buffer: []u8, string: []const u8) []u8 {
    var out_len: usize = 0;
    var run_length: usize = 0;
    for (string) |ch| {
        switch (ch) {
            '0'...'9' => run_length = run_length * 10 + (ch - '0'),
            else => {
                const end = out_len + @max(run_length, 1);
                @memset(buffer[out_len..end], ch);
                out_len = end;
                run_length = 0;
            },
        }
    }

    return buffer[0..out_len];
}
+# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[ad53b61b-6ffc-422f-81a6-61f7df92a231] +description = "run-length encode a string -> empty string" + +[52012823-b7e6-4277-893c-5b96d42f82de] +description = "run-length encode a string -> single characters only are encoded without count" + +[b7868492-7e3a-415f-8da3-d88f51f80409] +description = "run-length encode a string -> string with no single characters" + +[859b822b-6e9f-44d6-9c46-6091ee6ae358] +description = "run-length encode a string -> single characters mixed with repeated characters" + +[1b34de62-e152-47be-bc88-469746df63b3] +description = "run-length encode a string -> multiple whitespace mixed in string" + +[abf176e2-3fbd-40ad-bb2f-2dd6d4df721a] +description = "run-length encode a string -> lowercase characters" + +[7ec5c390-f03c-4acf-ac29-5f65861cdeb5] +description = "run-length decode a string -> empty string" + +[ad23f455-1ac2-4b0e-87d0-b85b10696098] +description = "run-length decode a string -> single characters only" + +[21e37583-5a20-4a0e-826c-3dee2c375f54] +description = "run-length decode a string -> string with no single characters" + +[1389ad09-c3a8-4813-9324-99363fba429c] +description = "run-length decode a string -> single characters with repeated characters" + +[3f8e3c51-6aca-4670-b86c-a213bf4706b0] +description = "run-length decode a string -> multiple whitespace mixed in string" + +[29f721de-9aad-435f-ba37-7662df4fb551] +description = "run-length decode a string -> lowercase string" + +[2a762efd-8695-4e04-b0d6-9736899fbc16] +description = "encode and then decode -> 
// ---- run_length_encoding.zig (exercise stub) ----

/// Placeholder for the student's encoder. The accompanying tests call it with
/// a caller-owned buffer and the text to encode, and compare the returned
/// slice against the expected encoded text.
pub fn encode(buffer: []u8, string: []const u8) []u8 {
    _ = buffer;
    _ = string;
    @compileError("please implement the encode function");
}

/// Placeholder for the student's decoder. The accompanying tests call it with
/// a caller-owned buffer and the encoded text, and compare the returned slice
/// against the expected decoded text.
pub fn decode(buffer: []u8, string: []const u8) []u8 {
    _ = buffer;
    _ = string;
    @compileError("please implement the decode function");
}

// ---- test_run_length_encoding.zig ----

const std = @import("std");
const testing = std.testing;

const run_length_encoding = @import("run_length_encoding.zig");

/// Size of the scratch buffers handed to `encode` and `decode` by every test.
const buffer_size = 80;

/// Encodes `string` and checks the result against `expected`.
fn testEncode(string: []const u8, expected: []const u8) !void {
    var buffer: [buffer_size]u8 = undefined;
    const actual = run_length_encoding.encode(&buffer, string);
    try testing.expectEqualStrings(expected, actual);
}

/// Decodes `string` and checks the result against `expected`.
fn testDecode(string: []const u8, expected: []const u8) !void {
    var buffer: [buffer_size]u8 = undefined;
    const actual = run_length_encoding.decode(&buffer, string);
    try testing.expectEqualStrings(expected, actual);
}

/// Encodes `string`, decodes the result into a second buffer, and checks the
/// round trip against `expected`.
fn testConsistency(string: []const u8, expected: []const u8) !void {
    var encode_buffer: [buffer_size]u8 = undefined;
    var decode_buffer: [buffer_size]u8 = undefined;
    const encoded = run_length_encoding.encode(&encode_buffer, string);
    const actual = run_length_encoding.decode(&decode_buffer, encoded);
    try testing.expectEqualStrings(expected, actual);
}

test "run-length encode a string-empty string" {
    try testEncode("", "");
}

test "run-length encode a string-single characters only are encoded without count" {
    try testEncode("XYZ", "XYZ");
}

test "run-length encode a string-string with no single characters" {
    try testEncode("AABBBCCCC", "2A3B4C");
}

test "run-length encode a string-single characters mixed with repeated characters" {
    try testEncode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB", "12WB12W3B24WB");
}

test "run-length encode a string-multiple whitespace mixed in string" {
    // The input starts and ends with a run of two spaces; that is what the
    // "2 " groups in the expected encoding stand for.
    try testEncode("  hsqq qww  ", "2 hs2q q2w2 ");
}

test "run-length encode a string-lowercase characters" {
    try testEncode("aabbbcccc", "2a3b4c");
}

test "run-length decode a string-empty string" {
    try testDecode("", "");
}

test "run-length decode a string-single characters only" {
    try testDecode("XYZ", "XYZ");
}

test "run-length decode a string-string with no single characters" {
    try testDecode("2A3B4C", "AABBBCCCC");
}

test "run-length decode a string-single characters with repeated characters" {
    try testDecode("12WB12W3B24WB", "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB");
}

test "run-length decode a string-multiple whitespace mixed in string" {
    // Each "2 " group expands to a run of two spaces.
    try testDecode("2 hs2q q2w2 ", "  hsqq qww  ");
}

test "run-length decode a string-lowercase string" {
    try testDecode("2a3b4c", "aabbbcccc");
}

test "encode and then decode-encode followed by decode gives original string" {
    try testConsistency("zzz ZZ zZ", "zzz ZZ zZ");
}