From 2f155ffdf21c1fe85bfffe8165bef7acb778aaed Mon Sep 17 00:00:00 2001 From: William Sanville Date: Wed, 13 Mar 2024 15:50:35 -0700 Subject: [PATCH] IRAssembler support for fill-array-data Summary: I'd like to start improving some support for array handling like https://fburl.com/code/eabffe38, but first make it possible to write s-expr test cases on the various ways to define an array. Using an integ test is not very nice, as it does not give control on how the array gets populated. It would be nice to define char[] with actual chars like a b c but I'm not going to bother doing escapes and whatnot. Just simple expression list format with hex values should be good enough. Reviewed By: agampe Differential Revision: D53624243 fbshipit-source-id: 9d0881789019da06a86fa0105c42003acbc21472 --- libredex/DexInstruction.h | 37 ++++++++++++ libredex/IRAssembler.cpp | 62 ++++++++++++++++++-- libredex/IRAssembler.h | 4 -- libredex/Show.cpp | 34 +++++++---- libredex/Show.h | 6 ++ test/unit/IRAssemblerTest.cpp | 107 ++++++++++++++++++++++++++++++++++ 6 files changed, 232 insertions(+), 18 deletions(-) diff --git a/libredex/DexInstruction.h b/libredex/DexInstruction.h index 5249ffec02e..3186e1be93d 100644 --- a/libredex/DexInstruction.h +++ b/libredex/DexInstruction.h @@ -7,11 +7,13 @@ #pragma once +#include #include #include #include #include +#include "CppUtil.h" #include "Debug.h" #include "DexDefs.h" #include "DexOpcode.h" @@ -363,6 +365,22 @@ class DexOpcodeProto : public DexInstruction { void set_proto(DexProto* proto) { m_proto = proto; } }; +inline uint16_t fill_array_data_payload_width(const DexOpcodeData* op_data) { + always_assert_log(op_data->opcode() == FOPCODE_FILLED_ARRAY, + "DexOpcodeData is not an array payload"); + always_assert(op_data->data_size() >= 3); + return *op_data->data(); +} + +inline uint32_t fill_array_data_payload_element_count( + const DexOpcodeData* op_data) { + always_assert_log(op_data->opcode() == FOPCODE_FILLED_ARRAY, + "DexOpcodeData is not an array payload"); + always_assert(op_data->data_size() >= 3); + auto size_ptr = (uint32_t*)(op_data->data() + 1); + return *size_ptr; +} + // helper function to create fill-array-data-payload according to // https://source.android.com/devices/tech/dalvik/dalvik-bytecode#fill-array template @@ -385,6 +403,25 @@ std::unique_ptr encode_fill_array_data_payload( return std::make_unique(data); } +// Like above, but parse from a vector of hex string elements +template +std::unique_ptr encode_fill_array_data_payload_from_string( + const std::vector& elements) { + static_assert(std::is_integral::value, + "fill-array-data-payload can only contain integral values."); + std::vector vec; + for (const auto& item : elements) { + IntType val; + auto trimmed = trim_whitespaces(item); + auto result = std::from_chars(trimmed.data(), + trimmed.data() + trimmed.size(), val, 16); + always_assert_log(result.ec != std::errc::invalid_argument, + "Invalid payload: \"%s\"", item.c_str()); + vec.emplace_back(val); + } + return encode_fill_array_data_payload(vec); +} + template std::vector get_fill_array_data_payload(const DexOpcodeData* op_data) { static_assert(std::is_integral::value, diff --git a/libredex/IRAssembler.cpp b/libredex/IRAssembler.cpp index 9bae45a2850..3950a555c1e 100644 --- a/libredex/IRAssembler.cpp +++ b/libredex/IRAssembler.cpp @@ -16,6 +16,7 @@ #include "Creators.h" #include "DexClass.h" +#include "DexInstruction.h" #include "DexPosition.h" #include "IRCode.h" #include "Show.h" @@ -75,9 +76,24 @@ s_expr to_s_expr(const IRInstruction* insn, const LabelRefs& label_refs) { switch (opcode::ref(op)) { case opcode::Ref::None: break; - case opcode::Ref::Data: - not_reached_log("Not yet supported"); + case opcode::Ref::Data: { + auto op_data = insn->get_data(); + if (op_data->opcode() == FOPCODE_FILLED_ARRAY) { + auto ewidth = fill_array_data_payload_width(op_data); + s_exprs.emplace_back(ewidth); + auto element_count = fill_array_data_payload_element_count(op_data); + std::vector element_exprs; + element_exprs.reserve(element_count); + for (const auto& s : + pretty_array_data_payload(ewidth, element_count, op_data->data())) { + element_exprs.emplace_back(s); + } + s_exprs.emplace_back(element_exprs); + } else { + not_reached_log("Not yet supported"); + } break; + } case opcode::Ref::Field: s_exprs.emplace_back(show(insn->get_field())); break; @@ -138,6 +154,22 @@ s_expr _to_s_expr(const DexPosition* pos, uint32_t idx, uint32_t parent_idx) { s_expr(parent_idx_str), }); } + +std::unique_ptr create_fill_array_data_payload_from_str( + const uint16_t ewidth, const std::vector& elements) { + switch (ewidth) { + case 1: + return encode_fill_array_data_payload_from_string(elements); + case 2: + return encode_fill_array_data_payload_from_string(elements); + case 4: + return encode_fill_array_data_payload_from_string(elements); + default: { + always_assert_log(ewidth == 8, "Invalid width: %d", ewidth); + return encode_fill_array_data_payload_from_string(elements); + } + } +} } // namespace std::vector to_s_exprs( @@ -206,9 +238,31 @@ std::unique_ptr instruction_from_s_expr( switch (opcode::ref(op)) { case opcode::Ref::None: break; - case opcode::Ref::Data: - not_reached_log("Not yet supported"); + case opcode::Ref::Data: { + if (insn->opcode() == OPCODE_FILL_ARRAY_DATA) { + int32_t ewidth; + s_patn({s_patn(&ewidth)}, tail) + .must_match(tail, "Expecting int for element width" + opcode_str); + always_assert_log(ewidth == 1 || ewidth == 2 || ewidth == 4 || + ewidth == 8, + "Invalid width %d", ewidth); + + std::vector hex_elements; + std::string element_str; + s_expr list; + s_patn({s_patn(list)}, tail) + .must_match(tail, "Expecting list of hex strings for " + opcode_str); + while (s_patn({s_patn(&element_str)}, list).match_with(list)) { + hex_elements.push_back(element_str); + } + auto data = create_fill_array_data_payload_from_str((uint16_t)ewidth, + hex_elements); + insn->set_data(std::move(data)); + } else { + not_reached_log("Not yet supported"); + } break; + } case opcode::Ref::Field: { std::string str; s_patn({s_patn(&str)}, tail) diff --git a/libredex/IRAssembler.h b/libredex/IRAssembler.h index 35e41fc53a4..25e19cfbd5e 100644 --- a/libredex/IRAssembler.h +++ b/libredex/IRAssembler.h @@ -41,10 +41,6 @@ class IRCode; * automatically created by the assembler. I.e. you do *not* need to call * make_{field,method}() beforehand to ensure that they exist. * - * Not-yet-implemented features: - * - try-catch - * - fill-array-data opcodes - * * NOTE: * When assembling an IRCode instance, the assembler will attempt to set the * registers_size for you by making it 1 larger than the largest register diff --git a/libredex/Show.cpp b/libredex/Show.cpp index 2d83ec1058c..122d9bcf0f1 100644 --- a/libredex/Show.cpp +++ b/libredex/Show.cpp @@ -1293,17 +1293,16 @@ std::string show(const DexOpcodeData* insn) { // See format at // https://source.android.com/devices/tech/dalvik/dalvik-bytecode#fill-array const uint16_t ewidth = *data++; - const uint32_t size = *((uint32_t*)data); - ss << "[" << size << " x " << ewidth << "] "; - // escape size - data += 2; - const uint8_t* data_ptr = (uint8_t*)data; - ss << "{ "; - for (size_t i = 0; i < size; i++) { - if (i != 0) { - ss << ", "; + const uint32_t element_count = *((uint32_t*)data); + ss << "[" << element_count << " x " << ewidth << "] {"; + auto vec = pretty_array_data_payload(ewidth, element_count, insn->data()); + bool first{true}; + for (const auto& s : vec) { + if (!first) { + ss << ","; } - ss << std::hex << read(data_ptr, ewidth); + ss << " " << s; + first = false; } ss << " }"; break; @@ -1711,3 +1710,18 @@ std::string pretty_bytes(uint64_t val) { << " " << modifier << "B"; return oss.str(); } + +std::vector pretty_array_data_payload(const uint16_t ewidth, + const uint32_t element_count, + const uint16_t* data) { + std::vector result; + result.reserve(element_count); + const uint8_t* data_ptr = (uint8_t*)(data + 3); + for (size_t i = 0; i < element_count; i++) { + auto xx = read(data_ptr, ewidth); + std::ostringstream oss; + oss << std::hex << xx; + result.emplace_back(oss.str()); + } + return result; +} diff --git a/libredex/Show.h b/libredex/Show.h index cc525f7bf1a..4146a3201da 100644 --- a/libredex/Show.h +++ b/libredex/Show.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * Stringification functions for core types. Definitions are in DexClass.cpp @@ -155,3 +156,8 @@ std::string vshow(const DexType*); // Format a number as a byte entity. std::string pretty_bytes(uint64_t val); + +// Format the items of given width as human readable hex strings. +std::vector pretty_array_data_payload(const uint16_t ewidth, + const uint32_t element_count, + const uint16_t* data); diff --git a/test/unit/IRAssemblerTest.cpp b/test/unit/IRAssemblerTest.cpp index 2bea3a62f64..bfacbaeeb4b 100644 --- a/test/unit/IRAssemblerTest.cpp +++ b/test/unit/IRAssemblerTest.cpp @@ -9,6 +9,7 @@ #include +#include "DexInstruction.h" #include "DexPosition.h" #include "RedexTest.h" #include "Show.h" @@ -727,3 +728,109 @@ TEST_F(IRAssemblerTest, assembleClassFromString) { EXPECT_EQ(v_method->get_class(), cls->get_type()); EXPECT_EQ(v_method->get_name()->str(), "bazPublic"); } + +std::vector get_fill_array_data_insns( + const std::unique_ptr& code) { + std::vector result; + for (const auto& mie : *code) { + if (mie.type == MFLOW_OPCODE && + mie.insn->opcode() == OPCODE_FILL_ARRAY_DATA) { + result.push_back(mie.insn); + } + } + return result; +} + +TEST_F(IRAssemblerTest, fillArrayPayloads) { + auto code = assembler::ircode_from_string(R"( + ( + (const v0 3) + + (new-array v0 "[Z") ; create an array of length 3 + (move-result-pseudo-object v1) + (fill-array-data v1 #1 (0 0 1)) + + (new-array v0 "[C") ; create an array of length 3 + (move-result-pseudo-object v2) + (fill-array-data v2 #2 (61 62 63)) + + (new-array v0 "[I") ; create an array of length 3 + (move-result-pseudo-object v3) + (fill-array-data v3 #4 (3e7 2 40000000)) + + (new-array v0 "[J") ; create an array of length 3 + (move-result-pseudo-object v4) + (fill-array-data v4 #8 (3b9aca00 b2d05e00 b2d05e01)) + + (return-void) + ) +)"); + auto insns = get_fill_array_data_insns(code); + EXPECT_EQ(insns.size(), 4); + + { + auto data = insns.at(0)->get_data(); + auto values = get_fill_array_data_payload(data); + EXPECT_EQ(values.size(), 3); + EXPECT_EQ(values.at(0), 0x0); + EXPECT_EQ(values.at(1), 0x0); + EXPECT_EQ(values.at(2), 0x1); + } + { + auto data = insns.at(1)->get_data(); + auto values = get_fill_array_data_payload(data); + EXPECT_EQ(values.size(), 3); + EXPECT_EQ(values.at(0), 0x61); + EXPECT_EQ(values.at(1), 0x62); + EXPECT_EQ(values.at(2), 0x63); + } + { + auto data = insns.at(2)->get_data(); + auto values = get_fill_array_data_payload(data); + EXPECT_EQ(values.size(), 3); + EXPECT_EQ(values.at(0), 0x3e7); + EXPECT_EQ(values.at(1), 0x2); + EXPECT_EQ(values.at(2), 0x40000000); + } + { + auto data = insns.at(3)->get_data(); + auto values = get_fill_array_data_payload(data); + EXPECT_EQ(values.size(), 3); + EXPECT_EQ(values.at(0), 0x3b9aca00); + EXPECT_EQ(values.at(1), 0xb2d05e00); + EXPECT_EQ(values.at(2), 0xb2d05e01); + } +} + +TEST_F(IRAssemblerTest, arrayDataRoundTrip) { + { + std::vector elements{"3e7", "a"}; + auto op_data = + encode_fill_array_data_payload_from_string(elements); + // SHOW and s-expr will use slightly different format, so that the latter + // will be idiomatic. Just verify the elements are encoded the right way. + EXPECT_STREQ(SHOW(op_data), + "fill-array-data-payload { [2 x 2] { 3e7, a } }"); + } + { + std::vector elements{"3e7", "2", "40000000"}; + auto op_data = + encode_fill_array_data_payload_from_string(elements); + EXPECT_STREQ(SHOW(op_data), + "fill-array-data-payload { [3 x 4] { 3e7, 2, 40000000 } }"); + } + std::string expr(R"( + ( + (const v0 3) + (new-array v0 "[I") ; create an array of length 3 + (move-result-pseudo-object v1) + (fill-array-data v1 #4 (63 64 65)) + (return-void) + ) +)"); + auto code = assembler::ircode_from_string(expr); + std::string expected( + "((const v0 3) (new-array v0 \"[I\") (move-result-pseudo-object v1) " + "(fill-array-data v1 #4 (63 64 65)) (return-void))"); + EXPECT_EQ(expected, assembler::to_string(code.get())); +}