Skip to content

Commit

Permalink
IRAssembler support for fill-array-data
Browse files Browse the repository at this point in the history
Summary:
I'd like to start improving some support for array handling like https://fburl.com/code/eabffe38, but first make it possible to write s-expr test cases covering the various ways to define an array. Using an integration test is not very nice, as it does not give control over how the array gets populated.

It would be nice to define char[] with actual chars like a b c, but I'm not going to bother with escapes and whatnot. A simple expression-list format with hex values should be good enough.

Reviewed By: agampe

Differential Revision: D53624243

fbshipit-source-id: 9d0881789019da06a86fa0105c42003acbc21472
  • Loading branch information
wsanville authored and facebook-github-bot committed Mar 13, 2024
1 parent 685106b commit 2f155ff
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 18 deletions.
37 changes: 37 additions & 0 deletions libredex/DexInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

#pragma once

#include <charconv>
#include <cstring>
#include <string>
#include <type_traits>
#include <utility>

#include "CppUtil.h"
#include "Debug.h"
#include "DexDefs.h"
#include "DexOpcode.h"
Expand Down Expand Up @@ -363,6 +365,22 @@ class DexOpcodeProto : public DexInstruction {
void set_proto(DexProto* proto) { m_proto = proto; }
};

// Returns the element width recorded in a fill-array-data payload, i.e. the
// first 16-bit unit of the payload's data.
//
// Asserts that op_data is a FOPCODE_FILLED_ARRAY payload and that it carries
// at least three data units (one for the width, two for the element count).
inline uint16_t fill_array_data_payload_width(const DexOpcodeData* op_data) {
  always_assert_log(op_data->opcode() == FOPCODE_FILLED_ARRAY,
                    "DexOpcodeData is not an array payload");
  always_assert(op_data->data_size() >= 3);
  const uint16_t* units = op_data->data();
  return units[0];
}

// Returns the number of elements recorded in a fill-array-data payload. The
// count is the 32-bit value occupying the two 16-bit units that follow the
// element width.
//
// Asserts that op_data is a FOPCODE_FILLED_ARRAY payload and that it carries
// at least three data units (one for the width, two for the element count).
inline uint32_t fill_array_data_payload_element_count(
    const DexOpcodeData* op_data) {
  always_assert_log(op_data->opcode() == FOPCODE_FILLED_ARRAY,
                    "DexOpcodeData is not an array payload");
  always_assert(op_data->data_size() >= 3);
  // The count starts one 16-bit unit into the data, so its address is only
  // guaranteed to be 2-byte aligned. Copy the bytes out instead of
  // dereferencing a casted uint32_t*, which would be a misaligned,
  // type-punned load. Byte order is the host's, same as before.
  uint32_t element_count = 0;
  std::memcpy(&element_count, op_data->data() + 1, sizeof(element_count));
  return element_count;
}

// helper function to create fill-array-data-payload according to
// https://source.android.com/devices/tech/dalvik/dalvik-bytecode#fill-array
template <typename IntType>
Expand All @@ -385,6 +403,25 @@ std::unique_ptr<DexOpcodeData> encode_fill_array_data_payload(
return std::make_unique<DexOpcodeData>(data);
}

// Like encode_fill_array_data_payload, but the elements are given as strings
// holding hexadecimal numbers (no "0x" prefix), e.g. {"3e7", "a"}.
//
// Each string is whitespace-trimmed and parsed as a base-16 IntType. The
// parse must succeed, must fit in IntType, and must consume the whole trimmed
// token; otherwise the function asserts with "Invalid payload". The parsed
// values are then handed to encode_fill_array_data_payload.
template <typename IntType>
std::unique_ptr<DexOpcodeData> encode_fill_array_data_payload_from_string(
    const std::vector<std::string>& elements) {
  static_assert(std::is_integral<IntType>::value,
                "fill-array-data-payload can only contain integral values.");
  std::vector<IntType> vec;
  vec.reserve(elements.size());
  for (const auto& item : elements) {
    // Value-initialize so a failed parse can never leak an indeterminate
    // value into the payload.
    IntType val{};
    auto trimmed = trim_whitespaces(item);
    const char* const first = trimmed.data();
    const char* const last = first + trimmed.size();
    auto result = std::from_chars(first, last, val, 16);
    // Reject every failure mode, not just invalid_argument: out-of-range
    // values leave `val` untouched, and a partial match (e.g. "0x3e7" or
    // "12zz") would otherwise silently encode only the leading digits.
    always_assert_log(result.ec == std::errc() && result.ptr == last,
                      "Invalid payload: \"%s\"", item.c_str());
    vec.emplace_back(val);
  }
  return encode_fill_array_data_payload(vec);
}

template <typename IntType>
std::vector<IntType> get_fill_array_data_payload(const DexOpcodeData* op_data) {
static_assert(std::is_integral<IntType>::value,
Expand Down
62 changes: 58 additions & 4 deletions libredex/IRAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "Creators.h"
#include "DexClass.h"
#include "DexInstruction.h"
#include "DexPosition.h"
#include "IRCode.h"
#include "Show.h"
Expand Down Expand Up @@ -75,9 +76,24 @@ s_expr to_s_expr(const IRInstruction* insn, const LabelRefs& label_refs) {
switch (opcode::ref(op)) {
case opcode::Ref::None:
break;
case opcode::Ref::Data:
not_reached_log("Not yet supported");
case opcode::Ref::Data: {
auto op_data = insn->get_data();
if (op_data->opcode() == FOPCODE_FILLED_ARRAY) {
auto ewidth = fill_array_data_payload_width(op_data);
s_exprs.emplace_back(ewidth);
auto element_count = fill_array_data_payload_element_count(op_data);
std::vector<s_expr> element_exprs;
element_exprs.reserve(element_count);
for (const auto& s :
pretty_array_data_payload(ewidth, element_count, op_data->data())) {
element_exprs.emplace_back(s);
}
s_exprs.emplace_back(element_exprs);
} else {
not_reached_log("Not yet supported");
}
break;
}
case opcode::Ref::Field:
s_exprs.emplace_back(show(insn->get_field()));
break;
Expand Down Expand Up @@ -138,6 +154,22 @@ s_expr _to_s_expr(const DexPosition* pos, uint32_t idx, uint32_t parent_idx) {
s_expr(parent_idx_str),
});
}

// Builds a fill-array-data payload from hex string elements, choosing the
// unsigned integer type that matches the element width (1, 2, 4 or 8).
// Any other width trips the "Invalid width" assertion.
std::unique_ptr<DexOpcodeData> create_fill_array_data_payload_from_str(
    const uint16_t ewidth, const std::vector<std::string>& elements) {
  if (ewidth == 1) {
    return encode_fill_array_data_payload_from_string<uint8_t>(elements);
  }
  if (ewidth == 2) {
    return encode_fill_array_data_payload_from_string<uint16_t>(elements);
  }
  if (ewidth == 4) {
    return encode_fill_array_data_payload_from_string<uint32_t>(elements);
  }
  // Only the 8-byte width remains valid at this point.
  always_assert_log(ewidth == 8, "Invalid width: %d", ewidth);
  return encode_fill_array_data_payload_from_string<uint64_t>(elements);
}
} // namespace

std::vector<s_expr> to_s_exprs(
Expand Down Expand Up @@ -206,9 +238,31 @@ std::unique_ptr<IRInstruction> instruction_from_s_expr(
switch (opcode::ref(op)) {
case opcode::Ref::None:
break;
case opcode::Ref::Data:
not_reached_log("Not yet supported");
case opcode::Ref::Data: {
if (insn->opcode() == OPCODE_FILL_ARRAY_DATA) {
int32_t ewidth;
s_patn({s_patn(&ewidth)}, tail)
.must_match(tail, "Expecting int for element width" + opcode_str);
always_assert_log(ewidth == 1 || ewidth == 2 || ewidth == 4 ||
ewidth == 8,
"Invalid width %d", ewidth);

std::vector<std::string> hex_elements;
std::string element_str;
s_expr list;
s_patn({s_patn(list)}, tail)
.must_match(tail, "Expecting list of hex strings for " + opcode_str);
while (s_patn({s_patn(&element_str)}, list).match_with(list)) {
hex_elements.push_back(element_str);
}
auto data = create_fill_array_data_payload_from_str((uint16_t)ewidth,
hex_elements);
insn->set_data(std::move(data));
} else {
not_reached_log("Not yet supported");
}
break;
}
case opcode::Ref::Field: {
std::string str;
s_patn({s_patn(&str)}, tail)
Expand Down
4 changes: 0 additions & 4 deletions libredex/IRAssembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ class IRCode;
* automatically created by the assembler. I.e. you do *not* need to call
* make_{field,method}() beforehand to ensure that they exist.
*
* Not-yet-implemented features:
* - try-catch
* - fill-array-data opcodes
*
* NOTE:
* When assembling an IRCode instance, the assembler will attempt to set the
* registers_size for you by making it 1 larger than the largest register
Expand Down
34 changes: 24 additions & 10 deletions libredex/Show.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,17 +1293,16 @@ std::string show(const DexOpcodeData* insn) {
// See format at
// https://source.android.com/devices/tech/dalvik/dalvik-bytecode#fill-array
const uint16_t ewidth = *data++;
const uint32_t size = *((uint32_t*)data);
ss << "[" << size << " x " << ewidth << "] ";
// escape size
data += 2;
const uint8_t* data_ptr = (uint8_t*)data;
ss << "{ ";
for (size_t i = 0; i < size; i++) {
if (i != 0) {
ss << ", ";
const uint32_t element_count = *((uint32_t*)data);
ss << "[" << element_count << " x " << ewidth << "] {";
auto vec = pretty_array_data_payload(ewidth, element_count, insn->data());
bool first{true};
for (const auto& s : vec) {
if (!first) {
ss << ",";
}
ss << std::hex << read<uint64_t>(data_ptr, ewidth);
ss << " " << s;
first = false;
}
ss << " }";
break;
Expand Down Expand Up @@ -1711,3 +1710,18 @@ std::string pretty_bytes(uint64_t val) {
<< " " << modifier << "B";
return oss.str();
}

// Renders each element of a fill-array-data payload as a lowercase hex string
// without a prefix. `data` points at the payload data; the elements start
// three 16-bit units in, after the width unit and the two count units.
std::vector<std::string> pretty_array_data_payload(const uint16_t ewidth,
                                                   const uint32_t element_count,
                                                   const uint16_t* data) {
  std::vector<std::string> hex_strings;
  hex_strings.reserve(element_count);
  // Cursor over the raw element bytes; handed to read<> once per element.
  const uint8_t* cursor = reinterpret_cast<const uint8_t*>(data + 3);
  for (uint32_t remaining = element_count; remaining > 0; --remaining) {
    const auto value = read<uint64_t>(cursor, ewidth);
    std::ostringstream formatter;
    formatter << std::hex << value;
    hex_strings.push_back(formatter.str());
  }
  return hex_strings;
}
6 changes: 6 additions & 0 deletions libredex/Show.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <set>
#include <sstream>
#include <string>
#include <vector>

/*
* Stringification functions for core types. Definitions are in DexClass.cpp
Expand Down Expand Up @@ -155,3 +156,8 @@ std::string vshow(const DexType*);

// Format a number as a byte entity.
std::string pretty_bytes(uint64_t val);

// Format the items of given width as human readable hex strings.
//
// `data` points at the start of a fill-array-data payload's data; the
// elements are read starting three 16-bit units past it (after the width and
// the element count). `ewidth` is the element width and `element_count` the
// number of elements to format. Returns one string per element, in order,
// written in hexadecimal without a "0x" prefix.
std::vector<std::string> pretty_array_data_payload(const uint16_t ewidth,
const uint32_t element_count,
const uint16_t* data);
107 changes: 107 additions & 0 deletions test/unit/IRAssemblerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <gtest/gtest.h>

#include "DexInstruction.h"
#include "DexPosition.h"
#include "RedexTest.h"
#include "Show.h"
Expand Down Expand Up @@ -727,3 +728,109 @@ TEST_F(IRAssemblerTest, assembleClassFromString) {
EXPECT_EQ(v_method->get_class(), cls->get_type());
EXPECT_EQ(v_method->get_name()->str(), "bazPublic");
}

std::vector<IRInstruction*> get_fill_array_data_insns(
const std::unique_ptr<IRCode>& code) {
std::vector<IRInstruction*> result;
for (const auto& mie : *code) {
if (mie.type == MFLOW_OPCODE &&
mie.insn->opcode() == OPCODE_FILL_ARRAY_DATA) {
result.push_back(mie.insn);
}
}
return result;
}

// Assembles fill-array-data instructions for each supported element width
// (1, 2, 4 and 8) and checks that the decoded payloads hold the hex values
// written in the s-expression.
TEST_F(IRAssemblerTest, fillArrayPayloads) {
  auto code = assembler::ircode_from_string(R"(
(
(const v0 3)
(new-array v0 "[Z") ; create an array of length 3
(move-result-pseudo-object v1)
(fill-array-data v1 #1 (0 0 1))
(new-array v0 "[C") ; create an array of length 3
(move-result-pseudo-object v2)
(fill-array-data v2 #2 (61 62 63))
(new-array v0 "[I") ; create an array of length 3
(move-result-pseudo-object v3)
(fill-array-data v3 #4 (3e7 2 40000000))
(new-array v0 "[J") ; create an array of length 3
(move-result-pseudo-object v4)
(fill-array-data v4 #8 (3b9aca00 b2d05e00 b2d05e01))
(return-void)
)
)");
  auto insns = get_fill_array_data_insns(code);
  // The size checks are preconditions for the .at() calls below, so they
  // abort the test (ASSERT) rather than letting .at() throw out_of_range.
  ASSERT_EQ(insns.size(), 4u);

  {
    auto data = insns.at(0)->get_data();
    auto values = get_fill_array_data_payload<uint8_t>(data);
    ASSERT_EQ(values.size(), 3u);
    EXPECT_EQ(values.at(0), 0x0);
    EXPECT_EQ(values.at(1), 0x0);
    EXPECT_EQ(values.at(2), 0x1);
  }
  {
    auto data = insns.at(1)->get_data();
    auto values = get_fill_array_data_payload<uint16_t>(data);
    ASSERT_EQ(values.size(), 3u);
    EXPECT_EQ(values.at(0), 0x61);
    EXPECT_EQ(values.at(1), 0x62);
    EXPECT_EQ(values.at(2), 0x63);
  }
  {
    auto data = insns.at(2)->get_data();
    auto values = get_fill_array_data_payload<uint32_t>(data);
    ASSERT_EQ(values.size(), 3u);
    EXPECT_EQ(values.at(0), 0x3e7);
    EXPECT_EQ(values.at(1), 0x2);
    EXPECT_EQ(values.at(2), 0x40000000);
  }
  {
    auto data = insns.at(3)->get_data();
    auto values = get_fill_array_data_payload<uint64_t>(data);
    ASSERT_EQ(values.size(), 3u);
    EXPECT_EQ(values.at(0), 0x3b9aca00);
    EXPECT_EQ(values.at(1), 0xb2d05e00);
    EXPECT_EQ(values.at(2), 0xb2d05e01);
  }
}

// Checks both directions of array payload handling: hex strings encoded into
// a payload print back as expected via SHOW, and an assembled
// fill-array-data instruction serializes back to the same s-expression.
TEST_F(IRAssemblerTest, arrayDataRoundTrip) {
  {
    // SHOW and the s-expr printer deliberately differ in format (the s-expr
    // form aims to be idiomatic), so only the encoded elements are checked.
    const std::vector<std::string> hex_values{"3e7", "a"};
    auto payload =
        encode_fill_array_data_payload_from_string<uint16_t>(hex_values);
    EXPECT_STREQ(SHOW(payload),
                 "fill-array-data-payload { [2 x 2] { 3e7, a } }");
  }
  {
    const std::vector<std::string> hex_values{"3e7", "2", "40000000"};
    auto payload =
        encode_fill_array_data_payload_from_string<uint32_t>(hex_values);
    EXPECT_STREQ(SHOW(payload),
                 "fill-array-data-payload { [3 x 4] { 3e7, 2, 40000000 } }");
  }

  const std::string source = R"(
(
(const v0 3)
(new-array v0 "[I") ; create an array of length 3
(move-result-pseudo-object v1)
(fill-array-data v1 #4 (63 64 65))
(return-void)
)
)";
  const std::string expected =
      "((const v0 3) (new-array v0 \"[I\") (move-result-pseudo-object v1) "
      "(fill-array-data v1 #4 (63 64 65)) (return-void))";
  auto assembled = assembler::ircode_from_string(source);
  EXPECT_EQ(expected, assembler::to_string(assembled.get()));
}

0 comments on commit 2f155ff

Please sign in to comment.