Skip to content

Commit

Permalink
Add python bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Jun 5, 2023
1 parent bf3599c commit 0b29875
Show file tree
Hide file tree
Showing 13 changed files with 467 additions and 25 deletions.
36 changes: 36 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,43 @@
# IDE/Filesystem
.idea
.vscode
.DS_Store

*.swp
*.swo
*.gcno
*.gcda
*.kdev4
/.kdev4

# Python
__pycache__
*.egg-info
.venv
build/

# Build outputs
bytes/*.cpp
*.so
*.o
*.a

# Test artifacts
tests/*.tok.*
tests/*.src.*
tests/*.err
tests/tests

# CMake/Ninja artifacts
*.cmake
cmake-build-debug/
CMakeFiles/
Testing/
CMakeCache.txt
build.ninja
.ninja_deps
.ninja_log

# Executables
pycdc
pycdas
6 changes: 3 additions & 3 deletions ASTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3007,7 +3007,7 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
} else {
pyc_output << "\n";
start_line(cur_indent, pyc_output);
if (code_src->flags() & PycCode::CO_COROUTINE)
if (code_src->flags() & PycCode::CO_COROUTINE_)
pyc_output << "async ";
pyc_output << "def ";
print_src(dest, mod, pyc_output);
Expand Down Expand Up @@ -3039,12 +3039,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
}
}
}
if (code_src->flags() & PycCode::CO_VARARGS) {
if (code_src->flags() & PycCode::CO_VARARGS_) {
if (narg)
pyc_output << ", ";
pyc_output << "*" << code_src->getLocal(narg++)->value();
}
if (code_src->flags() & PycCode::CO_VARKEYWORDS) {
if (code_src->flags() & PycCode::CO_VARKEYWORDS_) {
if (narg)
pyc_output << ", ";
pyc_output << "**" << code_src->getLocal(narg++)->value();
Expand Down
19 changes: 15 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
project(pycdc)
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.12)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Debug options.
option(ENABLE_BLOCK_DEBUG "Enable block debugging" OFF)
option(ENABLE_STACK_DEBUG "Enable stack debugging" OFF)
option(ENABLE_BINDINGS "Enable Python bindings" OFF)

# Turn debug defs on if they're enabled.
if (ENABLE_BLOCK_DEBUG)
Expand All @@ -16,8 +17,18 @@ if (ENABLE_STACK_DEBUG)
add_definitions(-DSTACK_DEBUG)
endif()

# For generating the bytes tables
find_package(PythonInterp REQUIRED)
# For generating the bytes tables and bindings
set(PYTHON_VENV_PATH "${CMAKE_SOURCE_DIR}/.venv")
if (EXISTS "${PYTHON_VENV_PATH}")
message("Using existing Python venv at ${PYTHON_VENV_PATH}")
set(Python_ROOT_DIR "${PYTHON_VENV_PATH}")
endif()

if (ENABLE_BINDINGS)
add_subdirectory(bindings)
else()
find_package(Python REQUIRED Interpreter)
endif()

if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wshadow -Werror ${CMAKE_CXX_FLAGS}")
Expand All @@ -38,7 +49,7 @@ foreach(ver ${PYTHON_VERSIONS})
endforeach()

add_custom_command(OUTPUT ${MAP_SOURCES}
COMMAND ${PYTHON_EXECUTABLE}
COMMAND ${Python_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/bytes/comp_map.py
${CMAKE_CURRENT_SOURCE_DIR}/bytes
${CMAKE_CURRENT_BINARY_DIR}/bytes
Expand Down
31 changes: 31 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ https://github.com/zrax/pycdc
* For makefiles, just run `make`
* To run tests (on \*nix or MSYS), run `make check`

## Building and installing the Python package

This step does not require building the executables of the previous sections.

* Ensure `CMake >= 3.12` is installed
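* Create a virtual environment in the source directory: `python3 -m venv .venv` (the CMake build automatically uses a `.venv` directory if one exists there)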
* Run `pip install .`

## Usage
**To run pycdas**, the PYC Disassembler:
`./pycdas [PATH TO PYC FILE]`
Expand All @@ -43,6 +51,29 @@ Both tools support Python marshalled code objects, as output from `marshal.dumps

To use this feature, specify `-c -v <version>` on the command line - the version must be specified as the objects themselves do not contain version metadata.

**To use the Python bindings**, run the following Python script:
```python
import marshal
from pycdc import decompyle

async def test():
a = 5
data = foobar(a)
return data

print(decompyle(marshal.dumps(test.__code__)))
```

or from a `.pyc` file:

```python
from pycdc import decompyle

with open('test.pyc', 'rb') as f:
# pass version=None to infer from the file, or specify a version tuple
print(decompyle(f.read(), version=None))
```

## Authors, Licence, Credits
Decompyle++ is the work of Michael Hansen and Darryl Pogue.

Expand Down
28 changes: 28 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Build script for the `bindings` pybind11 extension module.
#
# The interpreter is needed (in addition to the development files) because it
# is also used for byte files generation.
find_package(Python COMPONENTS Interpreter Development REQUIRED)

# Locate pybind11: ask the interpreter found above for the directory holding
# pybind11's CMake package files and store it in pybind11_DIR, which
# find_package(pybind11 CONFIG) consults.
# NOTE(review): the exit status of this command is not checked; if pybind11
# cannot be imported, the failure presumably only surfaces in find_package
# below -- TODO confirm.
execute_process(
COMMAND ${Python_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir(), end='')"
OUTPUT_VARIABLE pybind11_DIR
)
find_package(pybind11 CONFIG REQUIRED)

# Create the extension module from the binding glue code plus the decompiler
# sources that live one directory up.
pybind11_add_module(bindings
bindings.cpp
../pycdc.cpp
../ASTree.cpp
../ASTNode.cpp
)

# NOTE(review): pybind11::headers is a target name, not a directory -- TODO
# confirm whether it was meant to go in target_link_libraries instead.
target_include_directories(bindings PRIVATE pybind11::headers ${Python_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR})
# pycxx is not defined in this file; presumably the core library target of the
# parent project.
target_link_libraries(bindings PRIVATE pycxx)

# Fall back to <source dir>/build/lib when the caller did not choose an output
# directory.
# NOTE(review): this runs after the `bindings` target has already been created;
# CMake documents this variable as initialising the target property at creation
# time, so confirm the fallback actually applies to `bindings`.
if (NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/build/lib")
endif ()

# Expose the package version to the C++ sources as the VERSION_INFO macro.
# NOTE(review): EXAMPLE_VERSION_INFO is not set anywhere in this file -- TODO
# confirm where it is defined.
target_compile_definitions(
bindings
PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO})
24 changes: 24 additions & 0 deletions bindings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys

from .bindings import decompyle as _decompyle

__version__ = '0.0.1'


def decompyle(code, version=(sys.version_info.major, sys.version_info.minor)):
    """
    Decompile compiled Python code, given as bytes, into source text.

    Parameters
    ----------
    code : bytes
        The code object to decompile.
    version : tuple, optional
        ``(major, minor)`` Python version the code was compiled for. Defaults
        to the version of the running interpreter. Pass ``None`` or ``(0, 0)``
        to have the version inferred from the data itself; this will not work
        for marshalled code objects.

    Returns
    -------
    The value returned by the native ``decompyle`` binding.
    """
    # (0, 0) is the sentinel the native binding interprets as "infer the version".
    major, minor = (0, 0) if version is None else (version[0], version[1])
    return _decompyle(code, major, minor)
49 changes: 49 additions & 0 deletions bindings/bindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma clang diagnostic push
#pragma ide diagnostic ignored "cppcoreguidelines-narrowing-conversions"

#include <Python.h>
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <vector>
#include <cstring>
#include <ostream>
#include <sstream>
#include <optional>
#include "ASTree.h"

namespace py = pybind11;


#ifdef WIN32
# define PATHSEP '\\'
#else
# define PATHSEP '/'
#endif

/**
 * Decompile compiled Python code held in memory and return the source text.
 *
 * @param data           Raw bytes to decompile.
 * @param major_version  Python major version of the code. When both
 *                       major_version and minor_version are 0, the data is
 *                       loaded with PycModule::loadFromStream, which reads the
 *                       version from the data itself.
 * @param minor_version  Python minor version of the code (see major_version).
 * @return The text written by decompyle(), as a Python str.
 * @throws py::value_error (ValueError in Python) when the module is not valid
 *         after loading.
 */
py::str decompyle_binding(py::bytes &data, int major_version, int minor_version) {
    PycModule mod;

    // PycBuffer is handed a pointer into this local copy of the bytes, so the
    // copy has to stay alive for as long as the buffer is read from.
    auto str = data.cast<std::string>();
    PycBuffer buffer(
        reinterpret_cast<const unsigned char*>(str.c_str()),
        str.size()
    );

    if (major_version == 0 && minor_version == 0) {
        mod.loadFromStream(buffer);
    }
    else {
        mod.loadFromMarshalledStream(
            buffer,
            major_version,
            minor_version
        );
    }

    // Both loaders report failure by printing to stderr and returning early
    // without assigning a code object. Surface that to Python as an exception
    // instead of handing an unloaded module to the decompiler.
    if (!mod.isValid()) {
        throw py::value_error(
            "Could not load code object: bad magic number or unsupported Python version"
        );
    }

    std::ostringstream pyc_output;
    decompyle(mod.code(), &mod, pyc_output);
    return pyc_output.str();
}

// Definition of the `bindings` extension module: sets the module docstring and
// registers decompyle_binding under the Python name "decompyle".
PYBIND11_MODULE(bindings, ext) {
    ext.doc() = "pycdcpy bindings";
    ext.def(
        "decompyle",
        &decompyle_binding,
        "Decompile a marshalled python file"
    );
}
2 changes: 2 additions & 0 deletions data.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "data.h"
#include <cstring>
#include <cstdarg>
#include <ostream>
#include <vector>

/* PycData */
Expand Down Expand Up @@ -80,6 +81,7 @@ int PycBuffer::getBuffer(int bytes, void* buffer)
bytes = m_size - m_pos;
if (bytes != 0)
memcpy(buffer, (m_buffer + m_pos), bytes);
m_pos += bytes;
return bytes;
}

Expand Down
34 changes: 17 additions & 17 deletions pyc_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@ class PycCode : public PycObject {
public:
typedef std::vector<PycRef<PycString>> globals_t;
enum CodeFlags {
CO_OPTIMIZED = 0x1,
CO_NEWLOCALS = 0x2,
CO_VARARGS = 0x4,
CO_VARKEYWORDS = 0x8,
CO_NESTED = 0x10,
CO_GENERATOR = 0x20,
CO_NOFREE = 0x40,
CO_COROUTINE = 0x80,
CO_ITERABLE_COROUTINE = 0x100,
CO_GENERATOR_ALLOWED = 0x1000,
CO_FUTURE_DIVISION = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000,
CO_FUTURE_WITH_STATEMENT = 0x8000,
CO_FUTURE_PRINT_FUNCTION = 0x10000,
CO_FUTURE_UNICODE_LITERALS = 0x20000,
CO_FUTURE_BARRY_AS_BDFL = 0x40000,
CO_FUTURE_GENERATOR_STOP = 0x80000,
CO_OPTIMIZED_ = 0x1,
CO_NEWLOCALS_ = 0x2,
CO_VARARGS_ = 0x4,
CO_VARKEYWORDS_ = 0x8,
CO_NESTED_ = 0x10,
CO_GENERATOR_ = 0x20,
CO_NOFREE_ = 0x40,
CO_COROUTINE_ = 0x80,
CO_ITERABLE_COROUTINE_ = 0x100,
CO_GENERATOR_ALLOWED_ = 0x1000,
CO_FUTURE_DIVISION_ = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT_ = 0x4000,
CO_FUTURE_WITH_STATEMENT_ = 0x8000,
CO_FUTURE_PRINT_FUNCTION_ = 0x10000,
CO_FUTURE_UNICODE_LITERALS_ = 0x20000,
CO_FUTURE_BARRY_AS_BDFL_ = 0x40000,
CO_FUTURE_GENERATOR_STOP_ = 0x80000,
};

PycCode(int type = TYPE_CODE)
Expand Down
38 changes: 38 additions & 0 deletions pyc_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,44 @@ void PycModule::loadFromMarshalledFile(const char* filename, int major, int mino
m_code = LoadObject(&in, this).cast<PycCode>();
}

/**
 * Load a module from a stream: a 32-bit word passed to setVersion(), a
 * version-dependent run of header words, then the code object itself.
 *
 * If the module is not valid after setVersion(), "Bad MAGIC!" is written to
 * stderr and the function returns without assigning m_code.
 */
void PycModule::loadFromStream(PycData& stream)
{
    setVersion(stream.get32());
    if (!isValid()) {
        fputs("Bad MAGIC!\n", stderr);
        return;
    }

    // A flags word is only read for versions 3.7 and later.
    const bool hasFlagsWord = verCompare(3, 7) >= 0;
    const int flags = hasFlagsWord ? stream.get32() : 0;

    const bool hasChecksum = (flags & 0x1) != 0;
    if (hasChecksum) {
        // Optional checksum added in Python 3.7: two words, both discarded.
        stream.get32();
        stream.get32();
    } else {
        // Timestamp; the value is not used.
        stream.get32();

        // Size parameter added in Python 3.3; also discarded.
        if (verCompare(3, 3) >= 0)
            stream.get32();
    }

    m_code = LoadObject(&stream, this).cast<PycCode>();
}

/**
 * Load a code object from a stream, using the Python version supplied by the
 * caller; no version information is read from the stream by this function.
 *
 * If isSupportedVersion() rejects the given version, a message is written to
 * stderr and the function returns without modifying the module.
 */
void PycModule::loadFromMarshalledStream(PycData& stream, int major, int minor)
{
    const bool supported = isSupportedVersion(major, minor);
    if (!supported) {
        fprintf(stderr, "Unsupported version %d.%d\n", major, minor);
        return;
    }

    // Record the caller-supplied version before decoding the object.
    m_maj = major;
    m_min = minor;
    m_unicode = !(major < 3);

    m_code = LoadObject(&stream, this).cast<PycCode>();
}

PycRef<PycString> PycModule::getIntern(int ref) const
{
if (ref < 0 || (size_t)ref >= m_interns.size())
Expand Down
4 changes: 3 additions & 1 deletion pyc_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class PycModule {

void loadFromFile(const char* filename);
void loadFromMarshalledFile(const char *filename, int major, int minor);
void loadFromStream(PycData& stream);
void loadFromMarshalledStream(PycData& stream, int major, int minor);
bool isValid() const { return (m_maj >= 0) && (m_min >= 0); }

int majorVer() const { return m_maj; }
Expand All @@ -60,7 +62,7 @@ class PycModule {

bool strIsUnicode() const
{
return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS) != 0;
return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS_) != 0;
}

PycRef<PycCode> code() const { return m_code; }
Expand Down
Loading

0 comments on commit 0b29875

Please sign in to comment.