From 0b29875d75fcbda15ed1e2972e4438c759091a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= Date: Mon, 5 Jun 2023 23:46:24 +0200 Subject: [PATCH] Add python bindings --- .gitignore | 36 +++++++++++ ASTree.cpp | 6 +- CMakeLists.txt | 19 ++++-- README.markdown | 31 ++++++++++ bindings/CMakeLists.txt | 28 +++++++++ bindings/__init__.py | 24 +++++++ bindings/bindings.cpp | 49 +++++++++++++++ data.cpp | 2 + pyc_code.h | 34 +++++----- pyc_module.cpp | 38 ++++++++++++ pyc_module.h | 4 +- pyproject.toml | 87 ++++++++++++++++++++++++++ setup.py | 134 ++++++++++++++++++++++++++++++++++++++++ 13 files changed, 467 insertions(+), 25 deletions(-) create mode 100644 bindings/CMakeLists.txt create mode 100644 bindings/__init__.py create mode 100644 bindings/bindings.cpp create mode 100644 pyproject.toml create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 6237ddae6..9d4a1b266 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,43 @@ +# IDE/Filesystem +.idea +.vscode +.DS_Store + *.swp *.swo *.gcno *.gcda *.kdev4 /.kdev4 + +# Python __pycache__ +*.egg-info +.venv +build/ + +# Build outputs +bytes/*.cpp +*.so +*.o +*.a + +# Test artifacts +tests/*.tok.* +tests/*.src.* +tests/*.err +tests/tests + +# CMake/Ninja artifacts +*.cmake +cmake-build-debug/ +CMakeFiles/ +Testing/ +CMakeCache.txt +build.ninja +.ninja_deps +.ninja_log + +# Executables +pycdc +pycdas diff --git a/ASTree.cpp b/ASTree.cpp index e7c926286..32fcdade7 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -3007,7 +3007,7 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } else { pyc_output << "\n"; start_line(cur_indent, pyc_output); - if (code_src->flags() & PycCode::CO_COROUTINE) + if (code_src->flags() & PycCode::CO_COROUTINE_) pyc_output << "async "; pyc_output << "def "; print_src(dest, mod, pyc_output); @@ -3039,12 +3039,12 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) } } } - if (code_src->flags() & PycCode::CO_VARARGS) { + if 
(code_src->flags() & PycCode::CO_VARARGS_) { if (narg) pyc_output << ", "; pyc_output << "*" << code_src->getLocal(narg++)->value(); } - if (code_src->flags() & PycCode::CO_VARKEYWORDS) { + if (code_src->flags() & PycCode::CO_VARKEYWORDS_) { if (narg) pyc_output << ", "; pyc_output << "**" << code_src->getLocal(narg++)->value(); diff --git a/CMakeLists.txt b/CMakeLists.txt index cb4874a8b..84c46e6d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ project(pycdc) -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.12) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -7,6 +7,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Debug options. option(ENABLE_BLOCK_DEBUG "Enable block debugging" OFF) option(ENABLE_STACK_DEBUG "Enable stack debugging" OFF) +option(ENABLE_BINDINGS "Enable Python bindings" OFF) # Turn debug defs on if they're enabled. if (ENABLE_BLOCK_DEBUG) @@ -16,8 +17,18 @@ if (ENABLE_STACK_DEBUG) add_definitions(-DSTACK_DEBUG) endif() -# For generating the bytes tables -find_package(PythonInterp REQUIRED) +# For generating the bytes tables and bindings +set(PYTHON_VENV_PATH "${CMAKE_SOURCE_DIR}/.venv") +if (EXISTS "${PYTHON_VENV_PATH}") + message("Using existing Python venv at ${PYTHON_VENV_PATH}") + set(Python_ROOT_DIR "${PYTHON_VENV_PATH}") +endif() + +if (ENABLE_BINDINGS) + add_subdirectory(bindings) +else() + find_package(Python REQUIRED Interpreter) +endif() if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wshadow -Werror ${CMAKE_CXX_FLAGS}") @@ -38,7 +49,7 @@ foreach(ver ${PYTHON_VERSIONS}) endforeach() add_custom_command(OUTPUT ${MAP_SOURCES} - COMMAND ${PYTHON_EXECUTABLE} + COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/bytes/comp_map.py ${CMAKE_CURRENT_SOURCE_DIR}/bytes ${CMAKE_CURRENT_BINARY_DIR}/bytes diff --git a/README.markdown b/README.markdown index 787435ce7..4ac8a97ac 100644 --- a/README.markdown +++ b/README.markdown @@ 
-28,6 +28,14 @@ https://github.com/zrax/pycdc * For makefiles, just run `make` * To run tests (on \*nix or MSYS), run `make check` +## Building and installing the Python package + +This step does not require building the executables of the previous sections. + +* Ensure `CMake >= 3.12` is installed +* Create and activate a virtual environment, e.g. `python3 -m venv .venv && . .venv/bin/activate` +* Run `pip install .` from the repository root + ## Usage **To run pycdas**, the PYC Disassembler: `./pycdas [PATH TO PYC FILE]` @@ -43,6 +51,29 @@ Both tools support Python marshalled code objects, as output from `marshal.dumps To use this feature, specify `-c -v <x.y>` on the command line - the version must be specified as the objects themselves do not contain version metadata. +**To use the Python bindings** on a marshalled code object (the version defaults to that of the running interpreter): +```python +import marshal +from pycdc import decompyle + +async def test(): + a = 5 + data = foobar(a) + return data + +print(decompyle(marshal.dumps(test.__code__))) +``` + +or from a `.pyc` file: + +```python +from pycdc import decompyle + +with open('test.pyc', 'rb') as f: + # version=None reads the version from the .pyc header; a version tuple is only for bare marshalled code objects + print(decompyle(f.read(), version=None)) +``` + ## Authors, Licence, Credits Decompyle++ is the work of Michael Hansen and Darryl Pogue.
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt new file mode 100644 index 000000000..573458250 --- /dev/null +++ b/bindings/CMakeLists.txt @@ -0,0 +1,28 @@ +# Find the interpreter as well for byte files generation +find_package(Python COMPONENTS Interpreter Development REQUIRED) + +# Find pybind11 +execute_process( + COMMAND ${Python_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir(), end='')" + OUTPUT_VARIABLE pybind11_DIR +) +find_package(pybind11 CONFIG REQUIRED) + +if (NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/build/lib") +endif () + +# Create C library +pybind11_add_module(bindings + bindings.cpp + ../pycdc.cpp + ../ASTree.cpp + ../ASTNode.cpp +) + +target_include_directories(bindings PRIVATE pybind11::headers ${Python_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}) +target_link_libraries(bindings PRIVATE pycxx) + +target_compile_definitions( + bindings + PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) diff --git a/bindings/__init__.py b/bindings/__init__.py new file mode 100644 index 000000000..53e0a782c --- /dev/null +++ b/bindings/__init__.py @@ -0,0 +1,24 @@ +import sys + +from .bindings import decompyle as _decompyle + +__version__ = '0.0.1' + + +def decompyle(code, version=(sys.version_info.major, sys.version_info.minor)): + """ + Decompyle the given code object. + + Parameters + ---------- + code : bytes + Marshalled code object, or the full contents of a `.pyc` file. + version : tuple, optional + The Python version to decompile for. Defaults to the current Python version. + Use None or (0, 0) to read the version from a `.pyc` file header. This will + not work for bare marshalled code objects, which carry no version metadata.
+ """ + if version is None: + return _decompyle(code, 0, 0) + else: + return _decompyle(code, version[0], version[1]) diff --git a/bindings/bindings.cpp b/bindings/bindings.cpp new file mode 100644 index 000000000..ca2d65d91 --- /dev/null +++ b/bindings/bindings.cpp @@ -0,0 +1,55 @@ +#pragma clang diagnostic push +#pragma ide diagnostic ignored "cppcoreguidelines-narrowing-conversions" + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ASTree.h" + +namespace py = pybind11; + + +#ifdef WIN32 +# define PATHSEP '\\' +#else +# define PATHSEP '/' +#endif + +py::str decompyle_binding(py::bytes &data, int major_version, int minor_version) { + PycModule mod; + auto str = data.cast(); + PycBuffer buffer( + reinterpret_cast(str.c_str()), + str.size() + ); + + if (major_version == 0 && minor_version == 0) { + mod.loadFromStream(buffer); + } + else { + mod.loadFromMarshalledStream( + buffer, + major_version, + minor_version + ); + } + // Both loaders only print to stderr and return early on failure, so check + // isValid() before decompiling a module that has no code object. + // NOTE(review): assumes a default-constructed PycModule is not valid. + if (!mod.isValid()) { + throw py::value_error("Could not load bytecode: bad magic number or unsupported Python version"); + } + std::ostringstream pyc_output; + decompyle(mod.code(), &mod, pyc_output); + return pyc_output.str(); +} + +PYBIND11_MODULE(bindings, m) { + m.doc() = "pycdcpy bindings"; + m.def("decompyle", &decompyle_binding, "Decompile a marshalled python file"); +} diff --git a/data.cpp b/data.cpp index 2c0e2d349..ddbf50b00 100644 --- a/data.cpp +++ b/data.cpp @@ -1,6 +1,7 @@ #include "data.h" #include #include +#include #include /* PycData */ @@ -80,6 +81,7 @@ int PycBuffer::getBuffer(int bytes, void* buffer) bytes = m_size - m_pos; if (bytes != 0) memcpy(buffer, (m_buffer + m_pos), bytes); + m_pos += bytes; return bytes; } diff --git a/pyc_code.h b/pyc_code.h index 4d6e47cd0..69e37cc23 100644 --- a/pyc_code.h +++ b/pyc_code.h @@ -12,23 +12,23 @@ class PycCode : public PycObject { public: typedef std::vector> globals_t; enum CodeFlags { - CO_OPTIMIZED = 0x1, - CO_NEWLOCALS = 0x2, - CO_VARARGS = 0x4, - CO_VARKEYWORDS = 0x8, - CO_NESTED = 0x10, - CO_GENERATOR = 0x20, - CO_NOFREE = 0x40, - CO_COROUTINE = 0x80, -
CO_ITERABLE_COROUTINE = 0x100, - CO_GENERATOR_ALLOWED = 0x1000, - CO_FUTURE_DIVISION = 0x2000, - CO_FUTURE_ABSOLUTE_IMPORT = 0x4000, - CO_FUTURE_WITH_STATEMENT = 0x8000, - CO_FUTURE_PRINT_FUNCTION = 0x10000, - CO_FUTURE_UNICODE_LITERALS = 0x20000, - CO_FUTURE_BARRY_AS_BDFL = 0x40000, - CO_FUTURE_GENERATOR_STOP = 0x80000, + CO_OPTIMIZED_ = 0x1, + CO_NEWLOCALS_ = 0x2, + CO_VARARGS_ = 0x4, + CO_VARKEYWORDS_ = 0x8, + CO_NESTED_ = 0x10, + CO_GENERATOR_ = 0x20, + CO_NOFREE_ = 0x40, + CO_COROUTINE_ = 0x80, + CO_ITERABLE_COROUTINE_ = 0x100, + CO_GENERATOR_ALLOWED_ = 0x1000, + CO_FUTURE_DIVISION_ = 0x2000, + CO_FUTURE_ABSOLUTE_IMPORT_ = 0x4000, + CO_FUTURE_WITH_STATEMENT_ = 0x8000, + CO_FUTURE_PRINT_FUNCTION_ = 0x10000, + CO_FUTURE_UNICODE_LITERALS_ = 0x20000, + CO_FUTURE_BARRY_AS_BDFL_ = 0x40000, + CO_FUTURE_GENERATOR_STOP_ = 0x80000, }; PycCode(int type = TYPE_CODE) diff --git a/pyc_module.cpp b/pyc_module.cpp index d2227906c..dfc1304cb 100644 --- a/pyc_module.cpp +++ b/pyc_module.cpp @@ -239,6 +239,44 @@ void PycModule::loadFromMarshalledFile(const char* filename, int major, int mino m_code = LoadObject(&in, this).cast(); } +void PycModule::loadFromStream(PycData& stream) +{ + setVersion(stream.get32()); + if (!isValid()) { + fputs("Bad MAGIC!\n", stderr); + return; + } + + int flags = 0; + if (verCompare(3, 7) >= 0) + flags = stream.get32(); + + if (flags & 0x1) { + // Optional checksum added in Python 3.7 + stream.get32(); + stream.get32(); + } else { + stream.get32(); // Timestamp -- who cares? 
+ + if (verCompare(3, 3) >= 0) + stream.get32(); // Size parameter added in Python 3.3 + } + + m_code = LoadObject(&stream, this).cast(); +} + +void PycModule::loadFromMarshalledStream(PycData& stream, int major, int minor) +{ + if (!isSupportedVersion(major, minor)) { + fprintf(stderr, "Unsupported version %d.%d\n", major, minor); + return; + } + m_maj = major; + m_min = minor; + m_unicode = (major >= 3); + m_code = LoadObject(&stream, this).cast(); +} + PycRef PycModule::getIntern(int ref) const { if (ref < 0 || (size_t)ref >= m_interns.size()) diff --git a/pyc_module.h b/pyc_module.h index b3e52dbea..8a9aedf09 100644 --- a/pyc_module.h +++ b/pyc_module.h @@ -44,6 +44,8 @@ class PycModule { void loadFromFile(const char* filename); void loadFromMarshalledFile(const char *filename, int major, int minor); + void loadFromStream(PycData& stream); + void loadFromMarshalledStream(PycData& stream, int major, int minor); bool isValid() const { return (m_maj >= 0) && (m_min >= 0); } int majorVer() const { return m_maj; } @@ -60,7 +62,7 @@ class PycModule { bool strIsUnicode() const { - return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS) != 0; + return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS_) != 0; } PycRef code() const { return m_code; } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..bbaefaa3d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,87 @@ +[project] +name = "pycdc" +description = "Python bindings for Decompyle++" +license = { file = "LICENSE" } +readme = { file = "README.markdown", content-type = "text/markdown" } +urls.homepage = "https://github.com/zrax/pycdc" +dynamic = ["version"] +requires-python = ">3.7.6,<3.11" + +dependencies = [] + +[project.optional-dependencies] +dev = [] + +[tool.setuptools] +packages = ["pycdc"] +package-dir = { "pycdc" = "bindings" } + +[tool.setuptools.dynamic] +version = { attr = "bindings.__version__" } + +[tool.interrogate] +ignore-init-method = true +ignore-init-module = true
+ignore-magic = false +ignore-semiprivate = false +ignore-private = false +ignore-property-decorators = false +ignore-module = true +ignore-nested-functions = false +ignore-nested-classes = true +ignore-setters = false +fail-under = 10 +exclude = ["docs", "build", "tests"] +verbose = 0 +quiet = false +whitelist-regex = [] +color = true +omit-covered-files = false + +[tool.pytest.ini_options] +testpaths = [ + "tests", +] + + +[build-system] +requires = [ + "setuptools", + "pybind11>=2.10.4", +] +build-backend = "setuptools.build_meta" + + +[tool.cibuildwheel] +skip = [ + "*p36-*", # Skip Python 3.6 + "pp*", # Skip PyPy + "*-win32", # Skip 32-bit Windows + "*-manylinux_i686", # Skip 32-bit Linux + "*-win_arm64", # Skip experimental Windows on ARM + "*-musllinux*", # Skip slow Linux + "*-manylinux_aarch64", # Skip slow Linux + "*-manylinux_ppc64le", # Skip slow Linux + "*-manylinux_s390x", # Skip slow Linux +] + +[tool.ruff] +fix = true +exclude = [ + ".git", + "__pycache__", + "__init__.py", + ".mypy_cache", + ".pytest_cache", + ".venv", + "build", +] +ignore = [] +line-length = 88 +select = [ + "E", + "F", + "W", + "I001" +] +fixable = ["E", "F", "W", "I"] diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..5d746b929 --- /dev/null +++ b/setup.py @@ -0,0 +1,134 @@ +# Adapted from https://github.com/pybind/cmake_example +import os +import re +import subprocess +import sys +from pathlib import Path + +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext + +# Convert distutils Windows platform specifiers to CMake -A arguments +PLAT_TO_CMAKE = { + "win32": "Win32", + "win-amd64": "x64", + "win-arm32": "ARM", + "win-arm64": "ARM64", +} + + +# A CMakeExtension needs a sourcedir instead of a file list. +# The name must be the _single_ output extension from the CMake build. +# If you need multiple extensions, see scikit-build. 
+class CMakeExtension(Extension): + def __init__(self, name: str, sourcedir: str = "") -> None: + super().__init__(name, sources=[]) + self.sourcedir = os.fspath(Path(sourcedir).resolve()) + + +class CMakeBuild(build_ext): + def build_extension(self, ext: CMakeExtension) -> None: + # Must be in this form due to bug in .resolve() only fixed in Python 3.10+ + ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) + extdir = ext_fullpath.parent.resolve() + + # Using this requires trailing slash for auto-detection & inclusion of + # auxiliary "native" libs + + debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug + cfg = "Debug" if debug else "Release" + + # CMake lets you override the generator - we need to check this. + # Can be set with Conda-Build, for example. + cmake_generator = os.environ.get("CMAKE_GENERATOR", "") + + # Python_EXECUTABLE points CMake at the interpreter running this build; + # CMAKE_LIBRARY_OUTPUT_DIRECTORY makes CMake write the built module into + # the directory of the extension path computed above. + cmake_args = [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", + f"-DPython_EXECUTABLE={sys.executable}", + f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm + "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" + ] + build_args = [] + # Adding CMake arguments set as environment variable + # (needed e.g. to build for ARM OSx on conda-forge) + if "CMAKE_ARGS" in os.environ: + cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] + + # Pass the extension module's filename suffix(es) to CMake as EXTENSION_SUFFIX. + cmake_args += [f"-DEXTENSION_SUFFIX={''.join(ext_fullpath.suffixes)}"] + + if self.compiler.compiler_type != "msvc": + # Using Ninja-build since it a) is available as a wheel and b) + # multithreads automatically. MSVC would require all variables be + # exported for Ninja to pick it up, which is a little tricky to do. + # Users can override the generator with CMAKE_GENERATOR in CMake + # 3.15+.
+ if not cmake_generator or cmake_generator == "Ninja": + try: + import ninja + + ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" + cmake_args += [ + "-GNinja", + f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}", + ] + except ImportError: + pass + + else: + # Single config generators are handled "normally" + single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) + + # CMake allows an arch-in-generator style for backward compatibility + contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) + + # Specify the arch if using MSVC generator, but only if it doesn't + # contain a backward-compatibility arch spec already in the + # generator name. + if not single_config and not contains_arch: + cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] + + # Multi-config generators have a different way to specify configs + if not single_config: + cmake_args += [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}" + ] + build_args += ["--config", cfg] + + if sys.platform.startswith("darwin"): + # Cross-compile support for macOS - respect ARCHFLAGS if set + archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", "")) + if archs: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level + # across all generators. + if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + # self.parallel is a Python 3 only way to set parallel jobs by hand + # using -j in the build_ext call, not supported by pip or PyPA-build. + if hasattr(self, "parallel") and self.parallel: + # CMake 3.12+ only.
+ build_args += [f"-j{self.parallel}"] + + build_temp = Path(self.build_temp) / ext.name + if not build_temp.exists(): + build_temp.mkdir(parents=True) + + cmake_args.append("-DENABLE_BINDINGS=ON") + + subprocess.run( + ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True + ) + subprocess.run( + ["cmake", "--build", ".", "--target", "bindings", *build_args], cwd=build_temp, check=True + ) + +# The information here can also be placed in setup.cfg - better separation of +# logic and declaration, and simpler if you include description/version in a file. +setup( + ext_modules=[CMakeExtension("pycdc.bindings")], + cmdclass={"build_ext": CMakeBuild}, +)