Skip to content

Commit

Permalink
Add line numbers, fix None and hint about staging
Browse files Browse the repository at this point in the history
View best with:

git show --color-words=. <ref>
  • Loading branch information
Arusekk committed Feb 9, 2021
1 parent 63aa45b commit cb128a8
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 11 deletions.
54 changes: 50 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,60 @@ Note: this only works best-effort and assumes you don't use
constants and literals that are not possible to encode this way.
It just makes sure that the code for code objects is a little better,
while longer and hopefully not changing its semantics.
It also clears the lnotab (line number information), because who needs it anyway.
In future versions it might be possible to build a useful lnotab
for debugging.

The code here is intended for use with CPython 3.9,
but should work with CPython 3.8-3.11 with no changes,
and with older wordcode CPythons just by tweaking the marshaller.

If necessary, it will be rewritten using the excellent [xdis] library.
If necessary, it may be rewritten using the excellent [xdis] library.

[xdis]: https://github.com/rocky/python-xdis

Note that in most cases you should not need to transform very complex
code patterns using this project,
and whenever it fails to do its job on your actual payload
you can just compile a stager with it,
and embed your actual payload inside in some way.

Example stager for the simple case [`test/stager.py`](test/stager.py):
```py
import binascii
import marshal

actual_payload = '''
610d0d0a0000000001e20d60eb030000e30000000000000000000000000000000007000000400000
00735c00000064005a0069005a01640144005d325a0265036402830144005d245a04650565046403
1700640216006502170083016501650565046502170083013c007118710c65066404a00764056406
8400650044008301a101830101006407530029086158030000477572204d726120627320436c6775
62612c206f6c2047767a204372677265660a0a4f726e686776736879207666206f72676772652067
756e61206874796c2e0a526b637976707667207666206f72676772652067756e6120767a63797670
76672e0a46767a637972207666206f72676772652067756e612070627a6379726b2e0a50627a6379
726b207666206f72676772652067756e612070627a637976706e6772712e0a53796e67207666206f
72676772652067756e61206172666772712e0a46636e656672207666206f72676772652067756e61
2071726166722e0a45726e716e6f767976676c207062686167662e0a46637270766e7920706e6672
66206e65726127672066637270766e7920726162687475206762206f65726e782067757220656879
72662e0a4e796775626874752063656e706776706e7976676c206f726e67662063686576676c2e0a
5265656265662066756268797120617269726520636e666620667679726167796c2e0a4861797266
6620726b637976707667796c2066767972617072712e0a56612067757220736e7072206273206e7a
6f76746876676c2c20657273686672206775722067727a63676e677662612067622074687266662e
0a477572657220667562687971206f72206261722d2d206e6171206365727372656e6f796c206261
796c20626172202d2d626f6976626866206a6e6c2067622071622076672e0a4e7967756268747520
67756e67206a6e6c207a6e6c20616267206f7220626f6976626866206e6720737665666720686179
726666206c62682765722051686770752e0a41626a207666206f72676772652067756e6120617269
72652e0a4e79677562687475206172697265207666206273677261206f72676772652067756e6120
2a65767475672a2061626a2e0a56732067757220767a6379727a7261676e6776626120766620756e
657120676220726b63796e76612c2076672766206e206f6e71207671726e2e0a5673206775722076
7a6379727a7261676e6776626120766620726e666c20676220726b63796e76612c207667207a6e6c
206f72206e2074626271207671726e2e0a416e7a7266636e707266206e6572206261722075626178
766174207465726e67207671726e202d2d207972672766207162207a626572206273206775626672
212902e941000000e961000000e91a000000e90d000000da00630100000000000000000000000200
00000600000043000000731800000067007c005d107d017400a0017c017c01a102910271045300a9
002902da0164da036765742902da022e30da016372060000007206000000fa1a2f7573722f6c6962
2f707974686f6e332e392f746869732e7079da0a3c6c697374636f6d703e1c000000f30000000072
0c0000004e2908da01737207000000720a000000da0572616e6765da0169da03636872da05707269
6e74da046a6f696e720600000072060000007206000000720b000000da083c6d6f64756c653e0100
0000730a0000000416040108010c012402
535353535353
'''.replace('\n', '') # hexlified PYC file, padded to encodable length
eval(marshal.loads(binascii.unhexlify(actual_payload)[16:]))
```
23 changes: 23 additions & 0 deletions test/ext_01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
x = 2

print((  # a single tuple literal holding 256 references to x (16 rows of 16 names)
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,

x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,

x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,

x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
))  # NOTE(review): presumably sized so the tuple-building opcode needs an argument above 255 (i.e. EXTENDED_ARG) -- confirm
39 changes: 39 additions & 0 deletions test/stager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import binascii
import marshal

actual_payload = '''
610d0d0a0000000001e20d60eb030000e30000000000000000000000000000000007000000400000
00735c00000064005a0069005a01640144005d325a0265036402830144005d245a04650565046403
1700640216006502170083016501650565046502170083013c007118710c65066404a00764056406
8400650044008301a101830101006407530029086158030000477572204d726120627320436c6775
62612c206f6c2047767a204372677265660a0a4f726e686776736879207666206f72676772652067
756e61206874796c2e0a526b637976707667207666206f72676772652067756e6120767a63797670
76672e0a46767a637972207666206f72676772652067756e612070627a6379726b2e0a50627a6379
726b207666206f72676772652067756e612070627a637976706e6772712e0a53796e67207666206f
72676772652067756e61206172666772712e0a46636e656672207666206f72676772652067756e61
2071726166722e0a45726e716e6f767976676c207062686167662e0a46637270766e7920706e6672
66206e65726127672066637270766e7920726162687475206762206f65726e782067757220656879
72662e0a4e796775626874752063656e706776706e7976676c206f726e67662063686576676c2e0a
5265656265662066756268797120617269726520636e666620667679726167796c2e0a4861797266
6620726b637976707667796c2066767972617072712e0a56612067757220736e7072206273206e7a
6f76746876676c2c20657273686672206775722067727a63676e677662612067622074687266662e
0a477572657220667562687971206f72206261722d2d206e6171206365727372656e6f796c206261
796c20626172202d2d626f6976626866206a6e6c2067622071622076672e0a4e7967756268747520
67756e67206a6e6c207a6e6c20616267206f7220626f6976626866206e6720737665666720686179
726666206c62682765722051686770752e0a41626a207666206f72676772652067756e6120617269
72652e0a4e79677562687475206172697265207666206273677261206f72676772652067756e6120
2a65767475672a2061626a2e0a56732067757220767a6379727a7261676e6776626120766620756e
657120676220726b63796e76612c2076672766206e206f6e71207671726e2e0a5673206775722076
7a6379727a7261676e6776626120766620726e666c20676220726b63796e76612c207667207a6e6c
206f72206e2074626271207671726e2e0a416e7a7266636e707266206e6572206261722075626178
766174207465726e67207671726e202d2d207972672766207162207a626572206273206775626672
212902e941000000e961000000e91a000000e90d000000da00630100000000000000000000000200
00000600000043000000731800000067007c005d107d017400a0017c017c01a102910271045300a9
002902da0164da036765742902da022e30da016372060000007206000000fa1a2f7573722f6c6962
2f707974686f6e332e392f746869732e7079da0a3c6c697374636f6d703e1c000000f30000000072
0c0000004e2908da01737207000000720a000000da0572616e6765da0169da03636872da05707269
6e74da046a6f696e720600000072060000007206000000720b000000da083c6d6f64756c653e0100
0000730a0000000416040108010c012402
535353535353
'''.replace('\n', '') # hexlified PYC file, padded to encodable length
eval(marshal.loads(binascii.unhexlify(actual_payload)[16:]))
50 changes: 43 additions & 7 deletions utfpyc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def __init__(self, codeobj, force=False, verbose=False):
self.state = U8.ascii
self.was_extended_arg = False
self.nextcode = list(self.bcode)[1:]
self.lnotab = bytearray()
self.lineno = codeobj.co_firstlineno
self.index = 0

def maybe_insert_cont(self):
if self.was_extended_arg:
Expand Down Expand Up @@ -121,6 +124,8 @@ def process(self, x, nextx):
self.maybe_insert_start(val, x)

if self.need_ignore and opcode >= dis.HAVE_ARGUMENT:
if self.newcode[-1] is None:
self.newcode[-1] = ANY_ASCII
self.newcode.extend((dis.opmap['NOP'], None))

if self.newcode[-1:] == [None]:
Expand Down Expand Up @@ -183,10 +188,30 @@ def adjumps(self):
if x.opcode in dis.hasjrel or x.opcode in dis.hasjabs:
self.fixjump(x)

def record_lineno(self, lineno):
    """Append an lnotab entry mapping the current output offset to *lineno*.

    The line table is a sequence of ``(offset_increment, line_increment)``
    byte pairs.  Every emitted byte is kept in the range 0..127 so that the
    table itself stays plain ASCII.

    Nothing is recorded when:

    * *lineno* is ``None`` (the instruction does not start a line),
    * the line did not change since the last recorded entry,
    * no code has been emitted since the last recorded entry, or
    * the line number would go backwards (see the comment below).

    Args:
        lineno: source line of the instruction about to be emitted,
            or ``None``.
    """
    index = len(self.newcode)
    if lineno is None or lineno == self.lineno or index == self.index:
        return
    lineinc = lineno - self.lineno
    if lineinc < 0:
        # negative line number deltas result in invalid utf-8
        return
    idxinc = index - self.index

    # Offset fillers must come first: a pair only changes the line for
    # code located *after* its offset increment has been applied.  Emitting
    # a (0, 127) line filler before the offset has advanced would re-label
    # the instructions of the previous line instead.
    while idxinc > 127:
        self.lnotab.extend((127, 0))
        idxinc -= 127
    # The first line-carrying pair also carries the remaining offset
    # delta; any further line fillers apply at that same offset.
    while lineinc > 127:
        self.lnotab.extend((idxinc, 127))
        idxinc = 0
        lineinc -= 127
    self.lnotab.extend((idxinc, lineinc))
    self.index = index
    self.lineno = lineno

def transcode(self, can_recurse=False):
for x, nextx in zip_longest(self.bcode, self.nextcode,
fillvalue=empty_instr):
minoff = len(self.newcode)
self.record_lineno(x.starts_line)
if x.opcode != dis.EXTENDED_ARG:
self.process(x, nextx)
maxoff = len(self.newcode)
Expand All @@ -207,7 +232,8 @@ def transcode(self, can_recurse=False):
if self.verbose > 1:
hexdump(self.newcode)
return Transcoder(
CodeWrapper(self.codeobj, co_code=self.newcode),
CodeWrapper(self.codeobj, co_code=self.newcode,
co_lnotab=self.lnotab),
self.force,
self.verbose).transcode(can_recurse - 1)

Expand All @@ -219,7 +245,8 @@ def transcode(self, can_recurse=False):
# adjust stacksize
co_stacksize = maybe_bigger(self.codeobj.co_stacksize)

codeobj = self.codeobj.replace(co_code=newcode, co_lnotab=b'',
codeobj = self.codeobj.replace(co_code=newcode,
co_lnotab=bytes(self.lnotab),
co_stacksize=co_stacksize)
if self.verbose:
if self.verbose > 1:
Expand Down Expand Up @@ -247,10 +274,11 @@ class NorefMarshalDumper:
...: b'.',
}

def __init__(self, fp, force=False, verbose=0):
def __init__(self, fp, force=False, write_lnotab=True, verbose=0):
self.fp = fp
self.verbose = verbose
self.force = force
self.write_lnotab = write_lnotab

def u32(self, i):
self.write(struct.pack('<I', i))
Expand All @@ -277,8 +305,12 @@ def dump_int(self, i):
self.s32(i)

def dump_str(self, s):
self.fp.write(b'z')
self.u8(len(s))
if len(s) < 0x80:
self.fp.write(b'z')
self.u8(len(s))
else:
self.fp.write(b'a')
self.u32(len(s))
self.fp.write(s.encode())

def dump_bytes(self, b):
Expand Down Expand Up @@ -310,7 +342,7 @@ def dump_code(self, co):
self.dump(co.co_filename)
self.dump(co.co_name)
self.u32(co.co_firstlineno)
self.dump(co.co_lnotab)
self.dump(self.write_lnotab and co.co_lnotab or b'')


def main():
Expand All @@ -321,6 +353,9 @@ def main():
help='set alternate co_filename')
par.add_argument('--mode', default='exec', choices=['single', 'exec'],
help='set alternate compile mode')
par.add_argument('--no-lnotab', action='store_false', dest='lnotab',
help='reduce the output size by dropping '
'line number information')
par.add_argument('-v', '--verbose', default=0, action='count')
par.add_argument('-f', '--force', action='store_true',
help='force write even if UTF-8 cannot be fully acheived')
Expand All @@ -341,7 +376,8 @@ def main():
fp.seek(16)
# like marshal.dump(codeobj, fp), but no remembering and references;
# it also fixes up code whenever it can be made more UTF-8 valid
NorefMarshalDumper(fp, args.force, args.verbose).dump(codeobj)
NorefMarshalDumper(fp, args.force, args.lnotab,
args.verbose).dump(codeobj)


if __name__ == "__main__":
Expand Down

0 comments on commit cb128a8

Please sign in to comment.