Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaning and filtering tools #126

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
!.vscode/tasks.json
!.vscode/launch.json
*.code-workspace
.idea

### Python ###
# Byte-compiled / optimized / DLL files
Expand Down
123 changes: 118 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,19 @@
import logging
import os.path
import pickle
import re
import shlex
import sys

import subprocess
import sys
import os
from recuperabit import logic, utils
# scanners
from recuperabit.fs.ntfs import NTFSScanner
try:
import readline
except:
pass #readline not available

__author__ = "Andrea Lazzarotto"
__copyright__ = "(c) 2014-2021, Andrea Lazzarotto"
Expand All @@ -52,6 +60,8 @@
('other', 'List unrecoverable partitions'),
('allparts', 'List all partitions'),
('tree <part#>', 'Show contents of partition (tree)'),
('gtree <part#> <...grep options>', 'Show contents of partition (tree) in a pager, piping through grep. '
'Invalid partition id gets all partitions'),
('csv <part#> <path>', 'Save a CSV representation in a file'),
('bodyfile <part#> <path>', 'Save a body file representation in a file'),
('tikzplot <part#> [<path>]', 'Produce LaTeX code to draw a Tikz figure'),
Expand All @@ -65,13 +75,38 @@
rebuilt = set()


def output_to_pager(text, grep_opts=None):
    """Filter lines through ``grep`` and show the result in the ``less`` pager.

    :param text: iterable of lines to display, or None, in which case the
        literal text "None" is sent to the pipeline instead.
    :param grep_opts: string of grep arguments inserted verbatim into the
        shell command line; when None the catch-all pattern ".*" is used.
    """
    grep_args = '".*"' if grep_opts is None else grep_opts
    try:
        # NOTE(review): grep_args is interpolated into a shell command line
        # as-is, so shell metacharacters typed by the user are interpreted
        # by the shell -- confirm this is acceptable for an interactive tool.
        # args for less stolen from git source, see `man less`
        pager = subprocess.Popen(
            'grep {} | less -F -R -S -X -K'.format(grep_args),
            stdin=subprocess.PIPE,
            stdout=sys.stdout,
            shell=True)
        try:
            if text is None:
                pager.stdin.write(b'None')
            else:
                for line in text:
                    pager.stdin.write(
                        '{}{}'.format(line, os.linesep).encode('utf-8'))
        except BrokenPipeError:
            # The pipeline went away before reading everything (typically
            # the user quit less early); nothing more to send.
            pass
        finally:
            # Always signal end of input, otherwise grep/less wait forever.
            try:
                pager.stdin.close()
            except BrokenPipeError:
                # Flushing buffered data into a closed pipe; safe to ignore.
                pass
        # Wait on every path (including text=None) so the pager is done
        # with the terminal before the prompt is printed again.
        pager.wait()
    except KeyboardInterrupt:
        # let less handle this, -K will exit cleanly
        pass


def list_parts(parts, shorthands, test):
    """Print each partition whose object satisfies the ``test`` predicate.

    ``shorthands`` yields ``(number, key)`` pairs; ``key`` indexes ``parts``.
    """
    for number, key in shorthands:
        partition = parts[key]
        if not test(partition):
            continue
        print('Partition #' + str(number), '->', partition)


def get_parts(parts, shorthands, test):
    """Return the shorthand numbers of partitions satisfying ``test``.

    ``shorthands`` yields ``(number, key)`` pairs; ``key`` indexes ``parts``.
    """
    matching = []
    for number, key in shorthands:
        if test(parts[key]):
            matching.append(number)
    return matching


def check_valid_part(num, parts, shorthands, rebuild=True):
"""Check if the required partition is valid."""
try:
Expand All @@ -92,6 +127,54 @@ def check_valid_part(num, parts, shorthands, rebuild=True):
return None


def quiet_check_valid_part(num, parts, shorthands, rebuild=True):
    """Return the partition with shorthand position ``num``, or None.

    ``num`` is converted with ``int``; a message is printed and None is
    returned when it is not a number or is out of range. When ``rebuild``
    is true, the partition is rebuilt once and recorded in the module-level
    ``rebuilt`` set.
    """
    # TODO merge this function with the one above: kwarg to remove log
    try:
        index = int(num)
    except ValueError:
        print('Value is not valid!')
        return None

    if not 0 <= index < len(shorthands):
        print('No partition with given ID!')
        return None

    _, key = shorthands[index]
    partition = parts[key]
    if rebuild and key not in rebuilt:
        partition.rebuild()
        rebuilt.add(key)
    return partition


def print_part_tree(part_id, file_filter, parts, shorthands):
    """Page the file listing of one partition through grep.

    The partition's ``root`` and ``lost`` trees are listed separately and
    each non-empty listing is sent to the pager with ``file_filter`` as the
    grep arguments. Nothing happens when ``part_id`` is not a valid
    partition.
    """
    part = check_valid_part(part_id, parts, shorthands)
    if part is None:
        return

    number = int(part_id)
    # Build both listings before paging either one.
    listings = [
        utils.verbose_tree_folder(number, folder, [])
        for folder in (part.root, part.lost)
    ]
    for listing in listings:
        if listing:
            output_to_pager(listing, file_filter)
    print('-' * 10)


def print_all_parts_tree(file_filter, parts, shorthands):
    """Page the file listings of every recoverable partition through grep.

    The ``root`` and ``lost`` trees of each recoverable partition are
    collected into one list, with a dashed separator after each partition,
    and the whole list is sent to the pager with ``file_filter`` as the
    grep arguments.
    """
    recoverable_ids = get_parts(parts, shorthands, lambda x: x.recoverable)
    output = []
    for i in recoverable_ids:
        part = quiet_check_valid_part(i, parts, shorthands)
        # Skip ids that do not resolve to a partition. The check must be on
        # the partition itself: an (id, partition) tuple is never None.
        if part is None:
            continue
        for folder in (part.root, part.lost):
            listing = utils.verbose_tree_folder(i, folder, [])
            if listing:
                # TODO: maybe just log to file and not store into memory in
                # case it's too large; maybe no pager if logfile available
                output.extend(listing)
        output.append('-' * 10)
    # TODO: possibly filter by size as well
    output_to_pager(output, file_filter)


def interpret(cmd, arguments, parts, shorthands, outdir):
"""Perform command required by user."""
if cmd == 'help':
Expand All @@ -108,6 +191,16 @@ def interpret(cmd, arguments, parts, shorthands, outdir):
print(utils.tree_folder(part.root))
print(utils.tree_folder(part.lost))
print('-'*10)
elif cmd == 'gtree':
if len(arguments) < 2:
file_filter = '".*"'
else:
file_filter = '"' + '" "'.join(arguments[1:]) + '"'
part = quiet_check_valid_part(arguments[0], parts, shorthands)
if part is not None:
print_part_tree(arguments[0], file_filter, parts, shorthands)
else:
print_all_parts_tree(file_filter, parts, shorthands)
elif cmd == 'bodyfile':
if len(arguments) != 2:
print('Wrong number of parameters!')
Expand Down Expand Up @@ -280,6 +373,12 @@ def main():
'-o', '--outputdir', type=str, help='directory for restored contents'
' and output files'
)
parser.add_argument(
'-l', '--outputlog', type=str, help='file for logs to be stored'
)
parser.add_argument(
'-n', '--skipexisting', type=str, help='do not write anew content for existing files to output dir'
)
args = parser.parse_args()

try:
Expand All @@ -297,6 +396,17 @@ def main():
'recuperabit_output')
args.outputdir = 'recuperabit_output'

if args.outputlog is None:
logging.info('No output directory specified, defaulting to '
'recuperabit_output/restore.log')
# TODO: write output from gtree to file

if args.skipexisting is None:
logic.__skip_existing_files__ = True
logging.info('No skip existing specified, defaulting to True')
else:
logic.__skip_existing_files__ = args.skipexisting != "False"

# Try to reload information from the savefile
if args.savefile is not None:
if args.overwrite:
Expand Down Expand Up @@ -327,7 +437,7 @@ def main():
# Ask for confirmation before beginning the process
try:
confirm = input('Type [Enter] to start the analysis or '
'"exit" / "quit" / "q" to quit: ')
'"exit" / "quit" / "q" to quit: ')
except EOFError:
print('')
exit(0)
Expand Down Expand Up @@ -362,12 +472,15 @@ def main():
while True:
print('\nWrite command ("help" for details):')
try:
command = input('> ').split(' ')
command = shlex.split(input('> '))
except (EOFError, KeyboardInterrupt):
print('')
exit(0)
cmd = command[0]
arguments = command[1:]
try:
cmd = command[0]
arguments = command[1:]
except IndexError:
continue
interpret(cmd, arguments, parts, shorthands, args.outputdir)

if __name__ == '__main__':
Expand Down
37 changes: 25 additions & 12 deletions recuperabit/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
import sys
import time
import types
from datetime import datetime, timezone

from .utils import tiny_repr
__skip_existing_files__ = True


class SparseList(object):
Expand Down Expand Up @@ -237,28 +239,39 @@ def recursive_restore(node, part, outputdir, make_dirs=True):
if is_directory and content is not None:
logging.warning(u'Directory %s has data content!', file_path)
restore_path += '_recuperabit_content'

failed = False
try:
if content is not None:
logging.info(u'Restoring #%s %s', node.index, file_path)
with codecs.open(restore_path, 'wb') as outfile:
if isinstance(content, types.GeneratorType):
for piece in content:
outfile.write(piece)
else:
outfile.write(content)
if not os.path.isfile(restore_path) or not __skip_existing_files__:
logging.info(u'Restoring #%s %s', node.index, file_path)
with codecs.open(restore_path, 'wb') as outfile:
if isinstance(content, types.GeneratorType):
for piece in content:
outfile.write(piece)
else:
outfile.write(content)
else:
logging.info(u'File #%s %s exists', node.index, file_path)
else:
if not is_directory:
# Empty file
open(restore_path, 'wb').close()
except IOError:
logging.error(u'IOError when trying to create %s', restore_path)
failed = True
logging.debug(u'IOError when trying to create %s', restore_path)

# Restore Modification + Access time
mtime, atime, _ = node.get_mac()
if mtime is not None:
atime = time.mktime(atime.astimezone().timetuple())
mtime = time.mktime(mtime.astimezone().timetuple())

def workaround_ltimezone_bug(l_time):
try:
return l_time.astimezone()
except ValueError:
return datetime(1901, 1, 1, 0, 0, tzinfo=timezone.utc).astimezone()

if mtime is not None and not failed:
atime = time.mktime(workaround_ltimezone_bug(atime).timetuple())
mtime = time.mktime(workaround_ltimezone_bug(mtime).timetuple())
os.utime(restore_path, (atime, mtime))

if is_directory:
Expand Down
18 changes: 18 additions & 0 deletions recuperabit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ def _file_tree_repr(node):
)


def _short_file_tree_repr(node):
"""Give a nice representation for the tree."""
if node.is_directory:
return [f'{node.name}/', None]
return [f'{node.name}', readable_bytes(node.size)]

def tree_folder(directory, padding=0):
"""Return a tree-like textual representation of a directory."""
lines = []
Expand All @@ -207,6 +213,18 @@ def tree_folder(directory, padding=0):
return '\n'.join(lines)


def verbose_tree_folder(part_id, directory, lines, prefix=""):
    """Append one line per leaf of a tree to ``lines`` and return it.

    A node with no children, or one that is not a directory, produces a
    single line of the form ``#<part_id>: Size <size>: <prefix><name>``.
    Otherwise each child is visited recursively, with this node's label
    appended to ``prefix``.

    :param part_id: partition number used as the line prefix.
    :param directory: node to describe (needs ``children``,
        ``is_directory`` and whatever ``_short_file_tree_repr`` reads).
    :param lines: list that receives the output lines; mutated in place.
    :param prefix: path accumulated from the ancestors of ``directory``.
    :return: the same ``lines`` list, on every path.
    """
    # The label is the same for every child, so compute it once.
    node_name, size = _short_file_tree_repr(directory)
    if len(directory.children) == 0 or not directory.is_directory:
        lines.append(f"#{part_id}: Size {size}: " + prefix + node_name)
        # Return the list here too, so a top-level call on a leaf or an
        # empty directory does not hand back None.
        return lines
    for entry in directory.children:
        verbose_tree_folder(part_id, entry, lines, prefix + node_name)
    return lines


def _bodyfile_repr(node, path):
"""Return a body file line for node."""
end = '/' if node.is_directory or len(node.children) else ''
Expand Down