Add a "gtree" command (partition tree piped through grep into a pager)
and an option to skip files that are already present in the output dir.

Review notes for this revision of the patch:
- grep and less are started without a shell and both are waited for.
- "gtree" without arguments and unbalanced quotes at the prompt are
  reported instead of raising IndexError / ValueError.
- verbose_tree_folder always returns its list and descends into every
  node that has children.
- Restore failures are still logged at ERROR level.
- The hunk that only re-indented the confirmation prompt is left out.
- Context lines are reproduced from a copy in which whitespace was
  collapsed and the placeholders of the help table appear to be missing;
  check them against the tree before applying. The "index" lines are the
  ones of the first revision of this patch.

diff --git a/.gitignore b/.gitignore
index 2808a40..b36c0a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 !.vscode/tasks.json
 !.vscode/launch.json
 *.code-workspace
+.idea
 
 ### Python ###
 # Byte-compiled / optimized / DLL files
diff --git a/main.py b/main.py
index 578099c..ac7f6b4 100755
--- a/main.py
+++ b/main.py
@@ -27,11 +27,20 @@
 import logging
+import os
 import os.path
 import pickle
+import re
+import shlex
+import subprocess
 import sys
 
 from recuperabit import logic, utils
 # scanners
 from recuperabit.fs.ntfs import NTFSScanner
+
+try:
+    import readline  # noqa: F401 -- gives input() line editing and history
+except ImportError:
+    pass  # readline not available
 
 __author__ = "Andrea Lazzarotto"
 __copyright__ = "(c) 2014-2021, Andrea Lazzarotto"
@@ -52,6 +61,9 @@
     ('other', 'List unrecoverable partitions'),
     ('allparts', 'List all partitions'),
     ('tree ', 'Show contents of partition (tree)'),
+    ('gtree <part#> <...grep options>',
+     'Show contents of partition (tree) in a pager, piping through grep. '
+     'Invalid partition id gets all partitions'),
     ('csv ', 'Save a CSV representation in a file'),
     ('bodyfile ', 'Save a body file representation in a file'),
     ('tikzplot []', 'Produce LaTeX code to draw a Tikz figure'),
@@ -65,6 +77,42 @@
 rebuilt = set()
 
 
+def output_to_pager(text, grep_opts=None):
+    """Filter lines through grep and show the matches in less.
+
+    text      -- iterable of lines, or None (shown as the word "None")
+    grep_opts -- arguments for grep: a list of strings, a shell-quoted
+                 string, or None/empty to let every line through
+    """
+    if not grep_opts:
+        grep_args = ['.*']
+    elif isinstance(grep_opts, str):
+        grep_args = shlex.split(grep_opts)
+    else:
+        grep_args = list(grep_opts)
+    lines = ['None'] if text is None else text
+    # No shell is involved: whatever was typed at the prompt reaches grep
+    # as plain arguments and cannot start other commands.
+    grep = subprocess.Popen(['grep'] + grep_args,
+                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    # args for less stolen from git source, see `man less`
+    pager = subprocess.Popen(['less', '-F', '-R', '-S', '-X', '-K'],
+                             stdin=grep.stdout)
+    grep.stdout.close()  # less holds the read end of the pipe now
+    try:
+        with grep.stdin as sink:
+            for line in lines:
+                data = '{}{}'.format(line, os.linesep)
+                sink.write(data.encode('utf-8', 'replace'))
+    except (BrokenPipeError, KeyboardInterrupt):
+        pass  # the pager was closed before everything was written
+    try:
+        grep.wait()
+        pager.wait()
+    except KeyboardInterrupt:
+        pass  # let less handle this, -K will exit cleanly
+
+
 def list_parts(parts, shorthands, test):
     """List partitions corresponding to test."""
     for i, part in shorthands:
@@ -72,6 +120,11 @@ def list_parts(parts, shorthands, test):
             print('Partition #' + str(i), '->', parts[part])
 
 
+def get_parts(parts, shorthands, test):
+    """Return the ids of the partitions for which test is true."""
+    return [i for i, part in shorthands if test(parts[part])]
+
+
 def check_valid_part(num, parts, shorthands, rebuild=True):
     """Check if the required partition is valid."""
     try:
@@ -92,6 +145,58 @@ def check_valid_part(num, parts, shorthands, rebuild=True):
     return None
 
 
+def quiet_check_valid_part(num, parts, shorthands, rebuild=True):
+    """Return the partition with id num, or None if there is none.
+
+    Nothing is printed for a bad id: the callers treat None as an
+    ordinary answer. The partition is rebuilt on first use unless
+    rebuild is false.
+    """
+    # TODO merge this function with the one above: kwarg to remove log
+    try:
+        i = int(num)
+    except ValueError:
+        return None
+    if not 0 <= i < len(shorthands):
+        return None
+    _, par = shorthands[i]
+    part = parts[par]
+    if rebuild and par not in rebuilt:
+        part.rebuild()
+        rebuilt.add(par)
+    return part
+
+
+def print_part_tree(part_id, file_filter, parts, shorthands):
+    """Show the tree of one partition in the pager, filtered by grep."""
+    part = check_valid_part(part_id, parts, shorthands)
+    if part is None:
+        return
+    part_id = int(part_id)
+    listing = []
+    for folder in (part.root, part.lost):
+        utils.verbose_tree_folder(part_id, folder, listing)
+    if listing:
+        output_to_pager(listing, file_filter)
+    print('-' * 10)
+
+
+def print_all_parts_tree(file_filter, parts, shorthands):
+    """Show the trees of all recoverable partitions in one pager."""
+    output = []
+    for i in get_parts(parts, shorthands, lambda x: x.recoverable):
+        part = quiet_check_valid_part(i, parts, shorthands)
+        if part is None:
+            continue
+        # TODO: maybe just log to file and not store into memory in case
+        # it's too large; maybe no pager if logfile available
+        for folder in (part.root, part.lost):
+            utils.verbose_tree_folder(i, folder, output)
+        output.append('-' * 10)
+    # TODO: possibly filter by size as well
+    output_to_pager(output, file_filter)
+
+
 def interpret(cmd, arguments, parts, shorthands, outdir):
     """Perform command required by user."""
     if cmd == 'help':
@@ -108,6 +213,17 @@ def interpret(cmd, arguments, parts, shorthands, outdir):
                 print(utils.tree_folder(part.root))
                 print(utils.tree_folder(part.lost))
                 print('-'*10)
+    elif cmd == 'gtree':
+        if not arguments:
+            print('Wrong number of parameters!')
+        else:
+            # everything after the partition id is handed to grep
+            grep_opts = arguments[1:] or None
+            part = quiet_check_valid_part(arguments[0], parts, shorthands)
+            if part is not None:
+                print_part_tree(arguments[0], grep_opts, parts, shorthands)
+            else:
+                print_all_parts_tree(grep_opts, parts, shorthands)
     elif cmd == 'bodyfile':
         if len(arguments) != 2:
             print('Wrong number of parameters!')
@@ -280,6 +396,14 @@ def main():
         '-o', '--outputdir', type=str, help='directory for restored contents'
         ' and output files'
     )
+    parser.add_argument(
+        '-l', '--outputlog', type=str, help='file for logs to be stored'
+    )
+    parser.add_argument(
+        '-n', '--skipexisting', type=str, metavar='True|False',
+        help='do not write anew content for existing files to output dir'
+        ' (default: True, pass False to overwrite them)'
+    )
     args = parser.parse_args()
 
     try:
@@ -297,6 +421,21 @@ def main():
                      'recuperabit_output')
         args.outputdir = 'recuperabit_output'
 
+    if args.outputlog is None:
+        args.outputlog = os.path.join(args.outputdir, 'restore.log')
+        logging.info('No output log specified, defaulting to %s',
+                     args.outputlog)
+    # TODO: write output from gtree to file
+
+    if args.skipexisting is None:
+        logic.__skip_existing_files__ = True
+        logging.info('No skip existing specified, defaulting to True')
+    else:
+        # any value other than "false" (in any letter case) keeps skipping
+        logic.__skip_existing_files__ = (
+            args.skipexisting.strip().lower() != 'false'
+        )
+
     # Try to reload information from the savefile
     if args.savefile is not None:
         if args.overwrite:
@@ -362,12 +501,17 @@ def main():
     while True:
         print('\nWrite command ("help" for details):')
         try:
-            command = input('> ').split(' ')
+            command = shlex.split(input('> '))
         except (EOFError, KeyboardInterrupt):
             print('')
             exit(0)
-        cmd = command[0]
-        arguments = command[1:]
+        except ValueError as error:
+            # shlex rejects unbalanced quotes; ask again instead of crashing
+            print('Invalid command: {}'.format(error))
+            continue
+        if not command:
+            continue  # empty line
+        cmd, arguments = command[0], command[1:]
         interpret(cmd, arguments, parts, shorthands, args.outputdir)
 
 if __name__ == '__main__':
diff --git a/recuperabit/logic.py b/recuperabit/logic.py
index a0b2411..fc45d48 100644
--- a/recuperabit/logic.py
+++ b/recuperabit/logic.py
@@ -27,8 +27,12 @@
 import sys
 import time
 import types
+from datetime import datetime, timezone
 
 from .utils import tiny_repr
 
+# When true, recursive_restore leaves files that already exist untouched
+__skip_existing_files__ = True
+
 
 class SparseList(object):
@@ -237,28 +241,42 @@ def recursive_restore(node, part, outputdir, make_dirs=True):
     if is_directory and content is not None:
         logging.warning(u'Directory %s has data content!', file_path)
         restore_path += '_recuperabit_content'
 
+    failed = False  # set when the file could not be written
     try:
         if content is not None:
-            logging.info(u'Restoring #%s %s', node.index, file_path)
-            with codecs.open(restore_path, 'wb') as outfile:
-                if isinstance(content, types.GeneratorType):
-                    for piece in content:
-                        outfile.write(piece)
-                else:
-                    outfile.write(content)
+            if __skip_existing_files__ and os.path.isfile(restore_path):
+                logging.info(u'File #%s %s exists', node.index, file_path)
+            else:
+                logging.info(u'Restoring #%s %s', node.index, file_path)
+                with codecs.open(restore_path, 'wb') as outfile:
+                    if isinstance(content, types.GeneratorType):
+                        for piece in content:
+                            outfile.write(piece)
+                    else:
+                        outfile.write(content)
         else:
             if not is_directory:
                 # Empty file
                 open(restore_path, 'wb').close()
     except IOError:
+        failed = True
         logging.error(u'IOError when trying to create %s', restore_path)
 
     # Restore Modification + Access time
     mtime, atime, _ = node.get_mac()
-    if mtime is not None:
-        atime = time.mktime(atime.astimezone().timetuple())
-        mtime = time.mktime(mtime.astimezone().timetuple())
+
+    def workaround_ltimezone_bug(l_time):
+        # astimezone() can fail for timestamps the platform cannot map to
+        # local time; fall back to a fixed early date instead of aborting
+        try:
+            return l_time.astimezone()
+        except (ValueError, OverflowError, OSError):
+            return datetime(1901, 1, 1, 0, 0, tzinfo=timezone.utc).astimezone()
+
+    if mtime is not None and not failed:
+        atime = time.mktime(workaround_ltimezone_bug(atime).timetuple())
+        mtime = time.mktime(workaround_ltimezone_bug(mtime).timetuple())
         os.utime(restore_path, (atime, mtime))
 
     if is_directory:
diff --git a/recuperabit/utils.py b/recuperabit/utils.py
index 3ee1424..ba0c0c2 100644
--- a/recuperabit/utils.py
+++ b/recuperabit/utils.py
@@ -188,6 +188,16 @@ def _file_tree_repr(node):
     )
 
 
+def _short_file_tree_repr(node):
+    """Return [name, size] for node, for use in flat path listings.
+
+    Directories and nodes with children get a trailing slash and no size.
+    """
+    if node.is_directory or len(node.children):
+        return [f'{node.name}/', None]
+    return [f'{node.name}', readable_bytes(node.size)]
+
+
 def tree_folder(directory, padding=0):
     """Return a tree-like textual representation of a directory."""
     lines = []
@@ -207,6 +217,27 @@ def tree_folder(directory, padding=0):
     return '\n'.join(lines)
 
 
+def verbose_tree_folder(part_id, directory, lines=None, prefix=""):
+    """Return one line per leaf below directory, each with its full path.
+
+    part_id   -- partition number printed at the start of every line
+    directory -- node to start from
+    lines     -- list the lines are appended to (a new one if omitted)
+    prefix    -- path of the ancestors, already ending with a slash
+
+    The list is returned in every case, also when directory is a leaf.
+    """
+    if lines is None:
+        lines = []
+    node_name, size = _short_file_tree_repr(directory)
+    if not len(directory.children):
+        lines.append(f"#{part_id}: Size {size}: " + prefix + node_name)
+        return lines
+    for entry in directory.children:
+        verbose_tree_folder(part_id, entry, lines, prefix + node_name)
+    return lines
+
+
 def _bodyfile_repr(node, path):
     """Return a body file line for node."""
     end = '/' if node.is_directory or len(node.children) else ''