From bbb7d8409c217ae4caeb759346e76251b8561255 Mon Sep 17 00:00:00 2001
From: Abdul Saboor
Date: Sun, 1 Dec 2024 12:54:53 -0600
Subject: [PATCH] get_lines added including negative indexing support on file

---
 ipyvasp/__init__.py |  1 +
 ipyvasp/_version.py |  2 +-
 ipyvasp/utils.py    | 41 +++++++++++++++++++++++++++++++++++++++++
 ipyvasp/widgets.py  |  6 ++++--
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/ipyvasp/__init__.py b/ipyvasp/__init__.py
index 3eeec3a..af54bf0 100644
--- a/ipyvasp/__init__.py
+++ b/ipyvasp/__init__.py
@@ -18,6 +18,7 @@
     "webshow",
     "load_results",
     "parse_text",
+    "get_lines",
     "summarize",
 ]
 
diff --git a/ipyvasp/_version.py b/ipyvasp/_version.py
index 8fee438..a30ece3 100644
--- a/ipyvasp/_version.py
+++ b/ipyvasp/_version.py
@@ -1 +1 @@
-__version__ = "0.9.85"
+__version__ = "0.9.86"
diff --git a/ipyvasp/utils.py b/ipyvasp/utils.py
index b4c2501..ac96d60 100644
--- a/ipyvasp/utils.py
+++ b/ipyvasp/utils.py
@@ -1,5 +1,6 @@
 __all__ = [
     "get_file_size",
+    "get_lines",
     "set_dir",
     "interpolate_data",
     "rolling_mean",
@@ -10,6 +11,7 @@
 
 import re
 import os
+import io
 from contextlib import contextmanager
 from pathlib import Path
 from inspect import signature, getdoc
@@ -33,6 +35,45 @@ def get_file_size(path: str):
             size /= 1024.0
     else:
         return ""
+
+def get_lines(f, indices):
+    """Read lines by indexing from an opened file pointer `f`. Negative indexing is supported to read lines from end.
+    Returns a single str of line if one integer given, otherwise a list of lines.
+    This consumes a lot less memory than indexing over `f.readlines()[index]`.
+
+    Lines come back in file order, each at most once, whatever the order of `indices`; empty `indices` gives an empty list.
+    Raises TypeError for an unsupported type of `f` or `indices`, and IndexError if a single integer index is out of range.
+
+    >>> with open('some_file','r') as f:
+    >>>     get_lines(f, -1) # last line
+    >>>     get_lines(f, range(5)) # first 5 lines
+    >>>     get_lines(f, range(-5,0)) # last 5 lines
+    """
+    if not isinstance(f, io.TextIOWrapper):
+        raise TypeError(f"f should be file-like object. got {type(f)}")
+
+    return_line = False
+    if isinstance(indices, int):
+        indices = [indices]
+        return_line = True
+
+    if not isinstance(indices, (tuple,list, range)):
+        raise TypeError(f"indices should int/list/tuple/range, got {type(indices)}")
+
+    if not indices: # nothing requested; min() below would raise ValueError on empty input
+        return []
+
+    f.seek(0)
+    if min(indices) < 0:
+        if not hasattr(f, '_nlines'): # do this once, assuming file is not changed while reading
+            f._nlines = sum(1 for _ in f)
+            f.seek(0)
+
+        indices = [i + (f._nlines if i < 0 else 0) for i in indices] # make all positive
+
+    indices = set(indices) # constant-time membership test for every line scanned
+    lines = [l for i, l in enumerate(f) if i in indices]
+    return lines[0] if return_line else lines
 
 
 def _sig_kwargs(from_func, skip_params=()):
diff --git a/ipyvasp/widgets.py b/ipyvasp/widgets.py
index e195b51..19b9f66 100644
--- a/ipyvasp/widgets.py
+++ b/ipyvasp/widgets.py
@@ -447,8 +447,10 @@ def mapf(self, func, to_df=False,mode='r', encoding=None):
         >>> import json
         >>> import ipyvasp as ipv
         >>> files = ipv.Files(...)
-        >>> files.mapf(lambda fp: json.load(fp),to_df=True)
-        >>> files.mapf(lambda fp: [fp.readline() for _ in range(5)]) # read first five lines
+        >>> files.mapf(lambda fp: json.load(fp,cls=ipv.DecodeToNumpy),to_df=True) # or use ipv.load(path) in map
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(5))) # read first five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(-5,0))) # read last five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, -1)) # read last line
         """
         if not mode in 'rb':
             raise ValueError("Only 'r'/'rb' mode is allowed in this context!")