get_lines added including negative indexing support on file

asaboor-gh · Dec 1, 2024 · bbb7d84 · bbb7d84
1 parent 14c88d6
commit bbb7d84
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 3 deletions.
diff --git a/ipyvasp/__init__.py b/ipyvasp/__init__.py
@@ -18,6 +18,7 @@
     "webshow",
     "load_results",
     "parse_text",
+    "get_lines",
     "summarize",
 ]
 

diff --git a/ipyvasp/_version.py b/ipyvasp/_version.py
@@ -1 +1 @@
-__version__ = "0.9.85"
+__version__ = "0.9.86"
diff --git a/ipyvasp/utils.py b/ipyvasp/utils.py
@@ -1,5 +1,6 @@
 __all__ = [
     "get_file_size",
+    "get_lines",
     "set_dir",
     "interpolate_data",
     "rolling_mean",
@@ -10,6 +11,7 @@
 
 import re
 import os
+import io
 from contextlib import contextmanager
 from pathlib import Path
 from inspect import signature, getdoc
@@ -33,6 +35,38 @@ def get_file_size(path: str):
             size /= 1024.0
     else:
         return ""
+
def get_lines(f, indices):
    """Read lines by index from an opened, seekable file object `f`.

    Negative indexing is supported to read lines from the end (`-1` is the last line).
    This consumes a lot less memory than indexing over `f.readlines()[index]`, because
    lines are streamed and only the requested ones are kept. Reading stops as soon as
    every requested line has been collected.

    - `f`: an `io.IOBase` instance such as the object returned by `open` (text or
      binary mode) or `io.StringIO`/`io.BytesIO`. It is rewound with `f.seek(0)`
      before reading; the position it is left at afterwards is unspecified.
    - `indices`: a single int, or a list/tuple/range of ints.

    Returns a single line if one integer is given, otherwise a list of lines.
    The list is always in file order and holds each matching line once, regardless
    of the order or repetition of `indices`; indices that fall outside the file are
    skipped. An empty `indices` gives an empty list. Lines keep their line endings
    and are `bytes` if `f` was opened in binary mode.

    Raises `TypeError` for an unsupported `f` or `indices`, and `IndexError` if a
    single integer index is out of range.

    >>> with open('some_file','r') as f:
    ...     get_lines(f, -1) # last line
    ...     get_lines(f, range(5)) # first 5 lines
    ...     get_lines(f, range(-5,0)) # last 5 lines
    """
    if not isinstance(f, io.IOBase):
        raise TypeError(f"f should be file-like object. got {type(f)}")

    return_line = False
    requested = indices  # kept only for the error message below
    if isinstance(indices, int):
        indices = [indices]
        return_line = True

    if not isinstance(indices, (tuple, list, range)):
        raise TypeError(f"indices should int/list/tuple/range, got {type(indices)}")

    if not indices:
        # Nothing requested: avoid min() on an empty sequence and leave the file untouched.
        return []

    f.seek(0)
    if min(indices) < 0:
        # Counting lines needs a full pass, so do it once per file object and cache
        # the result on it, assuming the file is not changed while reading.
        nlines = getattr(f, '_nlines', None)
        if nlines is None:
            nlines = sum(1 for _ in f)
            f._nlines = nlines
            f.seek(0)

        indices = [i + nlines if i < 0 else i for i in indices]  # make all positive

    # A set gives O(1) membership per line; indices that are still negative point
    # before the start of the file and can never match.
    wanted = {i for i in indices if i >= 0}

    lines = []
    if wanted:
        for i, line in enumerate(f):
            if i in wanted:
                lines.append(line)
                if len(lines) == len(wanted):
                    break  # everything requested was found, skip the rest of the file

    if not return_line:
        return lines

    if not lines:
        raise IndexError(f"line index {requested} is out of range")
    return lines[0]
 
 
 def _sig_kwargs(from_func, skip_params=()):

diff --git a/ipyvasp/widgets.py b/ipyvasp/widgets.py
@@ -447,8 +447,10 @@ def mapf(self, func, to_df=False,mode='r', encoding=None):
         >>> import json
         >>> import ipyvasp as ipv
         >>> files = ipv.Files(...)
-        >>> files.mapf(lambda fp: json.load(fp),to_df=True) 
-        >>> files.mapf(lambda fp: [fp.readline() for _ in range(5)]) # read first five lines
+        >>> files.mapf(lambda fp: json.load(fp,cls=ipv.DecodeToNumpy),to_df=True) # or use ipv.load(path) in map
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(5))) # read first five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(-5,0))) # read last five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, -1)) # read last line
         """
         if not mode in 'rb':
             raise ValueError("Only 'r'/'rb' mode is allowed in this context!")