From bbb7d8409c217ae4caeb759346e76251b8561255 Mon Sep 17 00:00:00 2001
From: Abdul Saboor <asaboor@udel.edu>
Date: Sun, 1 Dec 2024 12:54:53 -0600
Subject: [PATCH] get_lines added including negative indexing support on file

---
 ipyvasp/__init__.py |  1 +
 ipyvasp/_version.py |  2 +-
 ipyvasp/utils.py    | 34 ++++++++++++++++++++++++++++++++++
 ipyvasp/widgets.py  |  6 ++++--
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/ipyvasp/__init__.py b/ipyvasp/__init__.py
index 3eeec3a..af54bf0 100644
--- a/ipyvasp/__init__.py
+++ b/ipyvasp/__init__.py
@@ -18,6 +18,7 @@
     "webshow",
     "load_results",
     "parse_text",
+    "get_lines",
     "summarize",
 ]
 
diff --git a/ipyvasp/_version.py b/ipyvasp/_version.py
index 8fee438..a30ece3 100644
--- a/ipyvasp/_version.py
+++ b/ipyvasp/_version.py
@@ -1 +1 @@
-__version__ = "0.9.85"
+__version__ = "0.9.86"
diff --git a/ipyvasp/utils.py b/ipyvasp/utils.py
index b4c2501..ac96d60 100644
--- a/ipyvasp/utils.py
+++ b/ipyvasp/utils.py
@@ -1,5 +1,6 @@
 __all__ = [
     "get_file_size",
+    "get_lines",
     "set_dir",
     "interpolate_data",
     "rolling_mean",
@@ -10,6 +11,7 @@
 
 import re
 import os
+import io
 from contextlib import contextmanager
 from pathlib import Path
 from inspect import signature, getdoc
@@ -33,6 +35,38 @@ def get_file_size(path: str):
             size /= 1024.0
     else:
         return ""
+    
+def get_lines(f, indices):
+    """Read lines by index from an opened file `f`; negative indices count from the end.
+
+    Returns one line if `indices` is an int, else a list of lines in file order (duplicate
+    and out-of-range indices are dropped). Raises TypeError for unsupported argument types
+    and IndexError if a single int index is out of range. The file is streamed and reading
+    stops at the last requested line, so this uses far less memory than `f.readlines()[index]`.
+
+    >>> with open('some_file','r') as f:
+    ...     get_lines(f, -1) # last line
+    ...     get_lines(f, range(5)) # first 5 lines
+    ...     get_lines(f, range(-5,0)) # last 5 lines
+    """
+    if not isinstance(f, io.IOBase): # text or binary stream; it must support seek(0)
+        raise TypeError(f"f should be file-like object. got {type(f)}")
+    return_line = isinstance(indices, int)
+    if return_line:
+        indices = [indices]
+    if not isinstance(indices, (tuple, list, range)):
+        raise TypeError(f"indices should be int/list/tuple/range, got {type(indices)}")
+
+    if any(i < 0 for i in indices): # unlike min(indices), this is safe for empty input
+        if not hasattr(f, '_nlines'): # do this once, assuming file is not changed while reading
+            f.seek(0)
+            f._nlines = sum(1 for _ in f)
+        indices = [i + (f._nlines if i < 0 else 0) for i in indices] # make all positive
+
+    wanted = {i for i in indices if i >= 0} # set lookup is O(1) per line read
+    f.seek(0)
+    lines = [l for i, l in zip(range(max(wanted, default=-1) + 1), f) if i in wanted]
+    return lines[0] if return_line else lines
 
 
 def _sig_kwargs(from_func, skip_params=()):
diff --git a/ipyvasp/widgets.py b/ipyvasp/widgets.py
index e195b51..19b9f66 100644
--- a/ipyvasp/widgets.py
+++ b/ipyvasp/widgets.py
@@ -447,8 +447,10 @@ def mapf(self, func, to_df=False,mode='r', encoding=None):
         >>> import json
         >>> import ipyvasp as ipv
         >>> files = ipv.Files(...)
-        >>> files.mapf(lambda fp: json.load(fp),to_df=True) 
-        >>> files.mapf(lambda fp: [fp.readline() for _ in range(5)]) # read first five lines
+        >>> files.mapf(lambda fp: json.load(fp,cls=ipv.DecodeToNumpy),to_df=True) # or use ipv.load(path) in map
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(5))) # read first five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(-5,0))) # read last five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, -1)) # read last line
         """
         if not mode in 'rb':
             raise ValueError("Only 'r'/'rb' mode is allowed in this context!")