Skip to content

Commit

Permalink
Make extract_text_from_files executable
Browse files Browse the repository at this point in the history
  • Loading branch information
ashariyar committed Jun 20, 2023
1 parent a63cfd7 commit 688f85d
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 35 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# NEXT RELEASE
* Actually make `extract_text_from_files` executable

# 1.8.0
* Add a script to
* Add a script to extract files
* More default sort rules

### 1.7.1
* Handle 0 byte PDF error
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ pipx install clown_sort[gui]
![](doc/manual_select_box.png)


## One Offs
There is a script you can use to extract text from a single file (or a bunch of files, or all the files in a given directory). Just run:
## One Off Extractions
Sometimes you just want to see the extracted text from a PDF or image and aren't trying to sort anything. There is a convenience script you can use to extract text from a single file, multiple files, or all the files in a given directory. Just run `extract_text_from_files MY_FILE` to extract text from a single file. For example, to extract text from multiple files and/or directories:

```
scripts/extract_text_from_files.py MY_FILE1 MY_FILE2 SOME_DIR3
extract_text_from_files MY_FILE1 MY_FILE2 SOME_DIR3
```

This will parse and display the text in `MY_FILE1`, `MY_FILE2`, and all the files in `SOME_DIR3`.
Expand Down
31 changes: 30 additions & 1 deletion clown_sort/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
from glob import glob
from os import environ, getcwd, path
from pathlib import Path
Expand Down Expand Up @@ -40,7 +41,34 @@ def sort_screenshots():
file_to_sort.sort_file()


def set_screenshot_timestamps():
def extract_text_from_files() -> None:
    """
    Extract and print the text of every file named on the command line.

    Each entry in sys.argv[1:] is treated as a path. Arguments that are directories
    are expanded with files_in_dir(); every other argument is processed as a single
    file. Each resulting path is handed to build_sortable_file() and its extracted
    text is printed.

    Exits with a non-zero status (and a message on stderr) when no paths are given.
    TODO: replace with an option parser
    """
    console.line()

    if len(sys.argv) <= 1:
        # Passing a string to sys.exit() writes it to stderr and exits with status 1,
        # so shells and calling scripts can detect the usage error.
        sys.exit("Provide at least one filename to extract.")

    files_to_process = []

    for file_path in sys.argv[1:]:
        if Path(file_path).is_dir():
            files_to_process.extend(files_in_dir(file_path))
        else:
            files_to_process.append(file_path)

    for file_path in files_to_process:
        build_sortable_file(file_path).print_extracted_text()
        # Blank lines after each file's output.
        console.line(2)


def set_screenshot_timestamps_from_filenames():
"""Parse the filenames to reset the file creation timestamps."""
Config.configure()

for image in screenshot_paths(Config.screenshots_dir):
Expand Down Expand Up @@ -86,6 +114,7 @@ def screenshot_paths(dir: Path) -> List[SortableFile]:


def build_sortable_file(file_path: Union[str, Path]) -> SortableFile:
"""Decide if it's a PDF, image, or other type of file."""
if is_image(file_path):
return ImageFile(file_path)
elif is_pdf(file_path):
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,5 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
sort_screenshots = 'clown_sort:sort_screenshots'
set_screenshot_timestamps = 'clown_sort:set_screenshot_timestamps'
set_screenshot_timestamps_from_filenames = 'clown_sort:set_screenshot_timestamps_from_filenames'
extract_text_from_files = 'clown_sort:extract_text_from_files'
29 changes: 0 additions & 29 deletions scripts/extract_text_from_files.py

This file was deleted.

0 comments on commit 688f85d

Please sign in to comment.