Skip to content

Commit

Permalink
wow.py now getting all text from the recursive file scan. now to implement grabbing a random file and some random lines from it
Browse files Browse the repository at this point in the history
  • Loading branch information
grothetr committed Nov 30, 2023
1 parent 6cdf7ae commit 887489c
Showing 1 changed file with 20 additions and 7 deletions.
27 changes: 20 additions & 7 deletions wow.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#!/usr/bin/python
#Words Of Wisdom
# output some random text from my journal
# output some random text from my journals and writings

import os
import sys
import time
import filetype #for filetype (extension and mime type)
import chardet #for getting encoding
import subprocess
Expand All @@ -19,23 +21,29 @@ def addSampleFileIfTxt(filepath):
ftype = filetype.guess(filepath)
if v: print('filetype: {}'.format(str(ftype)))
if v: print('encoding: {}'.format(enc))
if ftype != None:
if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java']: #don't want code in the sample data
return
if ftype.extension == 'odt':
if v: print('converting odt: {}'.format(filepath))
subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
total_text += str(subproc.stdout)
if enc in ['ascii','utf-8']:
if v: print(fb)
total_text += str(fb)
elif ftype.extension == 'odt':
total_text += subprocess.run(['odt2txt', filepath]).stdout
total_text += str(fb, encoding='utf-8')

else:
if v: print('{} is not txt')
else:
if v: print('not file')



#paths=['/home/thomas/doc/fiction']
paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
samplefiles=[] #the individual files we want to grab text from

v=True #verbose

v=False #verbose
tStart = time.time()
for p in paths:
if v: print('path {}'.format(p))
if os.path.isdir(p):
Expand All @@ -46,6 +54,11 @@ def addSampleFileIfTxt(filepath):
else:
addSampleFileIfTxt(p)

#now we have all of our files of interest. so pick a random one

tEnd = time.time()
tDuration = tEnd - tStart
print('report generated in {} seconds, from paths {}'.format(tDuration, str(paths)))
print(total_text)
#for f in samplefiles:
# print(f)
Expand Down

0 comments on commit 887489c

Please sign in to comment.