Skip to content

Commit

Permalink
quick and dirty regex based solution to multi-line histories and a he…
Browse files Browse the repository at this point in the history
…lper zsh script for searching for history files in backups and merging everything into a hist file with unique items
  • Loading branch information
zalmoxis committed Feb 16, 2021
1 parent c3fd9f1 commit 6f37781
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 24 deletions.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,36 @@
*Thanks to [jcsalterego](https://github.com/jcsalterego) for the idea and who's already implemented the same thing for [bash](https://github.com/jcsalterego/historian)*

### About this fork

- intended to search backups for history files and merge everything into a single command history with unique entries, for quick documentation
- multi-line commands work
- use a new backup db when backing up, to ensure you don't pollute your existing one with broken commands
- remove the hist file you want to restore to before writing to it, to ensure uniqueness
- maybe add some of these to your zshrc to further ensure uniqueness (past 100k entries things can get quite slow, so try to keep your total hist size below 50k)

```
#set history size
export HISTSIZE=100000 # this must come after sourcing oh-my-zsh!!!
#save history after logout
export SAVEHIST=1000000 # this must come after sourcing oh-my-zsh!!!
#history file
export HISTFILE=$HOME/.zsh_history
#append into history file
setopt INC_APPEND_HISTORY
#save only one entry if two consecutive commands are the same
setopt HIST_IGNORE_DUPS
#add timestamp for each entry
setopt EXTENDED_HISTORY
setopt HIST_EXPIRE_DUPS_FIRST # Expire duplicate entries first when trimming history.
setopt HIST_IGNORE_DUPS # Don't record an entry that was just recorded again.
setopt HIST_IGNORE_ALL_DUPS # Delete old recorded entry if new entry is a duplicate.
setopt HIST_FIND_NO_DUPS # Do not display a line previously found.
setopt HIST_IGNORE_SPACE # Don't record an entry starting with a space.
setopt HIST_SAVE_NO_DUPS # Don't write duplicate entries in the history file.
setopt HIST_REDUCE_BLANKS # Remove superfluous blanks before recording entry.
```

### Backup and Restore ZSH history

- Simple python script that can backup and restore your zsh history file to a sqlite db
Expand Down
65 changes: 41 additions & 24 deletions src/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import os
import sqlite3
import argparse
import re

home_folder = os.environ['HOME']
default_db_location = '/'.join([home_folder, '.zsh_hist_backup.db'])
default_hist_location = '/'.join([home_folder, '.zsh_history'])
#default_db_location = '/'.join([home_folder, '.zsh_hist_backup.db'])
default_db_location = '/'.join(['/tmp/', '.zsh_hist_backup.db'])
#default_hist_location = '/'.join([home_folder, '.zsh_history'])
default_hist_location = '/'.join(['/tmp/', '.zsh_history'])
default_table_name = 'CMD_HISTORY'


Expand All @@ -28,6 +31,8 @@ def init_db(db_name=default_db_location):
conn.close()


# BACKUP could pollute DB if bad hist file passed
# TODO - first backup db file to backups dir (run from wrapper script)
def backup(history_path=None, db_name='zsh_history.db'):
"""Backup zsh history to a sqlite db."""
if (history_path is None):
Expand All @@ -37,21 +42,34 @@ def backup(history_path=None, db_name='zsh_history.db'):
print("Invalid path to zsh history:" + history_path)
exit(-1)
cmd_dict = {}
with open(history_path, "r", encoding="ISO-8859-1") as f:
for line in f.readlines():
line = line.rstrip('\n\t')
arr = line.split(';')
metadata = arr[0]
cmd = arr[1] if len(arr) > 1 else ""
# Handle empty lines
if cmd != "":
try:
timestamp = int(metadata.split(': ')[1].split(':')[0])
cmd_dict[cmd] = (line, timestamp)
except:
# if a cmd can't be parsed ignore it
pass

command_pattern = re.compile(': [0-9]{10,11}:[0-9]+;.+?(?=: [0-9]{10,11}:[0-9]+;|$)', re.MULTILINE|re.DOTALL)

with open(history_path, "r", encoding="ISO-8859-1") as f:
commands = command_pattern.findall(str(f.read()))
print(len(commands))
print(commands[0][0:200])
# exit()

for command_entry in commands:
print("COMMAND: %s" % command_entry)
arr = command_entry.split(';')
metadata = arr[0]
# remove metadata section for the "command" string
cmd = command_entry[(len(metadata)+1):] if len(arr) > 1 else ""
# Handle empty lines
if cmd != "":
print("COMMAND: %s" % cmd)
print("END============")
try:
timestamp = int(metadata.split(': ')[1].split(':')[0])
# this keeps a single occurrence of the full multi-line command at the most recently noticed timestamp
# TODO - check if command exists and if timestamp is newer - we want to record the most recent time we ran this command
cmd_dict[cmd] = (command_entry, timestamp)
except:
# if a cmd can't be parsed ignore it
pass

rows = []
for cmd, (line, timestamp) in cmd_dict.items():
rows = rows + [(cmd, line, timestamp)]
Expand All @@ -64,6 +82,9 @@ def backup(history_path=None, db_name='zsh_history.db'):
conn.close()


# RESTORE overwrites the file
# TODO - first copy file to another location (or do this from sh wrapper script)
# TODO - first run backup to temp database (or do this from sh wrapper script)
def restore(history_path=None, db_name=None, max_lines=None):
"""Append history from a sqlite db to the given history file."""
"""Creates the file if it doesn't exist"""
Expand All @@ -74,14 +95,6 @@ def restore(history_path=None, db_name=None, max_lines=None):

cmd_dict = {}
prev_file_lines = []
if os.path.isfile(history_path):
with open(history_path) as history_file:
for line in history_file:
line = line.rstrip('\n\t')
arr = line.split(';')
cmd = arr[1] if len(arr) > 1 else ""
cmd_dict[cmd] = line
prev_file_lines += [line + '\n']

conn = sqlite3.connect(db_name)
cursor = conn.cursor()
Expand All @@ -90,9 +103,13 @@ def restore(history_path=None, db_name=None, max_lines=None):
prev_history = cursor.fetchall()
new_lines = -1
if (max_lines is not None):
max_lines=int(max_lines)
new_lines = max_lines - len(prev_file_lines)
file_lines = []
for cmd, line, timestamp in prev_history:
# print("COMMAND: %s" % cmd)
print("COMMAND ENTRY: %s" % line)
print("END==============")
if new_lines != -1 and len(file_lines) > new_lines:
break
if cmd not in cmd_dict:
Expand Down
54 changes: 54 additions & 0 deletions src/histories.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/zsh

# Which shell's history files are being collected; this name is used to build
# the file-name patterns and the target directory below.
hist_type=zsh
#hist_type='bash'

# Base name shared by the merged database (<name>.db) and the rebuilt history file.
output_name='zsh-hist-rebuilt'

# Local directory the discovered history files are copied into
# (copies preserve timestamps and the like).
target_dir="$HOME/backup/histories/pulled/${hist_type}/"
echo "$target_dir"
mkdir -p "$target_dir"

function search_drives_for_histfiles () {
    # Search every listed volume under /media/$USER/ for history files of the
    # configured shell and record the paths found, one list file per volume.
    #
    # Reads:  $hist_type (global), $USER (environment).
    # Writes: ~/backup/histories/<hist_type>_history_files_<volume>, one path
    #         per line. The name matches the "<hist_type>_history_files*"
    #         pattern that copy_histfiles_from_drives reads.
    #
    # The volume list is empty on purpose: fill in the directory names found
    # under /media/$USER/ before calling this function.
    local -a volumes
    local volume
    local list_dir="$HOME/backup/histories"
    volumes=()

    for volume in $volumes; do
        # Redirect to a file (not the directory itself) so the shell can open it.
        sudo find "/media/$USER/$volume/" -iname ".${hist_type}_history" \
            > "$list_dir/${hist_type}_history_files_${volume}"
    done
}

function copy_histfiles_from_drives () {
    # Copy every history file named in the "<hist_type>_history_files*" list
    # files (looked up in the current directory, one path per line) into
    # $target_dir. Each copy is named after its source path with every "/"
    # replaced by "!", so files from different locations do not overwrite
    # each other.
    #
    # Reads: $hist_type, $target_dir (globals).
    local hist_file new_hist_file

    # (f) splits the list on newlines only, so paths containing spaces or tabs
    # are kept as a single entry instead of being broken into words.
    for hist_file in ${(f)"$(cat "$hist_type"_history_files*)"}; do
        echo processing $hist_file
        # printf passes the path through verbatim (echo may interpret backslashes).
        new_hist_file="$(printf '%s\n' "$hist_file" | sed 's/\//!/g')"
        echo new hist file $new_hist_file
        # -a keeps timestamps and other attributes of the original file.
        sudo cp -a "$hist_file" "$target_dir$new_hist_file"
    done
}

function load_histfiles_to_db () {
# TODO first backup db file to backups dir in case histfiles pollute DB with broken
# ie. single line instead of multiline commands or other corruptions

cd $target_dir

for hist_file in $(ls *"$hist_type"_history); do
echo backing up to db: $hist_file
head ./"$hist_file"
# TODO add interactive accepting of this file
zsh-hist.py -p ./"$hist_file" -d $target_dir/$output_name.db -m 10000000 -b
done
}

function regenerate_histfile_from_db () {
    # Rebuild a history file at /tmp/$output_name from the database
    # $target_dir/$output_name.db using zsh-hist.py.
    #
    # Reads: $output_name, $target_dir (globals).
    #
    # TODO - first copy target histfile to another location if it exists
    # TODO - first run backup to temp database (in /tmp)

    # Start from a clean file; -f keeps this quiet when no previous output exists.
    rm -f /tmp/$output_name
    # NOTE(review): -r presumably selects restore mode; confirm against
    # zsh-hist.py's argument parser.
    zsh-hist.py -p /tmp/$output_name -d $target_dir/$output_name.db -m 10000000 -r
}

# Script entry point: only the load step runs by default.
# Uncomment the second call to also run the regenerate step.
load_histfiles_to_db
#regenerate_histfile_from_db

0 comments on commit 6f37781

Please sign in to comment.