-
Notifications
You must be signed in to change notification settings - Fork 2
/
backup_all_es71.py
executable file
·83 lines (67 loc) · 2.95 KB
/
backup_all_es71.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
import os, sys, requests, json, types, argparse, bz2, shutil
import elasticsearch
from elasticsearch.exceptions import NotFoundError, RequestError, ElasticsearchException
from hysds.es_util import get_mozart_es, get_grq_es
def _rotate_backup_root(backup_root):
    """Move an existing backup directory aside to ``<root>.bak`` and create a fresh, empty one."""
    if os.path.isdir(backup_root):
        # Strip trailing separators first: "backups/" + ".bak" would be
        # "backups/.bak", i.e. a path *inside* the directory being moved,
        # and shutil.move() refuses to move a directory into itself.
        separators = os.sep + (os.altsep or "")
        trimmed = backup_root.rstrip(separators) or backup_root
        saved = trimmed + ".bak"
        # only one previous generation is kept; an older ".bak" is discarded
        if os.path.isdir(saved):
            shutil.rmtree(saved)
        shutil.move(backup_root, saved)
    os.makedirs(backup_root)


def _dump_json(obj, path):
    """Write ``obj`` to ``path`` as indented JSON with sorted keys."""
    with open(path, "w") as f:
        json.dump(obj, f, indent=2, sort_keys=True)


def backup(component, backup_root, only_index=None):
    """Back up settings, mapping and documents of the indices of one component.

    For every selected index a sub-directory ``<backup_root>/<index>/`` is
    created holding three files:

    * ``<index>.settings`` - index settings as pretty-printed JSON
    * ``<index>.mapping``  - index mapping as pretty-printed JSON
    * ``<index>.docs``     - one JSON-encoded ``_source`` document per line

    If ``backup_root`` already exists it is first renamed to
    ``<backup_root>.bak`` (replacing any earlier ``.bak`` directory).

    Args:
        component: ``'mozart'`` selects the client returned by
            ``get_mozart_es()``; any other value uses ``get_grq_es()``.
        backup_root: directory that receives the backup.
        only_index: optional prefix; when given, only indices whose name
            starts with it are backed up.

    Exceptions raised by the Elasticsearch client or by filesystem
    operations are not caught here and propagate to the caller.
    """
    # get ES object
    es = get_mozart_es() if component == 'mozart' else get_grq_es()

    # List the indices *before* touching the filesystem: if the cluster cannot
    # be reached we fail here, leaving the previous backup where it was
    # instead of rotating it away (and eventually deleting it on a re-run).
    c = elasticsearch.client.IndicesClient(es.es)
    indices = sorted(c.get_alias().keys())

    # Only use indices that have the only_index prefix
    if only_index is not None:
        print("Filtering indices with prefix %s" % only_index)
        indices = [idx for idx in indices if idx.startswith(only_index)]
        print("Backing up only indices: %s" % indices)

    # save previous backup root (if any) and start with an empty directory
    _rotate_backup_root(backup_root)

    # loop over each index and save settings, mapping, and docs
    for idx in indices:
        # NOTE(review): "geonames" is always skipped - presumably a static
        # reference index that does not need backing up; confirm.
        if idx == "geonames":
            continue
        print("Backup up %s..." % idx)
        d = os.path.join(backup_root, idx)
        if not os.path.isdir(d):
            os.makedirs(d)

        # save settings
        _dump_json(c.get_settings(index=idx), os.path.join(d, "%s.settings" % idx))
        print("Backed up settings for %s" % idx)

        # save mapping
        _dump_json(c.get_mapping(index=idx), os.path.join(d, "%s.mapping" % idx))
        print("Backed up mapping for %s" % idx)

        # save docs: one JSON document per line, written as the hits arrive
        # NOTE(review): assumes es.query() yields every hit of the index
        # (i.e. pages/scrolls internally) - verify against hysds.es_util.
        query = {"query": {"match_all": {}}}
        txt = os.path.join(d, "%s.docs" % idx)
        with open(txt, "w") as f:
            for hit in es.query(body=query, index=idx):
                f.write("%s\n" % json.dumps(hit["_source"]))
        print("Backed up docs for %s" % idx)
def main(argv=None):
    """Parse the command line and run the backup.

    Args:
        argv: optional list of argument strings (without the program name).
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
            calling ``main()`` with no arguments behaves as before.

    Positional arguments are the component (``mozart`` or ``grq``) and the
    backup directory; ``--only-index`` optionally restricts the backup to
    indices whose name starts with the given prefix. argparse exits the
    process (``SystemExit``) on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description="Backup all ElasticSearch indexes.")
    parser.add_argument("component", choices=['mozart', 'grq'])
    parser.add_argument("directory", help="backup directory location")
    # Optional prefix filter. The help text is built from adjacent string
    # literals so that source indentation never leaks into the message (which
    # happens with a backslash continuation inside a single literal).
    parser.add_argument(
        "--only-index",
        help=(
            "backup only the specified index. "
            "This argument will act as a prefix of the index name. "
            "For example, if the argument is grq_abc, "
            "it will match grq_abc_2024.08, grq_abc_2024.09, and so on.."
        ),
    )
    args = parser.parse_args(argv)
    backup(args.component, args.directory, args.only_index)
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()