forked from diskoverdata/diskover-community
-
Notifications
You must be signed in to change notification settings - Fork 0
/
diskover_gource.py
executable file
·82 lines (73 loc) · 2.59 KB
/
diskover_gource.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""diskover - Elasticsearch file system crawler
diskover is a file system crawler that indexes
your file metadata into Elasticsearch.
See README.md or https://github.com/shirosaidev/diskover
for more information.
Copyright (C) Chris Park 2017
diskover is released under the Apache 2.0 license. See
LICENSE for the full license text.
"""
from diskover import config
from datetime import datetime
import time
import sys
import os
def gource(es, cliargs):
    """Stream index documents to stdout as a gource custom log.

    Searches ``cliargs['index']`` (doc type ``file``) sorted ascending by
    a date field, pages through the results with the Elasticsearch scroll
    api and writes one ``timestamp|user|type|path`` line per hit.

    Two modes are selected by ``cliargs``:

    * ``gourcert``: sort/read ``indexing_date``, user is ``worker_name``,
      type is ``A``; sleeps ``config['gource_maxfilelag']`` seconds after
      every line to slow the output down for gource.
    * ``gourcemt``: sort/read ``last_modified``, user is ``owner``,
      type is ``M``; no delay.

    ``gourcert`` wins when both are set.

    :param es: Elasticsearch client; ``indices.refresh``, ``search`` and
        ``scroll`` are called on it.
    :param cliargs: dict of parsed command line arguments; the keys
        ``gourcert``, ``gourcemt`` and ``index`` are read.
    :raises ValueError: if neither ``gourcert`` nor ``gourcemt`` is set
        (checked before Elasticsearch is contacted).
    :raises SystemExit: with status 1 if writing to stdout fails.
    """
    # Everything that depends on the mode is decided once, up front,
    # instead of being re-evaluated for every hit.
    if cliargs['gourcert']:
        date_field, user_field, action = 'indexing_date', 'worker_name', 'A'
        throttle = True
    elif cliargs['gourcemt']:
        date_field, user_field, action = 'last_modified', 'owner', 'M'
        throttle = False
    else:
        raise ValueError(
            "gource requires either cliargs['gourcert'] or "
            "cliargs['gourcemt'] to be set")

    data = {
        "sort": {
            date_field: {
                "order": "asc"
            }
        }
    }

    # refresh index so the search sees the most recently indexed docs
    es.indices.refresh(index=cliargs['index'])
    # search es and start scroll
    res = es.search(index=cliargs['index'], doc_type='file', scroll='1m',
                    size=100, body=data, request_timeout=config['es_timeout'])

    # an empty page of hits marks the end of the scroll
    while res['hits']['hits']:
        for hit in res['hits']['hits']:
            source = hit['_source']
            output = '|'.join((
                _gource_unix_time(source[date_field]),
                str(source[user_field]),
                action,
                os.path.join(source['path_parent'], source['filename']),
            ))
            try:
                # output for gource
                sys.stdout.write(output + '\n')
                sys.stdout.flush()
            except Exception:
                # Any failure to emit a line ends the run with status 1
                # (presumably the reader of the pipe has gone away).
                sys.exit(1)
            if throttle:
                # slow down output for gource
                time.sleep(config['gource_maxfilelag'])

        # continue the scroll from the id returned with the last page
        res = es.scroll(scroll_id=res['_scroll_id'], scroll='1m',
                        request_timeout=config['es_timeout'])


def _gource_unix_time(stamp):
    """Return the ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` string as unix seconds.

    The fractional part is optional so that a timestamp stored with or
    without microseconds parses either way. The result is the integer
    number of seconds rendered as a string.
    """
    if '.' in stamp:
        fmt = '%Y-%m-%dT%H:%M:%S.%f'
    else:
        fmt = '%Y-%m-%dT%H:%M:%S'
    parsed = datetime.strptime(stamp, fmt)
    # NOTE(review): time.mktime() treats the parsed value as local time;
    # if the index stores UTC timestamps the result is shifted by the
    # local UTC offset -- confirm against the indexing code.
    return str(int(time.mktime(parsed.timetuple())))