search.py
import os
import logging

import requests
from colorlog import ColoredFormatter
from concurrent.futures import ThreadPoolExecutor, as_completed

from previews import generate_image_preview

# Logging configuration
formatter = ColoredFormatter(
    "%(asctime)s - %(name)s - %(log_color)s%(levelname)s%(reset)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    log_colors={
        'DEBUG': 'cyan',
        'INFO': 'green',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'bold_red',
    },
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(console_handler)
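# With this handler attached, records render as, e.g.:
#   2024-01-01 12:00:00 - root - INFO - Local search completed. Found 3 matches.
# (the level name is colorized on ANSI-capable terminals)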


def local_search(search_term, node_id, conn, search_type='name', category=None):
    """
    Perform a local search in the PostgreSQL database for a specific search term.

    Args:
        search_term (str): Term to search for, either in file names (partial) or md5_hash (exact match).
        node_id (str): The ID of the current node performing the search.
        conn: Open PostgreSQL database connection; it is closed before this function returns.
        search_type (str): The type of search to perform ('name' for file name, 'md5' for md5_hash).
        category (str, optional): Category to filter the search results by.

    Returns:
        list: A list of dictionaries matching the search term and category (if specified), with 'node_id' included.
    """
    matches = []
    cursor = conn.cursor()
    try:
        logger.debug(f"Starting local search: search_term={search_term}, search_type={search_type}, category={category}")
        # Build the SQL query; download_count is selected so results can be ranked later
        query = "SELECT file_name, path, md5_hash, file_size, category, download_count FROM files WHERE "
        conditions = []
        params = []
        if category:
            conditions.append("category = %s")
            params.append(category)
        if search_type == 'name':
            conditions.append("LOWER(file_name) LIKE LOWER(%s)")
            params.append(f"%{search_term}%")
        elif search_type == 'md5':
            conditions.append("md5_hash = %s")
            params.append(search_term)
        else:
            raise ValueError(f"Unsupported search_type: {search_type}")
        query += " AND ".join(conditions)
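        # For example, search_type='name' with category='images' builds:
        #   SELECT ... FROM files WHERE category = %s AND LOWER(file_name) LIKE LOWER(%s)
        # and executes it with params ('images', '%<search_term>%').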
        # Execute the query (params are ordered to match the conditions above)
        cursor.execute(query, tuple(params))
        results = cursor.fetchall()
        # Determine the protocol for download URLs
        if os.getenv("ENABLE_SSL") == "true" or os.getenv("ENABLE_HTTPS_REDIRECT") == "true":
            protocol = "https"
        else:
            protocol = "http"
        # Create a hidden directory for previews
        shared_directory = os.getenv("SHARED_DIRECTORY")
        hidden_directory = os.path.join(shared_directory, '.previews')
        os.makedirs(hidden_directory, exist_ok=True)  # Create the hidden directory if it doesn't exist
        for row in results:
            file_name = row[0]
            file_path = row[1]
            md5_hash = row[2]
            # Generate the preview file name
            preview_file_name = f"{os.path.splitext(file_name)[0]} - preview.webp"
            output_file_path = os.path.join(hidden_directory, preview_file_name)
            # Generate the image preview (regenerated on every search; failures are non-fatal)
            try:
                generate_image_preview(file_path, output_file_path)
            except Exception as e:
                logger.error(f"Failed to generate preview for {file_name}: {e}")
            match = {
                'file_name': file_name,
                'path': file_path,
                'md5_hash': md5_hash,
                'file_size': row[3],
                'category': row[4],
                'download_count': row[5],
                'node_id': node_id,
                'download_url': f"{protocol}://{node_id}/download/{md5_hash}",
                'preview_url': f"{protocol}://{node_id}/previews/.previews/{preview_file_name}",  # served by the node's preview route
            }
            matches.append(match)
        logger.info(f"Local search completed. Found {len(matches)} matches.")
    except Exception as e:
        logger.error(f"Error during local search: {e}")
    finally:
        cursor.close()
        conn.close()  # the caller's connection is consumed; open a fresh one per search
    return matches
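

# Illustrative usage of local_search (psycopg2 and the DATABASE_URL variable are
# assumptions, not part of this module):
#
#   import psycopg2
#   conn = psycopg2.connect(os.getenv("DATABASE_URL"))
#   matches = local_search("vacation", "node-a.example:5000", conn)
#   # local_search closes conn, so reconnect before the next call.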


def global_search(search_term, known_nodes, current_node_id, conn, search_type='name', category=None):
    """
    Perform a global search across all known nodes and the local index in the PostgreSQL database.

    Args:
        search_term (str): Term to search for in file names or md5_hash.
        known_nodes (list): List of known nodes to query for remote searches.
        current_node_id (str): The ID of the current node performing the search.
        conn: Open PostgreSQL database connection, passed through to local_search (which closes it).
        search_type (str): The type of search to perform ('name' for file name, 'md5' for md5_hash).
        category (str, optional): Category to filter the search results by.

    Returns:
        list: A combined list of dictionaries from both local and remote searches, sorted by download_count in descending order.
    """
    global_matches = []
    logger.debug(f"Initiating global search for term '{search_term}' on node '{current_node_id}'")
    # Perform local search
    local_matches = local_search(search_term, current_node_id, conn, search_type, category)
    global_matches.extend(local_matches)

    def remote_search(node_id):
        """Perform the remote search request against a single node."""
        if node_id == current_node_id:
            return []  # Skip the current node (the caller filters these out as well)
        try:
            search_url = f"http://{node_id}/localsearch"
            logger.debug(f"Sending remote search request to {search_url}")
            response = requests.post(
                search_url,
                json={
                    "search_term": search_term,
                    "search_type": search_type,
                    "category": category,
                },
                verify=False,  # node-to-node TLS verification is disabled
                timeout=10,  # don't let one unresponsive node stall the whole search
            )
            response.raise_for_status()
            remote_matches = response.json()
            for match in remote_matches:
                match['node_id'] = node_id
            logger.info(f"Received {len(remote_matches)} matches from node {node_id}")
            return remote_matches
        except requests.RequestException as e:
            logger.error(f"Error during global search on node {node_id}: {e}")
            return []
    # Use ThreadPoolExecutor to perform remote searches concurrently
    with ThreadPoolExecutor() as executor:
        futures = {executor.submit(remote_search, node_id): node_id
                   for node_id in known_nodes if node_id != current_node_id}
        for future in as_completed(futures):
            global_matches.extend(future.result())

    # Sort global matches by download_count in descending order
    global_matches.sort(key=lambda x: x.get('download_count', 0), reverse=True)
    logger.info(f"Global search completed. Total matches found: {len(global_matches)}")
    return global_matches
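

# Minimal sketch of how these functions might be driven, assuming psycopg2 and a
# DATABASE_URL DSN; the node IDs below are placeholders, not real endpoints.
if __name__ == "__main__":
    import psycopg2

    conn = psycopg2.connect(os.getenv("DATABASE_URL"))
    results = global_search(
        "holiday",  # partial file-name match
        known_nodes=["node-b.example:5000", "node-c.example:5000"],
        current_node_id="node-a.example:5000",
        conn=conn,  # consumed (closed) by the local search
        search_type="name",
    )
    for r in results[:10]:
        print(f"{r['download_count']:>5}  {r['file_name']}  ({r['node_id']})")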