-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
111 lines (94 loc) · 3.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# import streamlit as st
# import torch
# from langchain_ollama import OllamaLLM
# import requests
# from parse import (
# search,
# extract_body_content,
# clean_body_content,
# split_content,
# parse,
# )
# model = OllamaLLM(model="llama3.1")
# # Scrape the website
# input_prompt = st.text_area("Describe what you want to know")
# question = input_prompt.replace(" ", "+")
# content = search(f"https://www.google.com/search?q={question}")
# body_content = extract_body_content(content)
# cleaned_content = clean_body_content(body_content)
# # Store the content
# st.session_state.content = cleaned_content
# # Ask Questions
# if "content" in st.session_state:
# if st.button("Let's go"):
# if question:
# st.write("Thinking...")
# # Provide the content with Ollama
# chunks = split_content(st.session_state.content)
# parsed_result = parse(chunks, question)
# st.write(parsed_result)
import streamlit as st
import torch
from langchain_ollama import OllamaLLM
import requests
from parse import (
extract_body_content,
clean_body_content,
split_content,
parse,
)
# Tor Configuration
TOR_PROXY = "socks5h://127.0.0.1:9050"
proxies = {
"http": TOR_PROXY,
"https": TOR_PROXY
}
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
}
def check_tor():
"""Check if Tor is working properly."""
try:
response = requests.get("https://check.torproject.org", proxies=proxies, headers=headers, timeout=10)
if "Congratulations" in response.text:
return True
else:
return False
except requests.RequestException as e:
st.error(f"Error checking Tor: {e}")
return False
def search(url):
"""Make a request through Tor."""
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=10)
response.raise_for_status()
return response.text
except requests.exceptions.RequestException as e:
st.error(f"Failed to fetch data: {e}")
return None
# Initialize the LLM Model
model = OllamaLLM(model="llama3.1")
# Streamlit UI
st.title("Tor-Enabled Web Scraper")
# Check if Tor is running
if check_tor():
st.success("Tor is enabled. Your connection is anonymous!")
else:
st.warning("Tor is NOT working. Please ensure it is running.")
# User Input
input_prompt = st.text_area("Describe what you want to know")
question = input_prompt.replace(" ", "+")
if st.button("Let's go"):
st.write("Searching through Tor...")
# Avoid scraping Google directly (Use an alternative)
url = f"https://lite.duckduckgo.com/lite?q={question}"
content = search(url)
if content:
body_content = extract_body_content(content)
cleaned_content = clean_body_content(body_content)
# Store the content
st.session_state.content = cleaned_content
st.success("Content retrieved successfully!")
chunks = split_content(st.session_state.content)
parsed_result = parse(chunks, question)
st.write(parsed_result)