Blondon1 patch 3 #371

Open
wants to merge 2 commits into main
AUTOMATION/PDF To Text/pdfToText.py: 6 changes (4 additions, 2 deletions)
@@ -25,10 +25,12 @@ def convert_pdf(filename):

     # If any Error is encountered, Print the Error on Screen
     except Exception as e:
-        print(f'Error Converting PDF to Text or Saving Converted Text into .txt file: {e}')
+        print(f'Error: {e}')
+        if out_filename.exists():
+            out_filename.unlink()
         return None
 
 
 if __name__ == '__main__':
     file = input('Enter Full Path and FileName: ')
-    convert_pdf(file)
+    convert_pdf(file)
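For context, the new `except` block refers to `out_filename`, which is defined earlier in `convert_pdf` and not visible in this hunk. Below is a minimal sketch of the cleanup-on-failure pattern the change adopts; the use of pdfminer.six and the way `out_filename` is derived are assumptions for illustration only, not taken from the PR.

```python
# Minimal sketch of the cleanup-on-failure pattern introduced by this hunk.
# Assumptions (not from the PR): pdfminer.six as the extraction backend and
# out_filename derived from the input path; the real pdfToText.py may differ.
from pathlib import Path
from pdfminer.high_level import extract_text


def convert_pdf(filename):
    out_filename = Path(filename).with_suffix('.txt')
    try:
        text = extract_text(filename)      # pull all text out of the PDF
        out_filename.write_text(text)      # save it next to the source file
        return out_filename
    except Exception as e:
        print(f'Error: {e}')
        # Remove a partially written .txt so a failed run leaves no truncated output behind.
        if out_filename.exists():
            out_filename.unlink()
        return None
```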
AUTOMATION/Web_Scraper/app.py: 71 changes (33 additions, 38 deletions)
@@ -2,53 +2,48 @@
 import requests
 import openpyxl
 
 
 def extract_brand_name_and_title(name):
     # Split the name and return the first word as the brand name and the rest as title
     brand, title = name.split(' ', 1)
     return brand, title
 
 
-def scrape_graphics_cards_data():
+def get_page_content(url):
+    headers = {'User-Agent': 'Mozilla/5.0'}
+    response = requests.get(url, headers=headers)
+    response.raise_for_status()
+    return response.text
+
+def parse_html(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    return soup.find('div', class_='main-products product-grid').find_all(
+        'div', class_='product-layout has-extra-button')
+
+def write_to_excel(cards, file_path):
+    excel = openpyxl.Workbook()
+    sheet = excel.active
+    sheet.title = "price"
+    sheet.append(['Brand', 'Name', 'Price'])
+
+    for card in cards:
+        name = card.find('div', class_='name').a.text
+        brand, title = extract_brand_name_and_title(name)
+        price = card.find('div', class_='price').span.text
+        sheet.append([brand, title, price])
+
+    with open(file_path, 'wb') as f:
+        excel.save(f)
+
+def scrape_graphics_cards_data(file_path='Graphics Card.xlsx'):
     try:
-        # Create a new Excel workbook and set up the worksheet
-        excel = openpyxl.Workbook()
-        sheet = excel.active
-        sheet.title = "price"
-        sheet.append(['Brand', 'Name', 'Price'])
-
         url = 'https://www.techlandbd.com/pc-components/graphics-card?sort=p.price&order=ASC&fq=1&limit=100'
-        response = requests.get(url)
-        response.raise_for_status()
-
-        # Parse the HTML content
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Find all product cards on the webpage
-        cards = soup.find('div', class_='main-products product-grid').find_all(
-            'div', class_='product-layout has-extra-button')
-
-        for card in cards:
-            # Extract the product name
-            name = card.find('div', class_='name').a.text
-
-            # Split the name to get the brand and title
-            brand, title = extract_brand_name_and_title(name)
-
-            # Extract the product price
-            price = card.find('div', class_='price').span.text
-
-            # Print the product details and add them to the Excel sheet
-            print(brand, title, price)
-            sheet.append([brand, title, price])
-
-        # Save the Excel file
-        excel.save('Graphics Card.xlsx')
+        html = get_page_content(url)
+        cards = parse_html(html)
+        write_to_excel(cards, file_path)
 
     except requests.RequestException as e:
         print("Network error:", e)
     except Exception as e:
         print("An error occurred:", e)
 
 
 if __name__ == "__main__":
     # Call the main scraping function
     scrape_graphics_cards_data()
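One upshot of the refactor worth noting: with fetching, parsing, and writing split into separate helpers and the output path exposed as a parameter, the parser can be exercised offline. The snippet below is a usage sketch, not part of the diff; the import path, HTML fragment, and product values are invented example data.

```python
# Offline usage sketch (not part of the PR). Assumes app.py is on the import
# path; the HTML fragment and product values are invented example data.
from app import parse_html, write_to_excel

sample_html = """
<div class="main-products product-grid">
  <div class="product-layout has-extra-button">
    <div class="name"><a>ASUS Dual GeForce RTX 4060</a></div>
    <div class="price"><span>45,000</span></div>
  </div>
</div>
"""

cards = parse_html(sample_html)       # parse saved markup, no network request
write_to_excel(cards, 'sample.xlsx')  # writes one data row: ASUS | Dual GeForce RTX 4060 | 45,000
```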
