Blondon1 patch 3 #371

Open
wants to merge 2 commits into main
AUTOMATION/PDF To Text/pdfToText.py: 6 changes (4 additions, 2 deletions)
@@ -25,10 +25,12 @@ def convert_pdf(filename):

     # If any Error is encountered, Print the Error on Screen
     except Exception as e:
-        print(f'Error Converting PDF to Text or Saving Converted Text into .txt file: {e}')
+        print(f'Error: {e}')
+        if out_filename.exists():
+            out_filename.unlink()
         return None
 
 
 if __name__ == '__main__':
     file = input('Enter Full Path and FileName: ')
-    convert_pdf(file)
+    convert_pdf(file)
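For context, the new `except` block refers to `out_filename`, which is defined earlier in `convert_pdf` and not visible in this hunk. Below is a minimal sketch of the cleanup-on-failure pattern the change adopts; the use of pdfminer.six and the way `out_filename` is derived are assumptions for illustration only, not taken from the PR.

```python
# Minimal sketch of the cleanup-on-failure pattern introduced by this hunk.
# Assumptions (not from the PR): pdfminer.six as the extraction backend and
# out_filename derived from the input path; the real pdfToText.py may differ.
from pathlib import Path
from pdfminer.high_level import extract_text


def convert_pdf(filename):
    out_filename = Path(filename).with_suffix('.txt')
    try:
        text = extract_text(filename)      # pull all text out of the PDF
        out_filename.write_text(text)      # save it next to the source file
        return out_filename
    except Exception as e:
        print(f'Error: {e}')
        # Remove a partially written .txt so a failed run leaves no truncated output behind.
        if out_filename.exists():
            out_filename.unlink()
        return None
```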
AUTOMATION/Web_Scraper/app.py: 71 changes (33 additions, 38 deletions)
@@ -2,53 +2,48 @@
 import requests
 import openpyxl
 
 
 def extract_brand_name_and_title(name):
     # Split the name and return the first word as the brand name and the rest as title
     brand, title = name.split(' ', 1)
     return brand, title
 
 
-def scrape_graphics_cards_data():
+def get_page_content(url):
+    headers = {'User-Agent': 'Mozilla/5.0'}
+    response = requests.get(url, headers=headers)
+    response.raise_for_status()
+    return response.text
+
+def parse_html(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    return soup.find('div', class_='main-products product-grid').find_all(
+        'div', class_='product-layout has-extra-button')
+
+def write_to_excel(cards, file_path):
+    excel = openpyxl.Workbook()
+    sheet = excel.active
+    sheet.title = "price"
+    sheet.append(['Brand', 'Name', 'Price'])
+
+    for card in cards:
+        name = card.find('div', class_='name').a.text
+        brand, title = extract_brand_name_and_title(name)
+        price = card.find('div', class_='price').span.text
+        sheet.append([brand, title, price])
+
+    with open(file_path, 'wb') as f:
+        excel.save(f)
+
+def scrape_graphics_cards_data(file_path='Graphics Card.xlsx'):
     try:
-        # Create a new Excel workbook and set up the worksheet
-        excel = openpyxl.Workbook()
-        sheet = excel.active
-        sheet.title = "price"
-        sheet.append(['Brand', 'Name', 'Price'])
-
         url = 'https://www.techlandbd.com/pc-components/graphics-card?sort=p.price&order=ASC&fq=1&limit=100'
-        response = requests.get(url)
-        response.raise_for_status()
-
-        # Parse the HTML content
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Find all product cards on the webpage
-        cards = soup.find('div', class_='main-products product-grid').find_all(
-            'div', class_='product-layout has-extra-button')
-
-        for card in cards:
-            # Extract the product name
-            name = card.find('div', class_='name').a.text
-
-            # Split the name to get the brand and title
-            brand, title = extract_brand_name_and_title(name)
-
-            # Extract the product price
-            price = card.find('div', class_='price').span.text
-
-            # Print the product details and add them to the Excel sheet
-            print(brand, title, price)
-            sheet.append([brand, title, price])
-
-        # Save the Excel file
-        excel.save('Graphics Card.xlsx')
+        html = get_page_content(url)
+        cards = parse_html(html)
+        write_to_excel(cards, file_path)
 
     except requests.RequestException as e:
         print("Network error:", e)
     except Exception as e:
         print("An error occurred:", e)
 
 
 if __name__ == "__main__":
     # Call the main scraping function
     scrape_graphics_cards_data()
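One upshot of the refactor worth noting: with fetching, parsing, and writing split into separate helpers and the output path exposed as a parameter, the parser can be exercised offline. The snippet below is a usage sketch, not part of the diff; the import path, HTML fragment, and product values are invented example data.

```python
# Offline usage sketch (not part of the PR). Assumes app.py is on the import
# path; the HTML fragment and product values are invented example data.
from app import parse_html, write_to_excel

sample_html = """
<div class="main-products product-grid">
  <div class="product-layout has-extra-button">
    <div class="name"><a>ASUS Dual GeForce RTX 4060</a></div>
    <div class="price"><span>45,000</span></div>
  </div>
</div>
"""

cards = parse_html(sample_html)       # parse saved markup, no network request
write_to_excel(cards, 'sample.xlsx')  # writes one data row: ASUS | Dual GeForce RTX 4060 | 45,000
```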
