diff --git a/Image-Processing/Recognize Text From image/Recognize_text_from_image.py b/Image-Processing/Recognize Text From image/Recognize_text_from_image.py
new file mode 100644
index 00000000..e7c9ab55
--- /dev/null
+++ b/Image-Processing/Recognize Text From image/Recognize_text_from_image.py
@@ -0,0 +1,30 @@
+from PIL import Image
+import pytesseract
+import cv2
+import os
+
+# Default Windows install location of the Tesseract binary.
+TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+IMAGE_PATH = "image.jpg"  # image to run OCR on
+SEARCH_TEXT = "NEVER"  # text to look for in the OCR result
+
+# Only override the command when that binary exists; otherwise pytesseract
+# keeps its default and looks up "tesseract" on PATH.
+if os.path.isfile(TESSERACT_CMD):
+    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
+
+image = cv2.imread(IMAGE_PATH)
+if image is None:
+    # cv2.imread returns None (it does not raise) when the file cannot be read
+    raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))
+
+gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # OCR runs on the grayscale copy
+# Hand the pixels to Tesseract in memory: no lossy JPEG round-trip and no
+# temporary file left behind if OCR fails.
+text = pytesseract.image_to_string(Image.fromarray(gray))
+print("successfully found" if SEARCH_TEXT in text else "Text Not Found")
+
+cv2.imshow("Image", image)
+cv2.imshow("Output", gray)
+cv2.waitKey(0)  # keep both windows open until a key is pressed
+cv2.destroyAllWindows()
diff --git a/Web-Scraping/flipcart_ratings_price_scraping/README.md b/Web-Scraping/flipcart_ratings_price_scraping/README.md
new file mode 100644
index 00000000..52364bf3
--- /dev/null
+++ b/Web-Scraping/flipcart_ratings_price_scraping/README.md
@@ -0,0 +1,6 @@
+We can collect various details (price, name, ratings) from an e-commerce website (Flipkart) and store them in a CSV file :)
+
+# Modules used
+Selenium: used to automate web applications; it supports Chrome, Firefox, IE and Safari.
+Pandas: Python library used for data analysis. A DataFrame is a two-dimensional (2-D) data structure defined in pandas, consisting of rows and columns.
+BeautifulSoup: Beautiful Soup is a Python library for pulling data out of HTML and XML files. It works with your favorite parser to provide idiomatic ways of navigating, searching, and modifying the parse tree.
\ No newline at end of file diff --git a/Web-Scraping/flipcart_ratings_price_scraping/chromedriver.exe b/Web-Scraping/flipcart_ratings_price_scraping/chromedriver.exe new file mode 100644 index 00000000..f3e602b0 Binary files /dev/null and b/Web-Scraping/flipcart_ratings_price_scraping/chromedriver.exe differ diff --git a/Web-Scraping/flipcart_ratings_price_scraping/products.csv b/Web-Scraping/flipcart_ratings_price_scraping/products.csv new file mode 100644 index 00000000..5842c385 --- /dev/null +++ b/Web-Scraping/flipcart_ratings_price_scraping/products.csv @@ -0,0 +1,25 @@ +Product Name,Price,Rating +HP 15s Core i3 10th Gen - (4 GB/512 GB SSD/Windows 10 Home) 15s-fr1004tu Thin and Light Laptop,"₹39,990",4.4 +Asus VivoBook 14 Ryzen 5 Quad Core 3500U - (8 GB/512 GB SSD/Windows 10 Home) X412DA-EK501T Thin and Li...,"₹42,990",4.3 +Dell Vostro Core i3 10th Gen - (4 GB/256 GB SSD/Windows 10 Home) Vostro 3491 Thin and Light Laptop,"₹35,990",4.2 +Apple MacBook Air Core i3 10th Gen - (8 GB/256 GB SSD/Mac OS Catalina) MWTJ2HN/A,"₹92,990",4.7 +Acer One 14 Pentium Dual Core - (4 GB/1 TB HDD/Windows 10 Home) Z2-485 Thin and Light Laptop,"₹24,990",4.1 +Dell Vostro Core i5 10th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home) Vostro 3491 Thin and Light L...,"₹53,490",4.2 +Acer Aspire 7 Core i5 9th Gen - (8 GB/512 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Geforce GTX 1650...,"₹58,990",4.5 +Apple MacBook Pro with Touch Bar Core i5 8th Gen - (8 GB/256 GB SSD/Mac OS Catalina) MXK62HN/A,"₹1,22,990",4.8 +Lenovo Ideapad S540 Core i5 10th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home/2 GB Graphics) S540-1...,"₹65,990",4.5 +MSI GL65 Leopard Core i7 10th Gen - (16 GB/1 TB HDD/256 GB SSD/Windows 10 Home/6 GB Graphics/NVIDIA Ge...,"₹1,19,990",4.8 +HP 14s Core i5 10th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home) 14S-ER0003TU Thin and Light Lapto...,"₹63,990",4.4 +HP Pavilion x360 Core i7 8th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home/2 GB Graphics) 14-dh0112T...,"₹79,990",4.8 
+Asus VivoBook 14 Core i3 10th Gen - (4 GB/256 GB SSD/Windows 10 Home) X412FA-EK363T Thin and Light Lap...,"₹36,990",4.3 +HP Pavilion x360 Core i3 10th Gen - (8 GB/512 GB SSD/Windows 10 Home) 14-dh1178TU 2 in 1 Laptop,"₹53,990",4.3 +MSI Alpha 15 Ryzen 7 Quad Core 3750H - (16 GB/1 TB HDD/256 GB SSD/Windows 10 Home/4 GB Graphics/AMD Ra...,"₹69,990",4.7 +Asus VivoBook 14 Core i5 10th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home/2 GB Graphics) X412FJ-EK...,"₹59,990",4.4 +HP Omen Ryzen 5 Hexa Core 4600H - (8 GB/512 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Geforce GTX 16...,"₹75,990",4.6 +Acer Aspire 7 Ryzen 7 Quad Core 3750H - (8 GB/512 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Geforce ...,"₹59,990",4.2 +HP 15 Ryzen 3 Dual Core 3200U - (4 GB/1 TB HDD/Windows 10 Home) 15-db1069AU Laptop,"₹32,180",4.1 +Asus TUF Gaming A17 Ryzen 7 Octa Core 4800H - (16 GB/1 TB HDD/256 GB SSD/Windows 10 Home/6 GB Graphics...,"₹91,990",4.8 +Apple MacBook Air Core i5 10th Gen - (8 GB/512 GB SSD/Mac OS Catalina) MVH22HN/A,"₹1,22,990",4.5 +Lenovo Ideapad S145 Ryzen 3 Dual Core 3200U - (4 GB/1 TB HDD/Windows 10 Home) S145-15API Laptop,"₹33,490",4 +Asus ROG Strix G15 (2020) Core i7 10th Gen - (16 GB/1 TB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Gefo...,"₹96,990",4.8 +Apple MacBook Air Core i3 10th Gen - (8 GB/256 GB SSD/Mac OS Catalina) MWTL2HN/A,"₹92,990",4.7 diff --git a/Web-Scraping/flipcart_ratings_price_scraping/requirements.txt b/Web-Scraping/flipcart_ratings_price_scraping/requirements.txt new file mode 100644 index 00000000..c2dda6ac --- /dev/null +++ b/Web-Scraping/flipcart_ratings_price_scraping/requirements.txt @@ -0,0 +1 @@ +selenium==1.24.2 \ No newline at end of file diff --git a/Web-Scraping/flipcart_ratings_price_scraping/scraping.py b/Web-Scraping/flipcart_ratings_price_scraping/scraping.py new file mode 100644 index 00000000..3eb6b654 --- /dev/null +++ b/Web-Scraping/flipcart_ratings_price_scraping/scraping.py @@ -0,0 +1,21 @@ +from selenium import webdriver +from bs4 
import BeautifulSoup
+import pandas as pd
+
+URL = "https://www.flipkart.com/laptops/~buyback-guarantee-on-laptops-/pr?sid=6bo%2Cb5g&uniq"
+# CSV column -> CSS class of the <div> that holds it inside a product card
+FIELDS = {'Product Name': '_3wU53n', 'Price': '_1vC4OE _2rQ-NK', 'Rating': 'hGSR34'}
+
+driver = webdriver.Chrome(r"chromedriver")
+try:
+    driver.get(URL)
+    soup = BeautifulSoup(driver.page_source, 'html.parser')
+finally:
+    driver.quit()  # always end the browser session, even if loading the page failed
+
+rows = []
+for card in soup.find_all('a', href=True, attrs={'class': '_31qSD5'}):
+    cells = {col: card.find('div', attrs={'class': cls}) for col, cls in FIELDS.items()}
+    # find() returns None when a card lacks a field: write an empty cell instead of crashing
+    rows.append({col: cell.text if cell is not None else '' for col, cell in cells.items()})
+pd.DataFrame(rows, columns=list(FIELDS)).to_csv('products.csv', index=False, encoding='utf-8')