-
Notifications
You must be signed in to change notification settings - Fork 0
/
cocktail_scraper.py
175 lines (135 loc) · 5.7 KB
/
cocktail_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# import requests
# import pandas as pd
# import io
# from PIL import Image
# import base64
# import xlsxwriter
# url = 'https://www.thecocktaildb.com/api/json/v1/1/search.php'
# # Set the first letter of the cocktail name to search for
# first_letter = 'a'
# # Set the query parameters for the API request
# params = {'f': first_letter}
# # Send the API request and store the response in a variable
# response = requests.get(url, params=params)
# # Extract the JSON data from the response
# data = response.json()
# # Create a list to store the cocktail data
# cocktails = []
# # Iterate over the cocktails in the JSON data and extract the name, image, and instructions
# for drink in data['drinks']:
# name = drink['strDrink']
# image_url = drink['strDrinkThumb']
# instructions = drink['strInstructions']
# # Load the image from the URL and save it to a BytesIO object
# image_response = requests.get(image_url)
# image_bytes = io.BytesIO(image_response.content)
# # Open the image using Pillow and resize it to fit in the Excel cell
# image = Image.open(image_bytes)
# max_size = (200, 200)
# image.thumbnail(max_size)
# # Convert the image to a base64-encoded string
# image_buf = io.BytesIO()
# image.save(image_buf, format='PNG')
# image_data = image_buf.getvalue()
# image_base64 = base64.b64encode(image_data).decode()
# # Add the cocktail data to the list of cocktails
# cocktails.append({'name': name, 'image': image_base64, 'instructions': instructions})
# # Create a DataFrame from the list of cocktails
# df = pd.DataFrame(cocktails)
# # Set up the XlsxWriter engine and worksheet
# writer = pd.ExcelWriter('cocktails.xlsx', engine='xlsxwriter')
# workbook = writer.book
# worksheet = workbook.add_worksheet('Cocktails')
# # Set the width of the columns to fit the data
# worksheet.set_column('A:A', 30)
# worksheet.set_column('B:B', 20)
# worksheet.set_column('C:C', 50)
# # Write the column headers to the worksheet
# worksheet.write('A1', 'Name')
# worksheet.write('B1', 'Image')
# worksheet.write('C1', 'Instructions')
# # Iterate over the cocktails and write the data to the worksheet
# for i, cocktail in enumerate(cocktails):
# # Write the name and instructions to the worksheet
# worksheet.write(i + 1, 0, cocktail['name'])
# worksheet.write(i + 1, 2, cocktail['instructions'])
# # Write the image to the worksheet as a base64-encoded string
# image_base64 = cocktail['image']
# image_data = base64.b64decode(image_base64)
# image_buf = io.BytesIO(image_data)
# worksheet.insert_image(i + 1, 1, 'image.png', {'image_data': image_buf})
# # Save the worksheet and close the workbook
# writer.save()
# import requests
# import pandas as pd
# url = 'https://www.thecocktaildb.com/api/json/v1/1/search.php'
# # Set the first letter of the cocktail name to search for
# first_letter = 'a'
# # Set the query parameters for the API request
# params = {'f': first_letter}
# # Send the API request and store the response in a variable
# response = requests.get(url, params=params)
# # Extract the JSON data from the response
# data = response.json()
# # Create a DataFrame from the 'drinks' list in the JSON data
# df = pd.DataFrame(data['drinks'])
# # Save the DataFrame to an Excel file
# writer = pd.ExcelWriter('cocktails.xlsx', engine='xlsxwriter')
# df.to_excel(writer, index=False)
# writer.save()
import requests
import pandas as pd
# TheCocktailDB search endpoint.  The search term is supplied through
# ``params`` only; keeping a trailing ``?s=`` here as well would send the
# ``s`` key twice in the query string.
url = 'https://www.thecocktaildb.com/api/json/v1/1/search.php'

SEARCH_LETTERS = 'abcdefghijklmnopqrstuvwxyz'
OUTPUT_FILE = 'cocktails.xlsx'
REQUEST_TIMEOUT = 30  # seconds; without it a stalled request blocks forever

# Spreadsheet column -> key of the drink record returned by the API, listed
# in the order the columns are written.
COLUMN_KEYS = {
    'drink': 'strDrink',
    'DrinkAlternate': 'strDrinkAlternate',
    'Tags': 'strTags',
    'category': 'strCategory',
    'IBA': 'strIBA',
    'alcoholic': 'strAlcoholic',
    'glass': 'strGlass',
    'instructions': 'strInstructions',
    'image': 'strDrinkThumb',
}
# NOTE(review): ingredient columns run to 10 but measure columns stop at 8,
# exactly as in the previous export; widen the ranges if Measure9/10 are wanted.
COLUMN_KEYS.update(
    (f'Ingredient{i}', f'strIngredient{i}') for i in range(1, 11)
)
COLUMN_KEYS.update(
    (f'Measure{i}', f'strMeasure{i}') for i in range(1, 9)
)


def extract_cocktail(cocktail_data):
    """Return one spreadsheet row for a single API drink record.

    Args:
        cocktail_data: Mapping for one drink as decoded from the API JSON.

    Returns:
        A dict keyed by the spreadsheet column names in ``COLUMN_KEYS``.
        A field that is absent from the record becomes ``None`` (an empty
        cell) instead of aborting the whole export.
    """
    return {
        column: cocktail_data.get(key) for column, key in COLUMN_KEYS.items()
    }


def drinks_from_payload(data):
    """Return the list stored under ``'drinks'`` in a decoded API response.

    Iterating ``data['drinks']`` directly raises ``TypeError`` when the value
    is ``None`` (presumably what the API sends when nothing matches -- TODO
    confirm), so a missing or null entry is normalised to an empty list.
    """
    return (data or {}).get('drinks') or []


def dedupe_rows(rows):
    """Return ``rows`` without repeated entries, keeping first-seen order.

    Successive searches may return the same drink more than once; two rows
    count as duplicates only when every column matches.  Assumes the column
    values are hashable scalars (strings or ``None``).
    """
    seen = set()
    unique = []
    for row in rows:
        key = tuple(row.items())
        if key not in seen:
            seen.add(key)
            unique.append(row)
    return unique


def fetch_drinks(letter, session=None):
    """Query the search endpoint for ``letter`` and return its drink records.

    Args:
        letter: Value sent as the ``s`` query parameter.
        session: Optional ``requests.Session`` to reuse connections; the
            module-level ``requests`` API is used when omitted.

    Returns:
        A list of drink mappings (possibly empty).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    http = requests if session is None else session
    response = http.get(url, params={'s': letter}, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return drinks_from_payload(response.json())


def collect_cocktails(letters=SEARCH_LETTERS):
    """Run one search per entry in ``letters`` and return all rows found.

    Results are accumulated across every search; rebinding the response on
    each iteration would keep only the final letter's drinks.
    """
    rows = []
    with requests.Session() as session:
        for letter in letters:
            rows.extend(
                extract_cocktail(drink)
                for drink in fetch_drinks(letter, session)
            )
    return dedupe_rows(rows)


def build_frame(rows):
    """Build the output DataFrame, with the full header even for no rows."""
    return pd.DataFrame(rows, columns=list(COLUMN_KEYS))


def main():
    """Download the cocktails and write them to ``OUTPUT_FILE``."""
    df = build_frame(collect_cocktails())
    # ``ExcelWriter.save()`` is gone in current pandas; the context manager
    # writes and closes the workbook, also when ``to_excel`` raises.
    with pd.ExcelWriter(OUTPUT_FILE, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False)


if __name__ == '__main__':
    main()