diff --git a/.vs/Avito/v15/.suo b/.vs/Avito/v15/.suo new file mode 100644 index 0000000..ef8b240 Binary files /dev/null and b/.vs/Avito/v15/.suo differ diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite new file mode 100644 index 0000000..cff8af0 Binary files /dev/null and b/.vs/slnx.sqlite differ diff --git a/Avito/Category.py b/Avito/Category.py index 6748682..31a64e1 100644 --- a/Avito/Category.py +++ b/Avito/Category.py @@ -4,7 +4,7 @@ from Avito.categories import categories -def getListCategories(html): +def get_list_categories(html): """ Функция возвращает список категорий. """ soup = BeautifulSoup(html, 'lxml') all_li = soup.find('nav', class_='category-map').find_all('li', class_='category-map-item') @@ -18,10 +18,10 @@ def getListCategories(html): return links -def getDictCategories(ip_list): +def get_dict_categories(ip_list): """ Функция возвращает словарь с категориями. """ dict_categories = {} - links = getListCategories(Request('https://www.avito.ru/', ip_list).getHtml()) + links = get_list_categories(Request('https://www.avito.ru/', ip_list).getHtml()) # Добавляем в словарь категории. Ключём является название категории на русском языке, # значение является название категории на английском языке diff --git a/Avito/District.py b/Avito/District.py index 6e3b224..7fe001c 100644 --- a/Avito/District.py +++ b/Avito/District.py @@ -3,7 +3,7 @@ from Avito.Request import Request -def getData(html): +def get_data(html): data = {} soup = BeautifulSoup(html, "lxml") tds = soup.find("optgroup", label="район").find_all("option") @@ -14,12 +14,12 @@ def getData(html): return data -def getDistrict(city, ip): +def get_district(city, ip): """ Функция возвращает словарь с районами city. Ключём является название района на русском, значение является название района на английском. """ url = "https://avito.ru/{}".format(city) - district = getData(Request(url, ip).getHtml()) + district = get_data(Request(url, ip).getHtml()) print(district) return district diff --git a/Avito/Goods.py b/Avito/Goods.py index 63c70e7..a508f55 100644 --- a/Avito/Goods.py +++ b/Avito/Goods.py @@ -5,7 +5,7 @@ links = [] -def getLinksGoods(html): +def get_links_goods(html): soup = BeautifulSoup(html, "lxml") tds = soup.find_all("div", class_="item item_table clearfix js-catalog-item-enum js-item-extended item_table_extended snippet-experiment item_hide-elements") for td in tds: @@ -23,11 +23,11 @@ def getLinksGoods(html): return links -def getGoods(urls, ip): +def get_goods(urls, ip): """ Функция возвращает список ссылок товаров. """ for url in urls: html = Request(url, ip).getHtml() - getLinksGoods(html) + get_links_goods(html) return links diff --git a/Avito/Links.py b/Avito/Links.py index 26af1c8..9c8bc71 100644 --- a/Avito/Links.py +++ b/Avito/Links.py @@ -5,7 +5,7 @@ urls = [] -def getLastPage(html): +def get_last_page(html): try: soup = BeautifulSoup(html, "lxml") pages = soup.find("div", class_="pagination-pages clearfix").find_all("a", class_="pagination-page") @@ -15,10 +15,10 @@ def getLastPage(html): return 1 -def getLinks(city, categories, subcategories, district, ip, qq): +def get_links(city, categories, subcategories, district, ip, qq): for subcategory in subcategories: url = "https://avito.ru/{}/{}/{}?p={}&{}={}".format(city, categories, subcategory, 1, qq, district) - n = int(getLastPage(Request(url, ip).getHtml())) + n = int(get_last_page(Request(url, ip).getHtml())) print(n) for page in range(1, n + 1): url = "https://avito.ru/{}/{}/{}?p={}&{}={}".format(city, categories, subcategory, page, qq, district) @@ -27,11 +27,11 @@ def getLinks(city, categories, subcategories, district, ip, qq): print(url) -def linksToProductPages(city, categories, subcategories, districts, ip, qq): +def links_to_product_pages(city, categories, subcategories, districts, ip, qq): """ Функция возвращает список ссылок страниц с товарими. """ if type(districts) == dict: for district in districts: - getLinks(city, categories, subcategories, districts[district], ip, qq) + get_links(city, categories, subcategories, districts[district], ip, qq) else: - getLinks(city, categories, subcategories, districts, ip, qq) + get_links(city, categories, subcategories, districts, ip, qq) return urls diff --git a/Avito/Metro.py b/Avito/Metro.py index b187c2e..9043ef3 100644 --- a/Avito/Metro.py +++ b/Avito/Metro.py @@ -6,7 +6,7 @@ "Санкт-перербург": "spb"} -def getData(html, label, tag, attribute): +def get_data(html, label, tag, attribute): data = {} soup = BeautifulSoup(html, "lxml") if label: @@ -21,14 +21,14 @@ def getData(html, label, tag, attribute): return data -def getMetro(city, ip): +def get_metro(city, ip): if city in city_with_metro: url = "https://www.avito.ru/s/avito/components/metro-map/svg-maps/metro-map-{}.svg".format( city_with_metro[city]) html = Request(url, ip).forMetro() - name = getData(html, False, "text", "data-st-id") + name = get_data(html, False, "text", "data-st-id") return name else: html = Request("https://www.avito.ru/{}".format(city), ip).getHtml() - name = getData(html, True, "option", "value") + name = get_data(html, True, "option", "value") return name diff --git a/Avito/Number.py b/Avito/Number.py index 5ef6413..31410d8 100644 --- a/Avito/Number.py +++ b/Avito/Number.py @@ -21,21 +21,21 @@ def main(self): break except: continue - name = self.getName() - self.buttonClick() - image = self.getImage() + name = self.get_name() + self.button_click() + image = self.get_image() self.write(image) - number = self.getNamber() + number = self.get_namber() data = "{} - {}".format(name, str(number)) print(data) all_number.append(data) - def getName(self): + def get_name(self): """ Метод возвращает название товара. """ name = self.driver.find_element_by_xpath('//span[@class="title-info-title-text"]').text return name - def buttonClick(self): + def button_click(self): """ Метод кликает на кнопку для получения номера телефона. """ while True: try: @@ -46,7 +46,7 @@ def buttonClick(self): except: continue - def getImage(self): + def get_image(self): """ Метод возвращает изображение номера телефона в байтах. """ while True: try: @@ -63,7 +63,7 @@ def write(self, img): with open(self.name, "wb") as f: f.write(img) - def getNamber(self): + def get_namber(self): """ Метод возвращает номер телефона. """ tessdata_dir_config = '--tessdata-dir "C:\Tesseract-OCR"' image = Image.open(self.name) diff --git a/Avito/NumberTelephone.py b/Avito/NumberTelephone.py index 4403164..ba7b7c8 100644 --- a/Avito/NumberTelephone.py +++ b/Avito/NumberTelephone.py @@ -5,7 +5,7 @@ from Avito.Number import NumberTelephone, all_number -def getNumberTelephone(urls, n): +def get_number_telephone(urls, n): """ Функция возвращает список номеров телефона продавцов. """ try: rmtree("AvitoIMG") @@ -23,11 +23,11 @@ def getNumberTelephone(urls, n): break for list in lists: - r = threading.Thread(target=Threading, args=(lists[lists.index(list)], lists.index(list))) + r = threading.Thread(target=threading, args=(lists[lists.index(list)], lists.index(list))) r.start() -def Threading(urls, index): +def threading(urls, index): for url in urls: number = NumberTelephone(url, urls.index(url), index) number.main() diff --git a/Avito/Proxies.py b/Avito/Proxies.py index ae442a1..bf2470f 100644 --- a/Avito/Proxies.py +++ b/Avito/Proxies.py @@ -5,12 +5,12 @@ proxy_list = [] -def getHtml(url): +def get_html(url): r = requests.get(url) return r.text -def listIP(html): +def list_iP(html): soup = BeautifulSoup(html, "lxml") trs = soup.find("tbody").find_all("tr") for tr in trs: @@ -21,10 +21,10 @@ def listIP(html): return proxy_list -def getProxy(): +def get_proxy(): """ Функция возвращает список https прокси. """ url = "https://www.sslproxies.org/" - proxy_list = listIP(getHtml(url)) + proxy_list = list_iP(get_html(url)) return proxy_list diff --git a/Avito/Request.py b/Avito/Request.py index bc86e15..1688165 100644 --- a/Avito/Request.py +++ b/Avito/Request.py @@ -10,7 +10,7 @@ def __init__(self, url, list_ip): self.headers = { 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'} - def getHtml(self, returned=None): + def get_html(self, returned=None): for ip in self.list_ip: try: html = self.response(ip) @@ -26,7 +26,7 @@ def response(self, ip): if len(r.text) > 80000: return r.text - def forMetro(self): + def for_metro(self): for ip in self.list_ip: try: r = get(self.url, proxies={"https": ip}, headers=self.headers, timeout=5) diff --git a/Avito/Subcategories.py b/Avito/Subcategories.py index e1f10d2..2e839a1 100644 --- a/Avito/Subcategories.py +++ b/Avito/Subcategories.py @@ -3,7 +3,7 @@ from Avito.Request import Request -def getData(html): +def get_data(html): links = [] soup = BeautifulSoup(html, "lxml") tds = soup.find("div", class_="catalog-counts__section").find_all("li") @@ -13,12 +13,12 @@ def getData(html): return links -def getSubcategories(category, ip): +def get_subcategories(category, ip): """ Функция возвращает список подкатегорий category. """ url = "https://avito.ru/rossiya/{}".format(category) while True: try: - subcategories = getData(Request(url, ip).getHtml()) + subcategories = get_data(Request(url, ip).getHtml()) print(subcategories) return subcategories except: diff --git a/Avito/__main__.py b/Avito/__main__.py index 6c2acb2..a424568 100644 --- a/Avito/__main__.py +++ b/Avito/__main__.py @@ -1,23 +1,23 @@ # coding=utf-8 -from Avito.District import getDistrict +from Avito.District import get_district from Avito.City import cities -from Avito.Links import linksToProductPages -from Avito.Goods import getGoods -from Avito.Category import getDictCategories -from Avito.NumberTelephone import getNumberTelephone -from Avito.Subcategories import getSubcategories +from Avito.Links import links_to_product_pages +from Avito.Goods import get_goods +from Avito.Category import get_dict_categories +from Avito.NumberTelephone import get_number_telephone +from Avito.Subcategories import get_subcategories from Avito.Metro import getMetro -from Avito.Proxies import getProxy +from Avito.Proxies import get_proxy def main(): - proxy_list = getProxy() + proxy_list = get_proxy() - categories = getDictCategories(proxy_list) + categories = get_dict_categories(proxy_list) city = cities["Ижевск"] - district = getDistrict(city, proxy_list) + district = get_district(city, proxy_list) # Если в city метро, а не районы, то # metro = getMetro(city, proxy_list) @@ -25,16 +25,16 @@ def main(): category = categories["Велосипеды"] - subcategories = getSubcategories(category, proxy_list) + subcategories = get_subcategories(category, proxy_list) - links_to_product_pages = linksToProductPages(city, category, subcategories, district['Октябрьский'], proxy_list, "district") + links_to_product_pages = links_to_product_pages(city, category, subcategories, district['Октябрьский'], proxy_list, "district") - goods = getGoods(links_to_product_pages, proxy_list) + goods = get_goods(links_to_product_pages, proxy_list) # Колличество потоков n = 5 - numbers = getNumberTelephone(goods, n) + numbers = get_number_telephone(goods, n) if __name__ == '__main__': diff --git a/Test/NumberTest.py b/Test/NumberTest.py index ee3dc55..0be3231 100644 --- a/Test/NumberTest.py +++ b/Test/NumberTest.py @@ -1,10 +1,10 @@ # coding=utf-8 -from Avito.Goods import getGoods -from Avito.NumberTelephone import getNumberTelephone -from Avito.Proxies import getProxy +from Avito.Goods import get_goods +from Avito.NumberTelephone import get_number_telephone +from Avito.Proxies import get_proxy -proxy_list = getProxy() -goods = getGoods(["https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=4&district=164", +proxy_list = get_proxy() +goods = get_goods(["https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=4&district=164", "https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=3&district=164", "https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=2&district=164", "https://avito.ru/izhevsk/velosipedy/zapchasti_i_aksessuary?p=1&district=164"], proxy_list) @@ -12,4 +12,4 @@ # Колличество потоков n = 5 -numbers = getNumberTelephone(goods, n) +numbers = get_number_telephone(goods, n)