forked from huzhicheng/BaiduMusicSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspiderWorker.py
53 lines (41 loc) · 1.54 KB
/
spiderWorker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#-*- coding:utf-8 -*-
__author__ = 'huzhicheng'
import threading
from threading import RLock
import urllib2
from bs4 import BeautifulSoup
import codecs
import os
# Directory that contains this module, always ending in "/" so a file name can
# be appended directly. abspath() is applied first because __file__ may be a
# bare relative name (e.g. "spiderWorker.py"): dirname() would then return ""
# and root would collapse to "/", i.e. the filesystem root.
root = os.path.dirname(os.path.abspath(__file__))+"/"
# Module-level re-entrant lock; code in this module acquires it around the
# operations it wants serialized across threads.
lock = RLock()
class worker(threading.Thread):
    """Thread that processes song links taken from a shared queue.

    Every link read from the queue is turned into the URL
    ``http://music.baidu.com<link>/download`` and fetched. When the page
    contains an ``<a id="128">`` anchor, one line of the form
    ``url|song name|singer`` is appended to ``list.txt`` under ``root``.
    The loop stops when the unicode string ``"quit"`` is read from the queue.
    """

    def __init__(self,queue,tname):
        """Remember the work queue and name the thread.

        queue -- object whose ``get()`` returns the next link
                 (presumably a ``Queue.Queue`` -- confirm with the producer).
        tname -- name passed to ``threading.Thread``.
        """
        super(worker,self).__init__(name=tname)
        self._queue = queue

    def run(self):
        """Fetch and record songs until the ``"quit"`` sentinel arrives.

        Errors raised by ``urllib2.urlopen`` are not caught here and end the
        thread, exactly as before.
        """
        while True:
            # `with` releases the lock even if get() raises; the old
            # acquire()/release() pair left it held forever in that case.
            # NOTE(review): the lock is still held while waiting on the queue,
            # as in the original; if the queue is already thread-safe this
            # lock could be dropped -- confirm against the producer.
            with lock:
                mLink = self._queue.get()
            if isinstance(mLink,unicode) and mLink=="quit":
                break
            downloadUrl = "http://music.baidu.com"+mLink+"/download"
            req = urllib2.urlopen(downloadUrl)
            try:
                soup = BeautifulSoup(req.read())
            finally:
                # Always release the HTTP connection, even on a failed read.
                req.close()
            tag = soup.find("a",id="128")
            if not tag:
                # No download anchor on this page: nothing to record.
                continue
            print(threading.currentThread().getName(),tag["href"])
            self._append_record(self._build_record(soup,tag))

    def _build_record(self,soup,tag):
        """Return the UTF-8 encoded ``url|name|author`` line for one page."""
        # Start from empty fields for every record. Previously a page without
        # a song-name or singer element raised NameError on the first song, or
        # silently reused the values of the previous song afterwards.
        name = ""
        author = ""
        songNameSoup = soup.find("a",class_="song-link-hook")
        if songNameSoup:
            name = unicode(songNameSoup.text).encode("utf-8")
        singerSoup = soup.find("span",class_="author_list")
        if singerSoup:
            author = unicode(singerSoup["title"]).encode("utf-8")
        href = unicode(tag["href"]).encode("utf-8")
        return "|".join(["http://music.baidu.com"+href,name,author])

    def _append_record(self,record):
        """Append one record line to ``list.txt`` while holding ``lock``."""
        # Both context managers guarantee cleanup: the file is closed and the
        # lock released even when the write fails.
        with lock:
            with codecs.open(root+"list.txt","a") as f:
                f.write(record+"\n")