map的基本使用:
map函数一手包办了序列操作,参数传递和结果保存等一系列的操作。
from multiprocessing.dummy import Pool
pool = Pool(4) # 4 是线程池中的工作线程数,通常按电脑的 CPU 核数来设置
results = pool.map(爬取函数,网址列表)
from multiprocessing.dummy import Pool as ThreadPool
import requests
import time
# Request headers sent with every fetch; only the User-Agent is overridden.
kv = {'user-agent': 'Mozilla/5.0'}


def getsource(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller (and, in a
            thread pool, one worker) indefinitely.

    Returns:
        The ``requests.Response`` for the page, so callers such as
        ``pool.map`` collect real results; callers that ignore the return
        value are unaffected.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    html = requests.get(url, headers=kv, timeout=timeout)
    return html
# Build the 41 page URLs to crawl. The `pn` query value is the page offset
# and advances in steps of 50: 0, 50, ..., 2000.
urls = [
    'https://tieba.baidu.com/f?kw=读书&ie=utf-8&pn=' + str(offset)
    for offset in range(0, 41 * 50, 50)
]
# Single-threaded crawl: fetch the pages one after another and time the run.
# perf_counter() is used because it is a monotonic, high-resolution clock
# meant for measuring intervals; time1/time2 are therefore only meaningful
# as a difference, not as wall-clock timestamps.
time1 = time.perf_counter()
for each in urls:
    print(each)
    getsource(each)
time2 = time.perf_counter()
print('单线程耗时: ' + str(time2-time1))
# Multi-threaded crawl: fetch the same pages with a pool of 8 worker threads
# and time the run (pool start-up is not included in the measurement).
pool = ThreadPool(8)
time3 = time.perf_counter()  # monotonic clock, intended for interval timing
try:
    # map() blocks until every URL has been handled and returns the results
    # in the same order as `urls`.
    results = pool.map(getsource, urls)
finally:
    # Always shut the pool down, even when a fetch raised, so that no worker
    # threads are left behind.
    pool.close()
    pool.join()
time4 = time.perf_counter()
print('多线程所消耗时间:' + str(time4 - time3))
