Scrapy请求回调不工作
问题描述:
这是我的代码:
class AAA(scrapy.Spider):
    """Spider that walks the listing rows of the start page.

    For every row it fills the shared item with name, date and link, records
    an absolute download URL in ``toDownload`` and yields the item. When a
    row is not newer than the last-update date it calls ``haha2`` and then
    closes the spider.
    """

    name = 'aaa'
    start_urls = [
        'https://forum.lowyat.net/topic/377400/all'
    ]
    # NOTE(review): Scrapy normally reads COOKIES_ENABLED from the project
    # settings; as a plain spider attribute it is presumably ignored - confirm.
    COOKIES_ENABLED = False
    count = 0
    check = 0
    # Class-level objects: the single AAAItem instance and the single list
    # below are shared by every row handled in parse().
    item = AAAItem()
    toDownload = []

    def parse(self, response):
        """Extract one item per listing row and yield it.

        Stops the crawl (via ``stopSpider``) at the first row whose date is
        not later than the date returned by ``getLastUpdateDate``.
        """
        # The cut-off is the same for every row, so parse it once up front.
        last_update = self.convertToDate(self.getLastUpdateDate())
        for sel in response.xpath('//*[@id="contentmiddle"]/div[3]/ol/li'):
            self.item['name'] = sel.xpath('div/div/div[1]/p[1]/a/text()').extract()
            self.item['date'] = sel.xpath('div/div/div[2]/p[4]/text()').extract()
            row_date = self.convertToDate(self.item['date'][0])
            if row_date <= last_update:
                self.haha2(response)
                self.stopSpider()
            self.item['link'] = sel.xpath('div/div/div[4]/p[3]/a/@href').extract()
            self.arrangeDownloadUrl()
            yield self.item

    def arrangeDownloadUrl(self):
        """Queue the absolute download URL for the current item's first link.

        Prints a message instead of queueing when the item has no link.
        """
        try:
            link = self.item['link'][0]
        except IndexError:
            print('file not downloaded, link dead')
        else:
            self.toDownload.append("http://AAA.com" + link)

    def haha2(self, response):
        """Build a Request with callback ``haha3`` for every queued URL."""
        # NOTE: each Request is constructed and then discarded; nothing is
        # returned or yielded, so the requests never reach Scrapy and haha3
        # is never invoked. This is the behaviour the question is about.
        for i in range(len(self.toDownload)):
            Request(self.toDownload[i], self.haha3)

    def haha3(self, response):
        """Callback intended for the download requests; only prints a marker."""
        print('haha3.................................................................')

    def stopSpider(self):
        """Abort the crawl by raising ``CloseSpider`` with reason 'done'."""
        raise scrapy.exceptions.CloseSpider('done')

    def getLastUpdateDate(self):
        """Return the hard-coded last-update timestamp as a string."""
        date = "Nov 5, 2001 - 1:06 PM"
        return date

    def convertToDate(self, value):
        """Parse a string such as 'Nov 5, 2001 - 1:06 PM' into a datetime."""
        # %M (minutes), not %S (seconds): the text after the colon is minutes.
        result = datetime.strptime(value, '%b %d, %Y - %I:%M %p')
        return result

    def convertToString(self, value):
        """Format a datetime in the layout that ``convertToDate`` parses."""
        # %d is the day of the month (%w would be the weekday number) and
        # %M the minutes, so the output round-trips through convertToDate.
        result = value.strftime("%b %d, %Y - %I:%M %p")
        return result
出于保护隐私的目的,我不得不修改了页面的URL。总之,问题在于 haha2 函数中创建的 Request 没有触发回调 haha3——程序不会进入 haha3 函数,除非我直接这样调用它:self.haha3(response)。
但这样就失去了意义,因为我想要的是打开那个链接,并把该链接的响应交给回调处理……有什么想法吗?我哪里做错了?
答
尝试
def haha2(self, response):
    """Yield one Request per queued download URL, with haha3 as callback.

    Constructing a Request does nothing by itself; it has to be handed back
    to Scrapy, which is why each one is yielded here. Because this makes the
    method a generator, the caller must iterate it and pass the requests on,
    e.g. ``for request in self.haha2(response): yield request`` inside
    ``parse``. A bare ``self.haha2(response)`` statement runs none of the body.
    """
    for url in self.toDownload:
        yield Request(url, callback=self.haha3)
您是否尝试过在 haha2 中使用 'yield Request(...)'? – soooooot 2014-11-11 09:39:54