一、（2）使用selenium爬取超链接后，再爬取超链接里的内容。

本文利用一、（1）中爬取到的证监会新闻超链接，逐个打开这些超链接并爬取其中每条新闻的内容，方法很简单。

完整代码如下：

# coding=utf-8
import codecs

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Crawl the body text of every news page whose URL is listed (one per line)
# in jieguo.txt and append it, page by page, to xinwen.txt.

# Configure the Chrome instance used for crawling.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])

# Read the stored hyperlinks before launching the browser, so that a missing
# or unreadable input file neither leaves a stray Chrome process behind nor
# truncates a previous result file.
with codecs.open("jieguo.txt", 'r', 'utf-8') as f:
    # Strip the trailing line terminator from every URL and drop blank
    # lines: neither is a valid navigation target for driver.get().
    links = [line.strip() for line in f.readlines() if line.strip()]
total = len(links)

driver = webdriver.Chrome(chrome_options=options)
try:
    # File that receives the crawled text; closed automatically, even on error.
    with codecs.open('xinwen.txt', 'w', 'utf-8') as result:
        for page, link in enumerate(links, 1):
            print(link)
            print('Page ', page, '/', total)
            driver.get(link)

            # Header line identifying the news item in the output file.
            result.write(u'Page: ' + str(page))
            result.write("\r\n")

            # Locate the news body by XPath. Only the lookup itself is
            # guarded, so unrelated errors are not mistaken for a missing
            # element.
            try:
                tt2 = driver.find_element_by_xpath('/html/body/div/div/div[5]/div')
            except NoSuchElementException:
                # The page has no element at the expected location: record
                # the failure and carry on with the next link.
                print("done")
                result.write(u'Page:faile ' + str(page))
                result.write("\r\n")
            else:
                print(tt2.text)
                result.write(tt2.text)
                result.write("\r\n")
finally:
    # quit() closes every window and ends the WebDriver session, so a
    # separate close() call beforehand is unnecessary.
    driver.quit()