Here is the code:
#!/usr/bin/python
# coding: utf-8
import scrapy
import time
from bs4 import BeautifulSoup
class QuotesSpider(scrapy.Spider):
    """Run Sogou WeChat article searches and record a value from each result page.

    One search request is issued per non-blank line of a query file.  For every
    response, the text found between the "约" and "条" markers of each
    ``<div class="mun">`` element (presumably the reported number of hits --
    confirm against a live page) is appended to ``output_file``, one per line.
    """

    name = "quotes"

    # Query file used when ``start_requests`` is called without ``filename``.
    default_query_file = 'your_file.txt'
    # File that ``parse`` appends the extracted values to.
    output_file = "quotes-111.txt"

    # NOTE(review): ``cookie`` and ``UA`` are not defined in the visible
    # source; they must exist at module level before this class body runs.
    headers = {
        'Cookie': cookie,
        "User-Agent": UA,
        "Referer": "http://weixin.sogou.com/weixin?type=2",
    }

    def start_requests(self, filename=None):
        """Yield one search request for each non-blank line of the query file.

        Args:
            filename: Path of a text file (read as UTF-8) with one query per
                line.  Falls back to ``default_query_file`` when omitted.
        """
        path = filename or self.default_query_file
        # Read everything up front so the file is closed before the first
        # request is yielded instead of staying open for the whole crawl.
        with open(path, 'r', encoding='utf-8') as f:
            # Strip each line so the trailing newline is not sent as part of
            # the query, and drop blank lines rather than searching for "".
            queries = [line.strip() for line in f if line.strip()]

        for query in queries:
            self.log(query)
            yield scrapy.http.FormRequest(
                url='http://weixin.sogou.com/weixin',
                formdata={
                    'type': '2',
                    'ie': 'utf8',
                    'query': query,
                    'tsn': '1',
                    'ft': '',
                    'et': '',
                    'interation': '',
                    'wxid': '',
                    'usip': '',
                },
                headers=self.headers,
                method='get',
                dont_filter=True,
                # Redirect responses are handed to the callback rather than
                # being followed.
                meta={
                    'dont_redirect': True,
                    "handle_httpstatus_list": [301, 302, 303],
                },
                callback=self.parse,
            )

    def parse(self, response):
        """Append the value extracted from each ``div.mun`` to ``output_file``.

        Elements whose text lacks either marker are logged and skipped.
        """
        soup = BeautifulSoup(response.body, 'html.parser')

        values = []
        for row in soup.find_all('div', attrs={"class": "mun"}):
            text = row.text.strip()
            value = self._extract_between_markers(text)
            if value is None:
                self.log("No '约…条' markers in %r" % text)
                continue
            values.append(value)

        # Append mode: results of every response accumulate in the same file.
        with open(self.output_file, "a", encoding="utf-8") as out:
            out.writelines(value + "\n" for value in values)
        self.log("Saved file %s" % self.output_file)

    @staticmethod
    def _extract_between_markers(text):
        """Return the text between the first "约" and the next "条", or None."""
        # Markers are plain text strings: BeautifulSoup yields text, and
        # searching text for encoded bytes fails.
        _, opening, remainder = text.partition("约")
        if not opening:
            return None
        value, closing, _ = remainder.partition("条")
        if not closing:
            return None
        return value
No comments:
Post a Comment