ezoic

Monday, June 11, 2018

Scrapy spider: one URL, multiple requests (sample code)


class PabhSpider(CrawlSpider):
    """Request one form URL repeatedly, once per department code.

    The first request is issued for department ``'01'``. Every response is
    turned into one item holding the extracted ``<option>`` values, and
    requests for the remaining department codes are then scheduled against
    the same URL with the same callback.

    NOTE(review): no crawl rules are defined and ``parse`` is overridden, so
    the rule machinery of ``CrawlSpider`` looks unused here; a plain
    ``Spider`` base class is probably sufficient -- confirm before changing.
    """

    name = 'pabh'
    allowed_domains = ['xxx']

    # Endpoint and fixed form value shared by every request.
    form_url = 'http://xxx'
    years = '2014'

    # Department code used for the initial request ...
    first_depart = '01'
    # ... and the codes requested once a response has been parsed.
    other_departs = (
        '02', '03', '04', '05', '06', '07', '08', '09', '10',
        '11', '12', '13', '14', '21', '31', '40', '51', '61',
    )

    def _form_request(self, depart):
        """Build the form request for a single department code."""
        formdata = {
            "depart": depart,
            "years": self.years,
        }
        return FormRequest(
            url=self.form_url,
            formdata=formdata,
            method='get',
            callback=self.parse,
        )

    def start_requests(self):
        """Return the initial request (department ``'01'``) as a list."""
        # The original referenced an undefined name ``num`` here (the local
        # was called ``num1``), which raised NameError before any request
        # was sent.
        return [self._form_request(self.first_depart)]

    def parse(self, response):
        """Yield one item for ``response``, then the follow-up requests.

        The item's ``'bh'`` field receives every ``value`` attribute of the
        options under the third ``<select>`` of the page's form.
        """
        item = XXXItem()
        hxs = Selector(response)
        item['bh'] = hxs.xpath('/html/body/form/p/font/select[3]/option/@value').extract()
        yield item

        # Every response re-yields the same follow-up requests because they
        # share this callback. NOTE(review): Scrapy's default duplicate
        # filter is expected to drop the repeats since dont_filter is not
        # set -- confirm against the project's settings.
        for depart in self.other_departs:
            yield self._form_request(depart)

No comments:

Post a Comment

R is not a simple programming language, and it does better on reading excel files than python

R is not a simple programming language, and it does better at reading Excel files than Python. I tried to read Excel files into Python and R. I...