No Title

22 November 2016

Views: 181

# -*- coding: utf-8 -*-

from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.http import Request
from projecty.items import projectyItem

class projectySpider(Spider):
name = "projecty"
allowed_domains = [
'duckduckgo.com',
]

def start_requests(self):
    """Yield one DuckDuckGo HTML search request per configured keyword.

    Keywords are read from the ``WND_SEARCH_KEYWORDS`` crawler setting.
    Every response is handed to ``self.parse_keyword``.
    """
    # Imported locally so this method does not rely on a new file-level
    # import; the fallback keeps it working on Python 2.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    # getlist() gives an empty list when the setting is absent (plain
    # indexing would return None and make the loop raise TypeError) and
    # splits a comma-separated string instead of iterating its characters.
    keywords = self.crawler.settings.getlist('WND_SEARCH_KEYWORDS')

    for keyword in keywords:
        # Encode text to UTF-8 bytes first so non-ASCII keywords are
        # percent-encoded the same way on Python 2 and Python 3.
        if isinstance(keyword, type(u'')):
            keyword = keyword.encode('utf-8')

        # urlencode escapes spaces, '&', '#', '=' and similar characters in
        # the keyword so it cannot truncate or inject query parameters.
        # A list of pairs keeps the parameters in their original order.
        # The previous hand-built URL also carried literal spaces and a
        # stray '+' after the keyword; those only added whitespace to the
        # search term and are intentionally not reproduced.
        query = urlencode([
            ('q', keyword),
            ('t', 'h_'),
            ('atb', '34-6__'),
            ('df', 'm'),
            ('ia', 'web'),
        ])
        yield Request(
            'https://www.duckduckgo.com/html?' + query,
            callback=self.parse_keyword,
        )

def parse_keyword(self, response):
#sel = Selector(response)
#search_results = sel.xpath('//*[@id="links"]/*[contains(@class, "results_links") and position() > 1]/*[@class="links_main links_deep result__body"]).extract()
for sel in response.xpath('//*[@id="links"]/*[contains(@class, "results_links") and position() > 1]/*[@class="links_main links_deep result__body"]/*[@class="result__title"]'):
item = projectyItem()

#print "%s\n" % ''.join(link.xpath('normalize-space(//*[@class="large"]//text())').extract())
#print link
#item['title'] = sel.xpath('//*[@class="result__a"]/b/text()').extract()
item['title'] = sel.xpath('//title').extract()
"projecty/spiders/search.py" 38L, 1590C

Share