Python parsel.Selector方法代码示例 | 您所在的位置:网站首页 > python的parsel模块 > Python parsel.Selector方法代码示例

Python parsel.Selector方法代码示例

2023-07-28 13:42| 来源: 网络整理| 查看: 265

本文整理汇总了Python中parsel.Selector类的典型用法代码示例。如果您正苦于以下问题:Python parsel.Selector的具体用法是什么?Python parsel.Selector怎么用?有哪些Python parsel.Selector的使用例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块parsel的用法示例。

在下文中一共展示了parsel.Selector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

# NOTE: The fifteen snippets below are independent excerpts taken from
# different open-source projects.  Most of them are methods lifted out of
# their classes, so they rely on names (imports, module-level constants,
# helper methods) that are defined in their original projects, and some of
# them share a function name (e.g. ``parse``).
# Every example needs: import parsel   (or: from parsel import Selector)


# Example 1: scrape
def scrape():
    """Print every quote from quotes.toscrape.com/js-onclick, page by page.

    The page is loaded in a PhantomJS webdriver, the rendered source is parsed
    with parsel, one dict per ``div.quote`` element is printed, and the
    ``li.next > a`` link is clicked until it can no longer be found.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get('http://quotes.toscrape.com/js-onclick')
        while True:
            sel = parsel.Selector(text=driver.page_source)
            for quote in sel.css('div.quote'):
                print({
                    'text': quote.css('span.text::text').extract_first(),
                    'author': quote.css('span small::text').extract_first(),
                    'tags': quote.css('div.tags a.tag::text').extract(),
                })
            try:
                next_button = driver.find_element_by_css_selector('li.next > a')
                next_button.click()
            except NoSuchElementException:
                # No "next" link: the last page has been printed.
                break
    finally:
        # Always shut the browser down, even when scraping fails midway.
        driver.quit()
# Developer: scrapinghub, project: scrapy-training, lines of code: 18,
# source file: spider_4_standalone_selenium.py


# Example 2: parse
def parse(self, id):
    """Build a dict of project fields from the stored page text of ``id``.

    The text is read from the ``ppp.proj_text`` Mongo collection and queried
    with ``self.re_time_x`` (title and time) and ``self.details_x`` (detail
    cells).  For every keyword found among the detail cells, the following
    cell is stored as its value; ``'null'`` is stored when the following cell
    is itself a keyword or does not exist.

    NOTE(review): the ``'????'`` literals look like non-ASCII labels that were
    mis-encoded when this snippet was collected - restore them from the
    original project before use.
    """
    coll = MONGO_CLIENT['ppp']['proj_text']
    resp = coll.find_one({'_id': id})['text']
    hxs = Selector(text=resp)
    title_re_time = list(self.parse_value(hxs, self.re_time_x))
    details = list(self.parse_value(hxs, self.details_x))
    title = title_re_time[0]
    re_time = title_re_time[1]
    keywords = ['????', '????', '?????', '????', '????', '????', '??????/??', '?????', '????']
    proj_dict = {}
    proj_dict['????'] = title
    proj_dict['??????'] = re_time.replace('???????', '')
    for keyword in keywords:
        for i, detail in enumerate(details):
            if detail != keyword:
                continue
            try:
                following = details[i + 1]
            except IndexError:
                # The keyword is the very last cell, so it has no value.
                proj_dict[keyword] = 'null'
                continue
            proj_dict[keyword] = following if following not in keywords else 'null'
    return proj_dict
# Developer: HughWen, project: wen_spiders, lines of code: 27,
# source file: PPPSpider.py


# Example 3: find_matches
def find_matches(self, sel):
    """
    Generator to find live and upcoming matches in parsel.Selector object
    :returns: yields eticker.Match objects
    """
    matches = sel.xpath("//table[@id='gb-matches']//tr")
    for match in matches:
        item = self._find_match(match)
        # Populate stream data if match is live
        if not item['time_secs']:
            resp = requests.get(item['url'])
            sel_detailed = Selector(text=resp.text)
            item['stream'] = sel_detailed.xpath("//div[@class='matches-streams']"
                                                "/span[.//a[re:test(text(),'english', 'i')]]"
                                                "//iframe/@src").extract_first()
            item['stream'] = clean_stream_url(item['stream'])
        yield item
# Developer: Granitosaurus, project: ggmt, lines of code: 19,
# source file: matchticker.py


# Example 4: parse_tags
def parse_tags(id_get):
    """Return the tags record for ``id_get``, fetching it when not stored yet.

    A record already present in ``tags_coll`` is returned as is.  Otherwise
    the page at ``API % id_get`` is downloaded, the first value selected by
    ``tag_x`` is printed, saved through ``save_tags`` and returned as
    ``{'id': ..., 'tags': ...}``.
    """
    stored = tags_coll.find_one({'id': id_get})
    if stored:
        return stored

    url = API % id_get
    resp = requests.get(url, headers=headers, timeout=10)
    hxs = Selector(text=resp.text)
    tags = parse_value(hxs, tag_x)[0]
    print(tags)
    item = {
        'id': id_get,
        'tags': tags,
    }
    save_tags(id_get, item)
    return item
# Developer: HughWen, project: database_project, lines of code: 18,
# source file: getContent.py


# Example 5: parse_words
def parse_words(self, url):
    """Download ``url`` and return the values selected by ``self.words_x`` as a list."""
    resp0 = self.p_get(url)
    hxs = Selector(text=resp0.text)
    word_list = self.parse_value(hxs, self.words_x)
    return list(word_list)
# Developer: HughWen, project: wen_spiders, lines of code: 7,
# source file: WordsSpider.py


# Example 6: parse
def parse(self):
    """Collect sub-list and page URLs, then write every word to a file.

    Starting from ``self.base_url``, ``self.sublist_url`` and
    ``self.page_url`` are filled in, and the words of every page URL followed
    by the words of every sub-list URL are written, one per line, to
    ``oxford_words.txt``.
    """
    self.sublist_url.append(self.base_url)

    # build the sublist_url
    print('building the sublist_url...')
    resp = self.p_get(self.base_url)
    hxs = Selector(text=resp.text)
    self.sublist_url += self.parse_value(hxs, self.sublist_x)

    # build the page_url
    print('building the page_url...')
    for url in self.sublist_url:
        time.sleep(1)  # pause between requests
        resp0 = self.p_get(url)
        hxs = Selector(text=resp0.text)
        inner_list = self.parse_value(hxs, self.page_x)
        for inner in inner_list:
            if inner not in self.page_url:
                self.page_url.append(inner)

    # parse the words and write to file
    print('parsing the words and writing to file...')
    with open('oxford_words.txt', 'w') as words_f:
        # First the words of page_url, then the words of sublist_url.
        for urls in (self.page_url, self.sublist_url):
            for url in urls:
                word_list = self.parse_words(url)
                print(url)
                print(str(len(word_list)) + 'words')
                for word in word_list:
                    words_f.write(word + '\n')
# Developer: HughWen, project: wen_spiders, lines of code: 40,
# source file: WordsSpider.py


# Example 7: select
def select(self, css):
    """
    Select ops from the graph using css-like selectors.

    The available selectors and corresponding op attributes are:
        - element: Op type
        - id: Op name
        - class: Op label
        - attribute: Any key-value pair from op metadata
        - hierarchy: Scopes provide op hierarchy

    Arguments:
        css (str): A css selector string

    Returns:
        list of ops

    Examples:
        # Get all ops with the "bias" label
        subgraph.select(".bias")

        # Get the op named "conv_filter"
        subgraph.select("#conv_filter")

        # Get the "bias" ops within Affine layers
        subgraph.select("Affine .bias")

        # Get all TensorValueOps
        subgraph.select("TensorValueOp")

        # Get all ops from timestep 3 in an RNN (ie with metadata "recurrent_step=3")
        subgraph.select("[recurrent_step=3]")
    """
    ops = []
    for selected in parsel.Selector(self._to_xml()).css(css):
        op = self._selector_to_op(selected)
        # Selections for which no op is returned are skipped.
        if op is not None:
            ops.append(op)
    return ops
# Developer: NervanaSystems, project: ngraph, lines of code: 42,
# source file: graph.py


# Example 8: download_matches
def download_matches(self):
    """
    Downloads live and upcoming matches.
    :return: the result of find_matches() for the downloaded page
    :raises ConnectionRefusedError: when the response status is not 200
    """
    resp = requests.get(self.game_url)
    if resp.status_code != 200:
        raise ConnectionRefusedError('Got response error {}'.format(resp.status_code))
    sel = Selector(text=resp.text)
    return self.find_matches(sel)
# Developer: Granitosaurus, project: ggmt, lines of code: 12,
# source file: matchticker.py


# Example 9: download_history
def download_history(self):
    """
    Downloads recent matches.
    :return: the result of find_history() for the downloaded page
    :raises ConnectionRefusedError: when the response status is not 200
    """
    resp = requests.get('{}/gosubet'.format(self.game_url))
    if resp.status_code != 200:
        raise ConnectionRefusedError('Got response error {}'.format(resp.status_code))
    sel = Selector(text=resp.text)
    return self.find_history(sel)
# Developer: Granitosaurus, project: ggmt, lines of code: 12,
# source file: matchticker.py


# Example 10: find_history
def find_history(self, sel):
    """
    Generator to find recent matches in parsel.Selector object
    :returns: yields eticker.Match objects
    """
    matches = sel.xpath("//h2[contains(text(),'Recent')]/..//tr")
    for match in matches:
        item = self._find_match(match)
        yield item
# Developer: Granitosaurus, project: ggmt, lines of code: 11,
# source file: matchticker.py


# Example 11: _test_match
def _test_match(self, game):
    """Assert that the matches found in the stored page for ``game`` equal the stored JSON."""
    gt = GosuTicker(game)
    data = pkg_resources.resource_string('tests', f'/html/match_{game}.html').decode('utf-8')
    result = pkg_resources.resource_string('tests', f'/html/match_{game}.json').decode('utf-8')
    sel = Selector(text=data)
    matches = list(gt.find_matches(sel))
    assert json.dumps(matches) == result
# Developer: Granitosaurus, project: ggmt, lines of code: 9,
# source file: test_matchticker.py


# Example 12: get_version
def get_version(cls, entry, *matchers):
    """Return the first version extracted from the response body, or None.

    Each matcher is an ``(xpath, regexp)`` pair: the first value selected by
    ``xpath`` is passed to ``extract_version`` together with ``regexp``.
    """
    body = entry['response']['content']['text']
    selector = Selector(text=body)
    for xpath, regexp in matchers:
        value = selector.xpath(xpath).extract_first()
        if not value:
            continue
        version = extract_version(value, regexp)
        if version:
            return version
    return None
# Developer: alertot, project: detectem, lines of code: 14,
# source file: matchers.py


# Example 13: check_presence
def check_presence(cls, entry, *matchers):
    """Return True when any XPath in ``matchers`` selects something in the response body."""
    body = entry['response']['content']['text']
    selector = Selector(text=body)
    return any(selector.xpath(xpath) for xpath in matchers)
# Developer: alertot, project: detectem, lines of code: 12,
# source file: matchers.py


# Example 14: get_module_name
def get_module_name(cls, entry, *matchers):
    """Return the first name extracted from the response body, or None.

    Each matcher is an ``(xpath, regexp)`` pair: the first value selected by
    ``xpath`` is passed to ``extract_name`` together with ``regexp``.
    """
    body = entry['response']['content']['text']
    selector = Selector(text=body)
    for xpath, regexp in matchers:
        value = selector.xpath(xpath).extract_first()
        if not value:
            continue
        name = extract_name(value, regexp)
        if name:
            return name
    return None
# Developer: alertot, project: detectem, lines of code: 14,
# source file: matchers.py


# Example 15: __init__
def __init__(self, text, type='html'):
    """Wrap ``text`` in a parsel Selector of the given ``type`` and start with an empty set."""
    self.sel = Selector(text, type=type)
    self.set = set()
# Developer: lymlhhj123, project: scrapy_redis_splash_spider, lines of code: 5,
# source file: items.py

注:本文中的parsel.Selector方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。



【本文地址】

公司简介

联系我们

今日新闻

    推荐新闻

    专题文章
      CopyRight 2018-2019 实验室设备网 版权所有