运行截图:等待页面全部加载完成后抓取 HTML,经分析处理后得到想要的数据。 Python 源代码:
from os import times
from bs4 import BeautifulSoup
import urllib.error
from selenium.webdriver.common.keys import Keys # 模仿键盘,操作下拉框的
from bs4 import BeautifulSoup # 解析html的
from selenium import webdriver
import time
def main():
    """Script entry point: run the scraper defined in ``pa``."""
    pa()
def pa():  # Scrape a page that loads more content as it is scrolled down
    """Load https://food.jd.com in Chrome, scroll it, and print the scraped titles.

    The page is scrolled step by step with injected JavaScript, then the
    rendered HTML is parsed with BeautifulSoup and the text of every element
    matching ``.goods-item__title`` (presumably the product titles) is
    printed, each followed by a blank line.

    Returns:
        None. Results are written to stdout only.
    """
    # Scroll 500px further every 500ms; the interval is cancelled after 7000ms.
    js = '''
let height = 0
let interval = setInterval(() => {
window.scrollTo({
top: height,
behavior: "smooth"
});
height += 500
}, 500);
setTimeout(() => {
clearInterval(interval)
}, 7000);
'''
    options = webdriver.ChromeOptions()
    # NOTE(review): machine-specific Chrome install path; adjust per host.
    options.binary_location = r"D:\Program Files\Google\Chrome\Application\chrome.exe"
    # `options=` is the supported keyword; `chrome_options=` is deprecated
    # and rejected by current Selenium 4 releases.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://food.jd.com")
        driver.execute_script(js)
        # execute_script returns immediately; wait slightly longer than the
        # 7s scroll window so the scrolling has finished before snapshotting.
        time.sleep(8)
        bs = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # quit() ends the whole session (browser and driver process) even if
        # a step above raised; close() would only close the current window.
        driver.quit()
    titles = [node.get_text() for node in bs.select(".goods-item__title")]
    for title in titles:
        print("%s\n" % title)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
仅供教学使用,无任何攻击行为或意图。
如有疑问请留言评论,当日必回。
感谢阅读~~~
|