vs做網站通過e瀏覽器馮耀宗seo課程
文章目錄
- 一、窗口切換實戰
- 二、京東數據抓取
一、窗口切換實戰
案例實戰:使用selenium實現打開百度和騰訊兩個窗口並切換
知識點:用到selenium中execute_script()執行js代碼及switch_to.window()方法
全部代碼如下:
import time
import warnings

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Demo script: open Baidu and QQ in two browser windows and switch between
# them, typing a search term into each page.

warnings.filterwarnings('ignore')

# Create the ChromeOptions object.
chrome_options = Options()
# Exclude the "enable-automation" switch so Chrome does not show the
# "controlled by automated test software" banner.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Pass the options by keyword: the first positional parameter of
# webdriver.Chrome is not `options` in every Selenium release.
driver = webdriver.Chrome(options=chrome_options)
try:
    # Inject a script into every new document that makes
    # navigator.webdriver report undefined.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": " Object.defineProperty(navigator, 'webdriver', { get: () => undefined }) "},
    )
    driver.maximize_window()

    # Load the first site with driver.get().
    driver.get('https://www.baidu.com')
    time.sleep(3)

    # Load the second site in a new window by running JavaScript.
    js_code = 'window.open("https://www.qq.com")'
    driver.execute_script(js_code)

    # Collect the handles of all open windows.
    window_list = driver.window_handles

    # Switch to the Baidu window and run a search.
    driver.switch_to.window(window_list[0])
    driver.find_element(by=By.ID, value='kw').send_keys('風景')
    driver.find_element(by=By.ID, value='su').click()
    time.sleep(2)

    # Switch to the QQ window and run a search.
    driver.switch_to.window(window_list[1])
    driver.find_element(by=By.XPATH, value='//*[@id="qqhome-top-header"]/div/div/div[2]/div/input').send_keys('Python')
    driver.find_element(by=By.XPATH, value='//*[@id="qqhome-top-header"]/div/div/div[2]/div/button/span').click()

    # Block until the user presses Enter so the browser stays open.
    input()
finally:
    # Always release the browser and the driver process.
    driver.quit()
二、京東數據抓取
案例實戰:使用selenium實現打開京東搜索商品並滑動頁面獲取數據
知識點:用到selenium中execute_script()執行js代碼
全部代碼如下:
import time
import warnings

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Search keyword typed into the JD search box.
keyword = '手機'
warnings.filterwarnings('ignore')

# Create the ChromeOptions object.
chrome_options = Options()
# Exclude the "enable-automation" switch so Chrome does not show the
# "controlled by automated test software" banner.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Pass the options by keyword: the first positional parameter of
# webdriver.Chrome is not `options` in every Selenium release.
driver = webdriver.Chrome(options=chrome_options)
# Inject a script into every new document that makes navigator.webdriver
# report undefined.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": " Object.defineProperty(navigator, 'webdriver', { get: () => undefined }) "},
)
driver.maximize_window()


def _strip_whitespace(parts):
    """Join text fragments and remove every whitespace character.

    NOTE(review): the original chained `.replace('\\n', '')` with two
    identical-looking `.replace(' ', '')` calls; the second one presumably
    targeted a different whitespace character that was lost when the code
    was pasted -- TODO confirm. Splitting on all whitespace covers
    newlines, regular spaces and other Unicode spaces in one pass.
    """
    return ''.join(''.join(parts).split())


def requests_url():
    """Open the JD home page, then continue with the search step."""
    start_url = r'https://www.jd.com/'
    driver.get(start_url)
    input_data()


def input_data():
    """Type the keyword into the search box, submit, then scroll the results."""
    driver.find_element(by=By.ID, value='key').send_keys(keyword)
    time.sleep(2)
    driver.find_element(by=By.CLASS_NAME, value='button').click()
    # Give the result page time to load before scrolling.
    time.sleep(8)
    Down_Scroll()


def Down_Scroll():
    """Scroll down the page in ten 600-pixel steps, then parse the goods."""
    for step in range(1, 11):
        driver.execute_script(f'scrollTo(0, {step * 600})')
        time.sleep(1)
    time.sleep(3)
    get_goods_info()


def get_goods_info():
    """Parse the current page source and print name, seller and price per item.

    Items whose name is empty after whitespace removal are skipped.
    """
    html_xpath = etree.HTML(driver.page_source)
    for li in html_xpath.xpath('//div[@id="J_goodsList"]/ul/li'):
        # 1. Goods title: prefer the <em> text, fall back to the title attribute.
        goods_name_1 = li.xpath(r'.//div[@class="p-name p-name-type-2"]/a/em/text()')
        goods_name_2 = li.xpath(r'.//div[@class="p-name p-name-type-2"]/a/@title')
        goods_name = _strip_whitespace(goods_name_1 or goods_name_2)

        # 2. Seller name.
        sale_name = ''.join(li.xpath(r'.//a[@class="curr-shop hd-shopname"]/@title'))

        # 3. Goods price.
        goods_price = ''.join(li.xpath(r'.//div/div[2]/strong/i/text()'))

        if goods_name:
            print(goods_name, sale_name, goods_price, sep=' | ')


def main():
    """Run the scrape, wait for Enter, and always close the browser."""
    try:
        requests_url()
        # Block until the user presses Enter so the browser stays open.
        input()
    finally:
        driver.quit()


if __name__ == '__main__':
    main()