柳州做網站公司今天最新消息
從谷歌瀏覽器的開發工具進入
選擇圖片右鍵點擊檢查
之後發現網址變化的只有 start 數值,每次變化值為 30
Python代碼
import requests
from bs4 import BeautifulSoup
import time
import os# 豆瓣影人圖片
# Fetch the first page of the celebrity photo gallery and print the image
# URLs found on it.
url = 'https://movie.douban.com/celebrity/1011562/photos/'
# The original passed headers="" (no custom headers at all). A browser-like
# User-Agent is presumably what the site expects -- TODO confirm and replace
# with the real header set if needed.
headers = {'User-Agent': 'Mozilla/5.0'}
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url=url, headers=headers, timeout=10).text
content = BeautifulSoup(res, "html.parser")
# Each photo thumbnail on the page sits inside a <div class="cover">.
data = content.find_all('div', attrs={'class': 'cover'})
picture_list = []
for d in data:
    img = d.find('img')
    # find() returns None when the cover holds no <img>; skip those instead
    # of crashing on a None subscript.
    if img is None or not img.get('src'):
        continue
    plist = img['src']
    picture_list.append(plist)
print(picture_list)

# Subsequent pages use the same path with a query string; only the ``start``
# value changes between pages, e.g.:
# https://movie.douban.com/celebrity/1011562/photos/?type=C&start=30&sortby=like&size=a&subtype=a
def get_poster_url(res):
    """Extract the photo image URLs from one gallery page.

    Args:
        res: HTML text of a photo-list page.

    Returns:
        A list with the ``src`` value of the first ``<img>`` inside every
        ``<div class="cover">``, in document order. Covers that contain no
        ``<img>`` or whose ``<img>`` has no ``src`` are skipped.
    """
    content = BeautifulSoup(res, "html.parser")
    picture_list = []
    for cover in content.find_all('div', attrs={'class': 'cover'}):
        img = cover.find('img')
        # find() yields None when there is no <img>; guard so one malformed
        # cover does not abort the whole page.
        if img is None:
            continue
        src = img.get('src')
        if src:
            picture_list.append(src)
    return picture_list


# XPath of the first thumbnail on the page, for reference:
# //*[@id="content"]/div/div[1]/ul/li[1]/div[1]/a/img
# Directory (relative to the working directory) that receives the images.
_PICTURE_DIR = 'picture'
# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 10
# The original passed headers="" (no custom headers). A browser-like
# User-Agent is presumably what the site expects -- TODO confirm.
_PAGE_HEADERS = {'User-Agent': 'Mozilla/5.0'}


def download_picture(pic_l):
    """Download every image URL in ``pic_l`` into the ``picture`` directory.

    Args:
        pic_l: Iterable of image URLs. The file name is taken from the last
            path segment of each URL.

    The directory is created if it does not exist. Responses with an error
    status are skipped so that an error page is never saved as an image.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(_PICTURE_DIR, exist_ok=True)
    for i in pic_l:
        pic = requests.get(i, timeout=_REQUEST_TIMEOUT)
        if not pic.ok:
            print("skip %s (HTTP %s)" % (i, pic.status_code))
            continue
        # Last path segment is the file name; a fixed index such as [7]
        # breaks as soon as the URL depth changes.
        p_name = i.rsplit('/', 1)[-1]
        # os.path.join picks the right separator; a hard-coded backslash
        # only produces a sub-directory path on Windows.
        with open(os.path.join(_PICTURE_DIR, p_name), 'wb') as f:
            f.write(pic.content)


def fire():
    """Crawl the gallery pages one by one and download their images.

    Pages are addressed by the ``start`` query parameter, which advances in
    steps of 30 from 0 up to (not including) 450. The function sleeps one
    second after each page.
    """
    for page, start in enumerate(range(0, 450, 30)):
        print("開始爬取第 %s 頁" % page)
        url = 'https://movie.douban.com/celebrity/1011562/photos/?type=C&start={}&sortby=like&size=a&subtype=a'.format(start)
        res = requests.get(url=url, headers=_PAGE_HEADERS, timeout=_REQUEST_TIMEOUT).text
        data = get_poster_url(res)
        download_picture(data)
        # Pause between pages to avoid hammering the server.
        time.sleep(1)


fire()
把爬取的圖片全部放到新建的文件夾中存放