The first example set up a crawler environment in Python. Today we continue with the second project: crawling all the images of a website. If you see a website and want to save its pictures locally, read on:
Crawl target website: blog.csdn.net/qq_42363090…
1: Ensure that the Requests module is installed. If not, install the requests module using the following command:
pip install requests
Copy the code
Module Requests was installed successfully
2: Create a first.py file and write the following code
# -*- coding: utf-8 -*-
import requests
import re
import os
class GetImage(object):
    """Download the images referenced by one page into an ``imgs`` directory.

    The ``imgs`` directory is created next to this script. Typical use is
    ``GetImage(page_url).run()``.
    """

    def __init__(self, url):
        """Remember the page URL and make sure the output directory exists.

        Args:
            url: Address of the HTML page whose images should be fetched.
        """
        self.url = url
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.139 Safari/537.36'
        }
        self.dir_path = os.path.dirname(os.path.abspath(__file__))
        self.path = self.dir_path + '/imgs'
        # Create the directory; exist_ok avoids a separate existence check.
        os.makedirs(self.path, exist_ok=True)

    def download(self, url):
        """Fetch ``url`` with the configured headers.

        Args:
            url: Address of the page or image to fetch.

        Returns:
            The response object, or ``None`` when the request failed.
        """
        try:
            # A timeout keeps one unresponsive server from hanging the crawl.
            res = requests.get(url, headers=self.headers, timeout=10)
        except requests.RequestException as E:
            # The exception must be converted explicitly: str + Exception
            # raises TypeError.
            print(url + 'Download failed, cause :' + str(E))
            return None
        return res

    def parse(self, res):
        """Extract the image URLs referenced by a downloaded page.

        Args:
            res: Response returned by ``download`` (may be ``None``).

        Returns:
            A list of image URLs; empty when ``res`` is ``None`` or the page
            contains no ``<img>`` tags.
        """
        if res is None:
            return []
        content = res.content.decode('utf-8', errors='replace')
        # NOTE(review): the pattern in the published listing was truncated;
        # this is a reconstruction that captures the src attribute of every
        # <img> tag -- confirm against the target site's markup.
        img_list = re.findall(
            r'<img\b[^>]*?\ssrc\s*=\s*["\']([^"\']+)["\']', content, re.I
        )
        # The captured paths are joined onto a fixed site prefix.
        return ['http://www.yangqq.com/skin/jxhx/' + url for url in img_list]

    def save(self, res_img, file_name):
        """Write the body of a downloaded image to ``file_name``.

        Args:
            res_img: Response holding the image bytes; nothing is written
                when it is falsy (for example ``None`` after a failed
                download).
            file_name: Destination path of the image file.
        """
        if not res_img:
            return
        with open(file_name, 'wb') as f:
            f.write(res_img.content)
        # Only the destination path is known here; the source URL is not.
        print(file_name + 'Download successful')

    def run(self):
        """Download the page, then fetch and save every image it references."""
        # download
        res = self.download(self.url)
        if res is None:
            # The failure has already been reported by download().
            return
        # parse, then download each image
        for url in self.parse(res):
            res_img = self.download(url)
            name = url.strip().split('/').pop()
            if not name:
                # A URL ending in '/' has no file name to save under.
                continue
            file_name = self.path + '/' + name
            # save
            self.save(res_img, file_name)
if __name__ == '__main__':
    # Pages whose images should be downloaded. The items must be separated
    # by commas (the published listing used '.', which is a syntax error).
    url_list = [
        'https://www.yangqq.com/skin/jxhx/',
        'https://www.yangqq.com/skin/jxhx/list.html',
        'https://www.yangqq.com/skin/jxhx/share.html',
        'https://www.yangqq.com/skin/jxhx/list2.html',
        'https://www.yangqq.com/skin/jxhx/list3.html',
        'https://www.yangqq.com/skin/jxhx/daohang.html',
        'https://www.yangqq.com/skin/jxhx/about.html',
    ]
    # Crawl each page in turn with its own GetImage instance.
    for url in url_list:
        text = GetImage(url)
        text.run()
Copy the code
3: Right-click the file and choose Run; the result is as follows:
OK, done.