Crawling Baidu's waterfall-flow (infinite-scroll) image results with Python
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urlencode
import json
import os
name = input("Please enter the keyword of the pictures you want to crawl:")
number = int(input("Please enter the number of pictures to crawl:"))

# date holds the basic query parameters of the Baidu image request; they can be
# seen through F12: refresh the page and the parameters of the new acjson
# request appear in the network panel, ready to be copied out.
date = {"tn": "resultjson_com", "ipn": "rj", "ct": 201326592, "fp": "result",
        "queryWord": "name", "cl": 2, "lm": -1, "ie": "utf-8", "oe": "utf-8",
        "word": "name", "pn": 0, "rn": 30}

def get_url(date):
    url = "https://image.baidu.com/search/acjson?" + urlencode(date)  # urlencode turns the date dict into a URL query string
    # print(url)  # verify the generated link
    return url
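# Illustrative example only: with name == "cat" the generated link looks roughly like
# https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&...&word=cat&pn=0&rn=30
# (the "..." stands for the remaining encoded parameters of date).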
def get_html(url):
    html = requests.get(url)
    # print(html.status_code)  # check the response status code
    return html

def get_urllist(html):
    # .json() parses the JSON response into a dictionary; its "data" key holds
    # the entries that carry the Baidu image links.
    data = html.json()["data"]
    # print(data[1])
    return data
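# Illustrative sketch only: each element of data is a dict along the lines of
# {"middleURL": "...", "thumbURL": "...", "fromPageTitle": "..."} ("middleURL" is
# the key the script actually uses below; the other names are assumptions about
# what the F12 network panel shows). get_picture() keeps just the "middleURL"
# value of every entry.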
def get_picture(data):
    picture_urllist = []
    for i in range(len(data)):
        try:
            # each entry of data is itself a dictionary with many keys;
            # "middleURL" is the one holding the picture link, so extract it
            picture_urllist.append(data[i]["middleURL"])
        except:
            continue
    # print(picture_urllist)
    return picture_urllist
def picture_write(picture_urllist, n):
    for i in range(len(picture_urllist)):
        # try:
        path = "/home/jin/life/picture/" + name + "/" + name + str(n) + ".jpg"
        picture = requests.get(picture_urllist[i])
        with open(path, "wb") as file:
            file.write(picture.content)
        n += 1
        print("Successfully downloaded picture {}".format(n))
        if n >= number:
            print("Crawl finished")
            exit()
        # except:
        #     continue
    return n
def make_file(name):
    path = "/home/jin/life/picture/" + name
    os.makedirs(path)  # os.makedirs(path) creates a folder at the given path
    # print(path)  # check whether the folder was created
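# Note: os.makedirs(path) raises FileExistsError if the folder already exists.
# A tolerant variant (a sketch; exist_ok=True is a standard-library option that
# the original code does not use) would be:
#     os.makedirs(path, exist_ok=True)  # reuse the folder if it already exists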
def main():
    date["queryWord"] = name
    date["word"] = name
    n = 0
    make_file(name)
    # pn is the result offset; step by 30 because each request returns rn=30 entries
    for i in range(0, 10000, 30):
        date["pn"] = i
        url1 = get_url(date)
        html = get_html(url1)
        data = get_urllist(html)
        pictureurl = get_picture(data)
        n = picture_write(pictureurl, n)

main()
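Individual image links can time out or fail, which is why the try/except inside picture_write is left commented out above. A minimal sketch of a hardened download loop that restores that error handling (the timeout value and the requests.RequestException catch are assumptions of this sketch, not part of the original code):

def picture_write(picture_urllist, n):
    for picture_url in picture_urllist:
        try:
            path = "/home/jin/life/picture/" + name + "/" + name + str(n) + ".jpg"
            picture = requests.get(picture_url, timeout=10)  # assumed timeout, not in the original
            with open(path, "wb") as file:
                file.write(picture.content)
            n += 1
            print("Successfully downloaded picture {}".format(n))
            if n >= number:
                print("Crawl finished")
                exit()
        except requests.RequestException:
            continue  # skip links that fail to download
    return n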