The target

Crawl the song charts of the KuGou Music site

The target address
The environment



The crawler code

Import the tools

import requests
import re
import parsel
Request the website

# Request headers sent with every HTTP call in this script.
# NOTE(review): 'authority' and 'referer' are blank in this listing and
# presumably need the target site's values -- confirm before running.
# The cookie was copied from a browser session and is reproduced unchanged;
# it is session-specific and may need refreshing.
headers = {
    'authority': '',
    'cookie': 'kg_mid=ac3836df72c523f46a85d8a5fd90fe59; kg_dfid=3ve7aQ2XyGmN0yE3uv3WcaHs; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d = 1600260110160312, 707; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; kg_mid_temp=ac3836df72c523f46a85d8a5fd90fe59; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1602312738',
    'referer': '',
    # A well-formed browser User-Agent (the listing had it garbled as
    # 'the Mozilla / 5.0 ...').
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
}

# Address of the chart index page. It is blank in this listing and must be
# set to a real URL before the request below can succeed.
url = ''
response = requests.get(url=url, headers=headers)

Parsing website data

def func(url):
    """Fetch one chart page and extract the per-song fields embedded in it.

    The page text is searched for "Hash", "album_id" and "FileName" entries,
    which are paired up positionally. For every song the query parameters of
    the 'play/getdata' call are assembled.

    NOTE(review): in this listing ``download_url`` is blank and the assembled
    ``params`` are not sent anywhere; the request that resolves the audio
    address (and any call to a download helper) is not shown here.

    Args:
        url: Address of the chart page to fetch.
    """
    response = requests.get(url=url, headers=headers)
    # Set the attribute requests actually consults when decoding ``.text``
    # (the listing assigned to a non-existent ``encode`` attribute).
    response.encoding = response.apparent_encoding
    page_text = response.text

    hashes = re.findall(r'"Hash":"(.*?)"', page_text, re.S)
    album_ids = re.findall(r'"album_id":(.*?),"', page_text, re.S)
    file_names = re.findall(r'"FileName":"(.*?)"', page_text, re.S)

    for song_hash, album_id, raw_name in zip(hashes, album_ids, file_names):
        # Titles appear in the page as \uXXXX escape sequences; turn them
        # back into readable text.
        file_name = raw_name.encode('utf-8').decode('unicode_escape')
        # print(song_hash, album_id, file_name)
        download_url = ''
        params = {
            'r': 'play/getdata',
            'callback': 'jQuery19107150201841602037_1602314563329',
            'hash': song_hash,
            'album_id': album_id,
            'dfid': '3ve7aQ2XyGmN0yE3uv3WcaHs',
            'mid': 'ac3836df72c523f46a85d8a5fd90fe59',
            'platid': '4',
            '_': '1602312793005',
        }


# Walk every chart found on the index page. ``html_data`` is produced
# elsewhere; each entry is indexed as (page address, chart name).
for entry in html_data:
    page_url = entry[0]
    name = entry[1]
    print(page_url)
    func(page_url)
    print('==========================is climbing a song take {}========================'.format(name))

Save the data

def download(url, title):
    """Download the audio at ``url`` and write it to disk as ``<title>.mp3``.

    Args:
        url: Direct address of the audio file.
        title: Song title; used as the file name and printed once saved.
    """
    # 'save address' is the tutorial's placeholder for the output directory
    # prefix; replace it with a real path (including the trailing separator).
    filename = 'save address' + title + '.mp3'
    response = requests.get(url=url, headers=headers)
    # Binary mode: the payload is raw audio bytes, not text.
    with open(filename, mode='wb') as f:
        f.write(response.content)
    print(title)

Run the code and the result is shown below

Did you learn?