Crawling all videos from a Douyin (Chinese TikTok) user's home page with Python
Preparations:
- First, open the Douyin APP
- Select the creator (streamer) whose videos you want to capture and open their home page
- Click on the three dots in the upper right corner
- Click Share homepage
- Click the copy link (for example: v.douyin.com/dJHQm5D/)
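- Open the copied link in your browser (the short address is redirected to a long address)
- Open the browser's developer tools (right-click and choose "Inspect")
- Switch to mobile (device emulation) mode
- Copy the request URL of the works (video) list from the network panel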
Python code example for crawling all videos from a Douyin (TikTok) personal home page:
import time
import requests
import json
from urllib import parse
import re
import os

# Directory in which the downloaded videos are saved.
path = "./fuqiu/"
os.makedirs(path, exist_ok=True)

# Douyin works-list URL, copied from the "Request URL" shown by the browser's
# developer tools.  Alternative example users are kept below for reference.
# url = "https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAAhcyXr1LoGVFpQfCHXATmfm3bBHogPhLhSk3-Mgfp26I&count=21&max_cursor=0&aid=1128&_signature=Oy2-RAAAWjtoNHLRaElWWDstvl&dytk="
# url = "https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAAak8ElxZLmBZk6jessKl6pA3KEpkijlAr_Wdci_9DKXg&count=21&max_cursor=0&aid=1128&_signature=O-74MgAAWvpo9zSnIm-gXzvu-C&dytk="
# url = "https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek&count=21&max_cursor=0&aid=1128&_signature=OkwX9QAAW11pVdtg0uoN9zpMF-&dytk="
url = "https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAAO0MC3lWFsz8NStRyaUADb7bxsIV-Tz4q-_HEcRkFLvU&count=21&max_cursor=0&aid=1128&_signature=PN2LqwAAXctvxEc-QBhvGjzdi7&dytk="

resource_dict = {
    # file_name: url
}

headers = {
    'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
}

# Characters that are not allowed in file names: / \ : * ? " < > |
FORBIDDEN_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def build_page_url(base_url, max_cursor):
    """Return *base_url* with its ``max_cursor`` query field set to *max_cursor*.

    The URL is split into its six components, the query string (component
    index 4) is parsed into a dict, ``max_cursor`` is replaced, and the URL
    is put back together.  Query fields with blank values are dropped by
    ``parse_qs``.
    """
    bits = list(parse.urlparse(base_url))
    qs = parse.parse_qs(bits[4])
    qs['max_cursor'] = max_cursor
    bits[4] = parse.urlencode(qs, True)
    return parse.urlunparse(bits)


def sanitize_title(title):
    """Return *title* with every character forbidden in file names removed."""
    return FORBIDDEN_NAME_CHARS.sub("", title)


def video_file_path(directory, title, index):
    """Build the ``.mp4`` path for a video inside *directory*.

    Only the title is sanitised, so the separators of *directory* survive.
    When nothing usable is left of the title, ``video_<index>`` is used.
    """
    # NOTE(review): the first character of the description is skipped, as in
    # the original ``t[1:]`` -- confirm this is intended.
    name = sanitize_title((title or "")[1:]).strip()
    if not name:
        name = f"video_{index}"
    return os.path.join(directory, name + ".mp4")


def download_video(video_url, file_path):
    """Stream *video_url* into *file_path* and return the bytes written."""
    with requests.get(url=video_url, headers=headers, stream=True) as r:
        print(r)
        # The header may be absent, in which case no progress is reported.
        length = r.headers.get("Content-Length")
        reponse_body_lenth = int(length) if length is not None else None
        print("The data length of the video is :", reponse_body_lenth)
        write_all = 0
        with open(file_path, "wb") as xh:
            for chunk in r.iter_content(chunk_size=1000000):
                write_all += xh.write(chunk)
                if reponse_body_lenth:
                    print("download progress: %.2f%%"
                          % (100 * write_all / reponse_body_lenth))
    return write_all


def main():
    """Walk every page of the works list and download each listed video."""
    max_cursor = 0
    total_count = 0
    # The list is paginated: while ``has_more`` is true there is further
    # ("hidden") content, fetched by passing the returned ``max_cursor``.
    # Starting from True means the first page is processed even when the
    # server reports that nothing follows it.
    has_more = True
    while has_more:
        url_new = build_page_url(url, max_cursor)
        r = requests.get(url=url_new, headers=headers)
        print("Access Status:", r)
        data_json = json.loads(r.text)
        has_more = data_json['has_more']
        max_cursor = data_json['max_cursor']
        print('has_more22:', has_more)
        print('maxcursor22:', max_cursor)
        for aweme in data_json['aweme_list']:
            url_1 = aweme['video']['play_addr_lowbr']['url_list'][0]
            path_1 = video_file_path(path, aweme['desc'], total_count + 1)
            print(f"path_1 = {path_1}")
            download_video(url_1, path_1)
            total_count += 1
            print(f"total_count = {total_count}")


if __name__ == "__main__":
    main()