preface

Lately I have been listening to pingshu (traditional Chinese storytelling) and found plenty of it on Ximalaya (Himalaya FM). Unfortunately most of it is paid content, so I bought a one-month membership and wrote a simple crawler to download a few dozen titles — enough to keep me listening for a year.

Analysis

Open the Chrome DevTools console, press Play, and the first request you will see is:

Learn Python from Python 01

https://mpay.ximalaya.com/mobile/track/pay/244130607/?device=pc
Copy the code

Of course, this is a paid book, so without a member's cookie in your browser you cannot access this URL. The number 244130607 is what their interface calls a trackId; each audio file has its own unique trackId.

The number at the end of the URL path is the unique trackId that identifies the audio file. Let's look at what this interface returns:

{
  "ret": 0,
  "msg": "0",
  "trackId": 244130607,
  "uid": 170217760,
  "albumId": 30816438,
  "title": "\"Three-Body\" Season 1, Episode 10: The Big Rip",
  "domain": "http://audiopay.cos.xmcdn.com",
  "totalLength": 12780565,
  "sampleDuration": 0,
  "sampleLength": 0,
  "isAuthorized": true,
  "apiVersion": "1.0.0",
  "seed": 9583,
  "fileId": "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*",
  "buyKey": "617574686f72697a6564",
  "duration": 1578,
  "ep": "20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f",
  "highestQualityLevel": 1,
  "downloadQualityLevel": 1,
  "authorizedType": 1
}

Since I am a member, I can open this URL directly in the browser. Only a few fields are useful. `seed` and `fileId` are fed to a JS decryption algorithm that computes the M4A path, which is then appended to the main domain (`domain`). `ep` is run through a second algorithm to obtain the URL query parameters `buy_key`, `sign`, `token` and `timestamp`. Joining all of these together yields the complete audio URL.

Two JS encryption algorithms

By stepping through the page's scripts in the debugger, I located the two JS algorithms.

  1. The JS algorithm that computes the M4A path:
/**
 * Seeded shuffler used to recover the audio path hidden in a track's `fileId`.
 *
 * Constructing an instance immediately builds `_cgStr`, a permutation of a
 * fixed 68-character alphabet driven by a small linear congruential generator.
 * `cg_fun` then maps each index in a `fileId` onto that permutation.
 */
class vt {
  /**
   * @param {number} t - Initial generator state (the `seed` field of the response).
   */
  constructor(t) {
    this._randomSeed = t;
    this.cg_hun();
  }

  /**
   * Builds `_cgStr` by repeatedly drawing one character from the remaining
   * alphabet and removing it, until the alphabet is exhausted.
   */
  cg_hun() {
    this._cgStr = "";
    // The backslash is a member of the alphabet and must be escaped: writing
    // "\:" silently drops it, leaving 67 characters and a different permutation.
    let pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890";
    const rounds = pool.length;
    for (let n = 0; n < rounds; n++) {
      const index = Math.floor(this.ran() * pool.length);
      const picked = pool.charAt(index);
      this._cgStr += picked;
      pool = pool.split(picked).join("");
    }
  }

  /**
   * Decodes a `*`-separated list of indices into characters of `_cgStr`.
   *
   * @param {string} t - Index list; the segment after the final `*` is ignored.
   * @returns {string} The decoded string.
   */
  cg_fun(t) {
    const tokens = t.split("*");
    let decoded = "";
    // The last segment is skipped, so a trailing "*" terminates the list.
    for (let n = 0; n < tokens.length - 1; n++) {
      decoded += this._cgStr.charAt(Number(tokens[n]));
    }
    return decoded;
  }

  /**
   * Advances the generator one step.
   *
   * @returns {number} A value in [0, 1).
   */
  ran() {
    this._randomSeed = (211 * this._randomSeed + 30031) % 65536;
    return this._randomSeed / 65536;
  }
}

/**
 * Decodes a `fileId` with the given seed and guarantees a leading "/".
 *
 * @param {number} t - The `seed` field of the response.
 * @param {string} e - The `fileId` field of the response.
 * @returns {string} The audio path, always starting with "/".
 */
const c = function (t, e) {
  const n = new vt(t).cg_fun(e);
  return n[0] === "/" ? n : "/".concat(n);
};

console.log(
  c(
    9583,
    "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*",
  ),
);

Run it with Node to print the M4A path:

/group3/M04/9E/88/wKgMbF4ejn2TfGPRAMMEFYoRHXs027.m4a
Copy the code
  2. The JS algorithm that derives the URL parameters from ep:
// --- Array-destructuring helpers carried over from the original script. ---
// They are not used by the decoder below.

/** Always throws; the final fallback of `tt`. */
const Z = function () {
  throw new TypeError("Invalid attempt to destructure non-iterable instance");
};

/**
 * Collects values from an iterable, stopping once `e` values were read
 * (when `e` is truthy).
 *
 * @param {Iterable<*>} t - Source iterable.
 * @param {number} [e] - Optional maximum number of values.
 * @returns {Array<*>} The collected values.
 */
const J = function (t, e) {
  const n = [];
  // for...of closes the iterator on early exit or on error.
  for (const value of t) {
    n.push(value);
    if (e && n.length === e) break;
  }
  return n;
};

/** Returns `t` when it is an array, otherwise `undefined`. */
const Q = function (t) {
  if (Array.isArray(t)) return t;
};

/** Returns `t` itself if it is an array, otherwise its first `e` iterated values. */
const tt = function (t, e) {
  return Q(t) || J(t, e) || Z();
};

/**
 * RC4 stream cipher over "binary strings" (one char code per byte).
 * Applying it twice with the same key returns the original text.
 *
 * @param {string} t - Key.
 * @param {string} e - Input text.
 * @returns {string} Input XOR-ed with the RC4 keystream.
 */
function yt(t, e) {
  const r = [];
  let o = 0;
  let i = "";
  for (let a = 0; a < 256; a++) r[a] = a;
  // Key scheduling.
  for (let a = 0; a < 256; a++) {
    o = (o + r[a] + t.charCodeAt(a % t.length)) % 256;
    [r[a], r[o]] = [r[o], r[a]];
  }
  // Keystream generation.
  let a = 0;
  o = 0;
  for (let u = 0; u < e.length; u++) {
    a = (a + 1) % 256;
    o = (o + r[a]) % 256;
    [r[a], r[o]] = [r[o], r[a]];
    i += String.fromCharCode(e.charCodeAt(u) ^ r[(r[a] + r[o]) % 256]);
  }
  return i;
}

// NOTE(review): the article computed this as yt("xm", <12+ byte literal>), but
// that literal was corrupted beyond recovery when the article was extracted.
// The plaintext below is the value that expression is understood to produce;
// together with `gt` it yields the 16-character key "xkt3a41psizxrh9l".
// Confirm against a live response before relying on it.
const mt = "g3utf1k6yxdwi0";

// Substitution table over the 36 symbols a-z (0..25) and 0-9 (26..35).
const gt = [
  19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35, 34, 16, 9, 10, 13, 22, 32, 29, 31,
  21, 18, 3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26,
];

/**
 * Substitutes every character of `t` through table `e`: a symbol with value
 * `v` becomes the symbol whose value is the position of `v` in `e`.
 */
function substituteKey(t, e) {
  const zero = "0".charCodeAt(0);
  const out = [];
  for (let r = 0; r < t.length; r++) {
    const ch = t[r];
    let o = ch >= "a" && ch <= "z" ? ch.charCodeAt(0) - 97 : ch.charCodeAt(0) - zero + 26;
    const position = e.indexOf(o);
    if (position !== -1 && position < 36) o = position;
    out[r] = o > 25 ? String.fromCharCode(o - 26 + zero) : String.fromCharCode(o + 97);
  }
  return out.join("");
}

/** Decodes base64 into a binary string; empty/missing input gives "". */
function decodeBase64(t) {
  if (!t) return "";
  return Buffer.from(String(t), "base64").toString("latin1");
}

/**
 * Decrypts the `ep` field of the track-pay response.
 *
 * @param {string} t - Base64 `ep` value.
 * @returns {string[]} The decrypted text split on "-"; the article lists the
 *   parts as buy_key, sign, token and timestamp.
 */
const bt = function (t) {
  const key = substituteKey("d" + mt + "9", gt);
  return yt(key, decodeBase64(t)).split("-");
};

const c = bt("20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f");
console.log(c);

This piece of JS is rather convoluted, and debugging it nearly finished me off: the pieces are not all in one place, so I had to copy them out bit by bit. In the end I collected the algorithm into this single JS file, ran it with Node as before, and got:

[
  '617574686f72697a6564',
  'ef9a0678d77870843ef203d6333ce021',
  '5790',
  '1598533668'
]
Copy the code

These four values are buy_key, sign, token and timestamp, in that order. With these two JS algorithms we can fully decode the parameters returned by this interface.

Reimplementing the algorithms in Python

  1. The algorithm that computes the M4A path:
class vt():
    """Seeded shuffler used to recover the audio path hidden in a ``fileId``.

    Constructing an instance immediately builds ``_cgStr``, a permutation of a
    fixed 68-character alphabet driven by a small linear congruential
    generator. ``cg_fun`` maps each index of a ``fileId`` onto it.
    """

    def __init__(self, t):
        """Store the generator state ``t`` (the ``seed`` field) and shuffle."""
        self._randomSeed = t
        self.cg_hun()

    def ran(self):
        """Advance the generator one step and return a float in [0, 1)."""
        self._randomSeed = (211 * self._randomSeed + 30031) % 65536
        return self._randomSeed / 65536

    def cg_hun(self):
        """Build ``_cgStr`` by drawing characters from the alphabet one by one."""
        self._cgStr = ""
        # The backslash is part of the alphabet; it is escaped explicitly so
        # the literal no longer relies on "\:" being an unrecognised escape.
        pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890"
        for _ in range(len(pool)):
            picked = pool[int(self.ran() * len(pool))]
            self._cgStr += picked
            pool = pool.replace(picked, "")

    def cg_fun(self, t):
        """Decode a ``*``-separated index list into characters of ``_cgStr``.

        Empty segments count as index 0, and the segment after the final ``*``
        is ignored.
        """
        indices = [int(i) if i else 0 for i in t.split("*")]
        return "".join(self._cgStr[i] for i in indices[:-1])


def path_decode(seed, fileId):
    """Return the audio path encoded by ``fileId`` under ``seed``."""
    return vt(seed).cg_fun(fileId)


if __name__ == '__main__':
    result = path_decode(
        9583,
        "27*31*44*62*1*8*6*48*52*4*6*17*16*6*35*35*6*43*25*27*48*63*58*4*50*47*60*64"
        "*15*39*59*49*2*36*48*48*16*58*18*44*2*32*12*7*52*64*51*26*29*4*22*",
    )
    print(result)
  2. The algorithm that derives the URL parameters from ep:
import base64


def yt(t, e):
    """RC4 stream cipher over strings whose code points are byte values.

    ``t`` is the key and ``e`` the input; the result is ``e`` XOR-ed with the
    keystream, so applying the function twice with the same key round-trips.
    """
    r = list(range(256))
    o = 0
    # Key scheduling.
    for a in range(256):
        o = (o + r[a] + ord(t[a % len(t)])) % 256
        r[a], r[o] = r[o], r[a]
    # Keystream generation.
    a = o = 0
    out = []
    for ch in e:
        a = (a + 1) % 256
        o = (o + r[a]) % 256
        r[a], r[o] = r[o], r[a]
        out.append(chr(ord(ch) ^ r[(r[a] + r[o]) % 256]))
    return "".join(out)


# NOTE(review): the article computed this as yt("xm", <encrypted literal>), but
# that literal was corrupted beyond recovery when the article was extracted.
# The plaintext below is the value that expression is understood to produce;
# together with ``gt`` it yields the 16-character key "xkt3a41psizxrh9l".
# Confirm against a live response before relying on it.
mt = "g3utf1k6yxdwi0"

# Substitution table over the 36 symbols a-z (0..25) and 0-9 (26..35).
gt = [19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35, 34, 16, 9, 10, 13, 22, 32,
      29, 31, 21, 18, 3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26]


def bt(t):
    """Decrypt the base64 ``ep`` value ``t`` into the four URL parameters.

    Returns a dict with the keys ``buy_key``, ``sign``, ``token`` and
    ``timestamp``. Raises ``ValueError`` if the decrypted text does not split
    into exactly four ``-``-separated parts.
    """

    def arg1(t, e):
        # A symbol with value v becomes the symbol whose value is the
        # position of v in the table e.
        n = []
        for ch in t:
            if "a" <= ch <= "z":
                o = ord(ch) - 97
            else:
                o = ord(ch) - ord("0") + 26
            if o in e[:36]:
                o = e.index(o)
            n.append(chr(o - 26 + ord("0")) if o > 25 else chr(o + 97))
        return "".join(n).strip()

    def arg2(t):
        # Base64 -> string with one character per decoded byte.
        if not t:
            return ""
        t = str(t)
        padded = t + "=" * (-len(t) % 4)
        return base64.b64decode(padded).decode("latin-1")

    a1 = arg1("d" + mt + "9", gt)
    a2 = arg2(t)
    buy_key, sign, token, timestamp = yt(a1, a2).split('-')
    data = dict(
        buy_key=buy_key,
        sign=sign,
        token=token,
        timestamp=timestamp,
    )
    return data


def ep_decode(ep):
    """Public entry point: decode the ``ep`` field of the track-pay response."""
    data = bt(ep)
    return data


if __name__ == '__main__':
    print(ep_decode('20NvOoh6T39X3qwKO4cY5g5bVhg+1nfPHIQafFTmCXihnrqF2PjczO8O0auK1KJhDrJ30XMYfKJo2uz+xgwd3rwRPi5f'))

At this point the paid-audio interface is fully decoded.

Free Interface Analysis

If you are not a member you can still listen to the free audio. I found the interface that serves free audio:

https://www.ximalaya.com/revision/play/v1/audio?id=324681559&ptype=1
Copy the code
{"ret": 200,"data": {"trackId": 324681559,"canPlay": true,"isPaid": false,"hasBuy": true,"src": "https://aod.cos.tx.xmcdn.com/group84/M03/4A/A6/wKg5Hl8s0cTwcp6xABQ0EbeuW5Q193.m4a","albumIsSample": false,"sampleDuration": 48,"isBaiduMusic": false,"firstPlayStatus": true,"isVipFree": false}}
Copy the code

This interface is simpler: the response directly contains the M4A audio address with no encryption, and the number in the URL is still the trackId. It is worth mentioning that the trackId of a free audio cannot be used with the paid interface. I suspect this is due to different API versions or different clients: at the time I analyzed not only the web page's interfaces but also captured those of the desktop client, and I no longer remember which of the two this one came from.

Interface for listing an entire book

The key parameters of the Ximalaya interfaces are trackId and albumId. A trackId identifies a single audio file, while an albumId identifies a single book (album).

https://www.ximalaya.com/revision/album/v1/getTracksList?albumId=30816438&pageNum=1&pageSize=1000
Copy the code

This interface requires an `xm-sign` request header. The script below shows how the xm-sign value is generated:

import requests
import time
import hashlib
import random
import json
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Certificate verification is disabled on every request below (verify=False),
# so silence the warning urllib3 would otherwise print each time.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def get_sign(headers):
    """Add an ``xm-sign`` header to ``headers`` and return the same dict.

    The signature is built from the server time fetched from
    ``/revision/time``: md5("himalaya-<serverTime>"), then a random number in
    parentheses, the server time, another random number in parentheses, and
    the local time in milliseconds.
    """
    serverTimeUrl = "https://www.ximalaya.com/revision/time"
    response = requests.get(serverTimeUrl, headers=headers, verify=False)
    serverTime = response.text
    nowTime = str(round(time.time() * 1000))
    digest = hashlib.md5("himalaya-{}".format(serverTime).encode()).hexdigest()
    sign = (
        digest
        + "({})".format(str(round(random.random() * 100)))
        + serverTime
        + "({})".format(str(round(random.random() * 100)))
        + nowTime
    )
    headers["xm-sign"] = sign
    return headers


def get_header():
    """Return request headers with a browser User-Agent and a fresh xm-sign."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36"
    }
    headers = get_sign(headers)
    return headers


if __name__ == '__main__':
    # This is the search interface.
    url = "https://www.ximalaya.com/revision/search/main?core=all&spellchecker=true&device=iPhone&kw=%E9%9B%AA%E4%B8%AD%E6%82%8D%E5%88%80%E8%A1%8C&page=1&rows=20&condition=relation&fq=&paidFilter=false"
    s = requests.get(url, headers=get_header(), verify=False)
    print(s.json())

There are many other interfaces that I won't go through here because I don't feel like writing them all up; with them you can download an entire book.

The final integration

I also wrote a QR-code login script for Ximalaya, because copying the cookie out of the browser every time is tedious, repetitive work.

import requests
import re
from threading import Thread
import time
from io import BytesIO
import http.cookiejar as cookielib
from PIL import Image
import sys
import psutil
from base64 import b64decode
import os

# Certificate verification is disabled on every request below (verify=False).
requests.packages.urllib3.disable_warnings()


class show_code(Thread):
    """Background thread that displays the login QR code."""

    def __init__(self, data):
        """``data`` is the raw (already base64-decoded) image bytes."""
        Thread.__init__(self)
        self.data = data

    def run(self):
        # Open the bytes as a PIL image and hand it to the system image viewer.
        img = Image.open(BytesIO(self.data))
        img.show()


def is_login(session):
    """Check whether ``session`` is logged in, using any cookies saved on disk.

    Returns ``(session, True)`` when the current-user endpoint answers with
    ``ret == 200`` and ``(session, False)`` otherwise.
    """
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"}
    url = "https://www.ximalaya.com/revision/main/getCurrentUser"
    try:
        session.cookies.load(ignore_discard=True)
    except OSError:
        # Best effort: a missing or empty cookie file (first run) just means
        # "not logged in yet". LoadError is a subclass of OSError.
        pass
    response = session.get(url, verify=False, headers=headers)
    if response.json()['ret'] == 200:
        print(response.json())
        return session, True
    else:
        return session, False


def login():
    """Return a logged-in ``requests`` session.

    Cookies are kept in ``.cookie/xmly.txt``. If they are missing or no longer
    valid, a QR code is shown and the check endpoint is polled once per second
    until it reports ``ret == 0``; the new cookies are then saved.
    """
    if not os.path.exists(".cookie"):
        os.makedirs('.cookie')
    if not os.path.exists('.cookie/xmly.txt'):
        with open(".cookie/xmly.txt", 'w') as f:
            f.write("")
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename='.cookie/xmly.txt')
    session, status = is_login(session)
    if not status:
        url = "https://passport.ximalaya.com/web/qrCode/gen?level=L"
        response = session.get(url, verify=False)
        data = response.json()
        t = show_code(b64decode(data['img']))
        t.start()
        qrId = data['qrId']
        url = 'https://passport.ximalaya.com/web/qrCode/check/%s/%s' % (qrId, int(time.time() * 1000))
        while 1:
            response = session.get(url, verify=False)
            data = response.json()
            if data['ret'] == 0:
                # The image viewer opened by show_code could be closed here
                # (e.g. with psutil) if desired.
                break
            time.sleep(1)
        # Match the ignore_discard=True used when loading, so that session
        # cookies are written to the file as well.
        session.cookies.save(ignore_discard=True)
    return session


if __name__ == '__main__':
    login()

This is a simple QR-code login script. The cookie is saved to a file automatically, so the next time you need a session you can simply call:

session = login()
Copy the code

You can then access the various interfaces while staying logged in.