Target site (the URL below is Base64-encoded)
‘aHR0cHM6Ly93d3cueW91emhpY2FpLmNvbS9udjEvMDEwMTAxMDAwMTAxMDEwMS5odG1s’
Analysis
Open the browser's developer tools and capture the traffic: the first request returns status code 203 and generates a cookie, and the second request carries that cookie.
Since this is cookie encryption, use a hook to locate where the cookie is generated.
// Breakpoint hook: pause in DevTools whenever page script assigns document.cookie.
// Paste into the browser console before the cookie-generating script runs.
(function hookDocumentCookie() {
  // Keep the native accessor so reads and writes still reach the real cookie
  // store; a getter that returns nothing would make the page see `undefined`.
  // NOTE(review): assumes the native accessor lives on Document.prototype,
  // as it does in current Chromium and Firefox - confirm for other browsers.
  const nativeCookie = Object.getOwnPropertyDescriptor(Document.prototype, "cookie");
  Object.defineProperty(document, "cookie", {
    configurable: true,
    get: function () {
      return nativeCookie.get.call(document);
    },
    set: function (val) {
      debugger;
      nativeCookie.set.call(document, val);
    },
  });
})();
Once the breakpoint is hit, click the previous frame in the call stack.
The variable b is the value we want.
Black-box call
Copy the entire JS code and run it under a Proxy-based tracer to see which environment objects it needs.
If you are unfamiliar with Proxy, see these articles:
zhuanlan.zhihu.com/p/30299114
www.cnblogs.com/tugenhua070…
zhuanlan.zhihu.com/p/60791215
The Proxy code is listed at the end.
Then print the value of b.
Because the JS code is generated dynamically, each newly returned script has to be substituted in to obtain the value of b.
Extract the environment that the script needs.
Straight to the code:
import requests
import execjs


class spider:
    """Fetch the target page by running the site's cookie-generating JS locally.

    The first response contains a script that computes a cookie value into the
    JS variable ``b``; that script is executed under a stubbed browser
    environment, and the page is requested again with the cookie set.

    NOTE(review): this block was reconstructed from a copy whose quoting,
    casing and whitespace had been damaged; header names, the npm module name
    and the cookie name below are best-effort restorations - confirm them
    against a real browser session.
    """

    def __init__(self):
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        # Minimal browser environment prepended to the site's script.
        # NOTE(review): the module name appeared as "crypto " in the damaged
        # source; "crypto-js" is assumed from the CryptoJS variable name.
        self.js_env = """
        var CryptoJS = require("crypto-js");
        let navigator = {
            userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
            platform: "Win32",
            appCodeName: "Mozilla",
            language: "zh-CN",
            webdriver: false,
            cookieEnabled: true,
            appVersion: "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
        };
        Object.defineProperties(navigator, {
            [Symbol.toStringTag]: { value: "Navigator" }
        })
        location = {
            pathname: 'www.youzhicai.com',
            href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
            host: 'www.youzhicai.com',
            reload: function (){},
        }
        document = {}
        window = {
            navigator: navigator,
            location: location,
            document: document,
        };
        """
        # Appended after the site's script so the computed value can be read back.
        self.get_cookie = """
        function getcookie(){return b}
        """

    def run(self):
        """Request the page, compute the cookie from the returned JS, and request again."""
        url = 'https://www.youzhicai.com/nv1/0101010001010101.html'
        response = self.session.get(url, headers=self.headers)
        # The first response body is the cookie-generating script wrapped in <script> tags.
        js_code = response.text.replace("<script>", "").replace("</script>", "")
        all_code = self.js_env + js_code + self.get_cookie
        context = execjs.compile(all_code)
        # Execute the helper to read the value the script stored in `b`.
        cookie = context.call("getcookie")
        print("first cookie:", cookie)
        # NOTE(review): the cookie name appeared as " spVRscode " in the damaged
        # source; the lower-case form is assumed - confirm against the browser.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(url, headers=self.headers)
        print(response.text)


if __name__ == '__main__':
    # Use a distinct instance name so the class itself is not rebound.
    crawler = spider()
    crawler.run()
The page is fetched successfully.
The Proxy tracer code is:
// Tracing hook: log every String.prototype.indexOf call made by the script
// under analysis, then defer to the native implementation.
let rawindexof = String.prototype.indexOf
// The rest parameter forwards the optional `position` argument (which a
// single-parameter wrapper would silently drop) while keeping the function's
// `length` at 1, the same as the native method.
String.prototype.indexOf = function (str, ...rest) {
  const res = rawindexof.call(this, str, ...rest);
  console.log(`[String] "${this}" is indexof "${str}", res is ${res}`);
  return res;
};
// Plain-object stand-in for the browser `navigator`, describing a
// Chrome 92 / Windows 10 client.
let mynavigator = {
  userAgent:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
  platform: "Win32",
  appCodeName: "Mozilla",
  language: "zh-CN",
  webdriver: false,
  cookieEnabled: true,
  appVersion:
    "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
};

// Make Object.prototype.toString report "[object Navigator]" for the stub.
Object.defineProperty(mynavigator, Symbol.toStringTag, { value: "Navigator" });
// Stand-in for the browser `history` object; no members are stubbed.
let myhistory = {
};

// Stand-in for the browser `screen` object.
let myscreen = {
  height: 768,
  width: 1366,
  colorDepth: 24,
};

// Stand-in for the browser `location` object of the target page.
let mylocation = {
  // `pathname` is the path component of `href` (it previously repeated the
  // host name, which is not what a browser reports for this URL).
  pathname: '/nv1/0101010001010101.html',
  href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
  host: 'www.youzhicai.com',
};
// Stub constructors mirroring the browser's Document -> HTMLDocument hierarchy.
let Document = function Document() {};
let HTMLDocument = function HTMLDocument() {};

// Link the constructors and their prototypes the way a subclass is linked:
// HTMLDocument inherits statics from Document, and HTMLDocument instances
// inherit from Document.prototype. (Pointing the constructor itself at
// Document.prototype would leave instances outside the Document chain and
// strip Function.prototype methods from HTMLDocument.)
Object.setPrototypeOf(HTMLDocument, Document);
Object.setPrototypeOf(HTMLDocument.prototype, Document.prototype);

// Make Object.prototype.toString report "[object HTMLDocument]".
Object.defineProperties(HTMLDocument.prototype, {
  [Symbol.toStringTag]: {
    value: "HTMLDocument",
  },
});
// Stand-in for the browser `document`, stubbing only the two methods below.
let mydocument = {
  /** Returns an empty object in place of a DOM element. */
  createElement: function () {
    return {};
  },
  /**
   * Returns an array-like result for a tag-name lookup.
   *
   * For "meta" (matched case-insensitively, as HTML documents do) the result
   * carries a "meta-pro" entry whose `content.length` is 6; for every other
   * tag it is an empty array rather than `undefined`, so callers can read
   * `.length` or index into it without throwing.
   *
   * @param {string} str - Tag name to look up.
   * @returns {Array} Array with an optional "meta-pro" property.
   */
  getElementsByTagName: function (str) {
    console.log(str)
    const found = [];
    if (String(str).toLowerCase() === "meta") {
      found["meta-pro"] = {
        "content": {
          "length": 6
        }
      };
    }
    return found;
  },
};
// Point mydocument's prototype at HTMLDocument.prototype so it inherits the
// "HTMLDocument" Symbol.toStringTag defined on that prototype.
Object.setPrototypeOf(mydocument, HTMLDocument.prototype)
// Empty constructor stub for the browser `Image`; exported from this module.
let Image = function (){}
// Window-level stubs. These properties are copied onto Node's `global` object
// with Object.assign when the `window` proxy is created further down.
let mywindow = {
// Empty constructor stub; kept as a function expression so `new` works on it.
XMLHttpRequest: function () {},
sessionStorage: {},
localStorage: {},
navigator: mynavigator,
// No-op stubs for window methods.
scrollTo: function (){},
addEventListener: function () {},
attachEvent: function () {},
screen: myscreen,
location: mylocation,
chrome: {},
document: mydocument,
history: myhistory
};
// Make Object.prototype.toString report "[object Window]" for Node's global
// object, which later serves as the target of the `window` proxy.
Object.defineProperty(global, Symbol.toStringTag, { value: "Window" });
// Keep a handle on the native implementation before replacing it.
let rawstringify = JSON.stringify;

/**
 * JSON.stringify replacement that short-circuits on the global object.
 *
 * Returns the literal text "global" when the argument is the global object,
 * or is an object whose `value` property is the global object; every other
 * call is delegated to the native implementation with the optional
 * `replacer` and `space` arguments passed through.
 *
 * @param {*} value - Value to serialise.
 * @param {Function|Array} [replacer] - Forwarded to the native stringify.
 * @param {string|number} [space] - Forwarded to the native stringify.
 * @returns {string|undefined} "global" or the native result.
 */
JSON.stringify = function (value, replacer, space) {
  // The parameter is deliberately not named `Object`, which would shadow the
  // built-in constructor inside this function.
  if ((value?.value ?? value) === global) {
    return "global";
  }
  return rawstringify(value, replacer, space);
};
/**
 * Build a Proxy handler that logs function calls and constructions.
 *
 * @param {string} WatchName - Label placed at the start of each log line.
 * @returns {{apply: Function, construct: Function}} Handler with `apply` and
 *   `construct` traps that forward to Reflect and log the outcome.
 */
function getMethodHandler(WatchName) {
  return {
    // Trap for plain calls: run the target, log it, hand back its result.
    apply(target, thisArg, argArray) {
      const outcome = Reflect.apply(target, thisArg, argArray);
      console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${argArray}], result is [${outcome}].`);
      return outcome;
    },
    // Trap for `new`: build the instance, log it, hand it back.
    construct(target, argArray, newTarget) {
      const instance = Reflect.construct(target, argArray, newTarget);
      console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${argArray}], result is [${instance}].`);
      return instance;
    },
  };
}
/**
 * Render a property key for log output. Symbols cannot be interpolated into
 * a template literal directly, so they are shown by description (or by their
 * `Symbol(...)` form when they have none). String keys are returned as-is.
 */
function formatPropKey(propKey) {
  if (typeof propKey === "symbol") {
    return propKey.description ?? propKey.toString();
  }
  return propKey;
}

/**
 * Render an arbitrary value for log output without ever throwing. Ordinary
 * values use normal template-literal coercion; values that cannot be coerced
 * (Symbols, null-prototype objects, arrays holding Symbols) fall back to a
 * type tag so that logging never breaks the traced script.
 */
function formatLogValue(value) {
  try {
    return `${value}`;
  } catch {
    return typeof value === "symbol"
      ? value.toString()
      : Object.prototype.toString.call(value);
  }
}

/**
 * Build a Proxy handler that logs every fundamental operation performed on
 * the wrapped object and forwards it to the matching Reflect method.
 *
 * Object and function values read through the `get` trap are themselves
 * wrapped in a proxy labelled `<WatchName>.<key>`, so nested accesses are
 * traced too.
 *
 * @param {string} WatchName - Label placed at the start of each log line.
 * @returns {object} Handler object implementing all thirteen proxy traps.
 */
function getObjhandler(WatchName) {
  // One wrapper per child object, so reading the same property twice yields
  // the same proxy and identity comparisons between reads keep working. A
  // child reachable under several keys keeps the label of the first key used.
  const childProxies = new WeakMap();

  const handler = {
    get(target, propKey, receiver) {
      const result = Reflect.get(target, propKey, receiver);
      const key = formatPropKey(propKey);
      const isFunction = typeof result === "function";
      const isObject = typeof result === "object" && result !== null;

      if (!isFunction && !isObject) {
        console.log(`[${WatchName}] getting propKey is [${key}], result is [${formatLogValue(result)}]`);
        return result;
      }

      if (isFunction) {
        console.log(`[${WatchName}] getting propKey is [${key}] , it is function`);
      } else {
        console.log(`[${WatchName}] getting propKey is [${key}], result is [${formatLogValue(result)}]`);
      }

      // Proxy invariant: for a non-writable, non-configurable own data
      // property the trap must return the property's actual value, so such
      // values (for example a class's `prototype`) are returned unwrapped.
      const own = Reflect.getOwnPropertyDescriptor(target, propKey);
      if (own !== undefined && own.configurable === false && own.writable === false) {
        return result;
      }

      let wrapped = childProxies.get(result);
      if (wrapped === undefined) {
        wrapped = new Proxy(result, getObjhandler(`${WatchName}.${key}`));
        childProxies.set(result, wrapped);
      }
      return wrapped;
    },
    set(target, propKey, value, receiver) {
      console.log(`[${WatchName}] setting propKey is [${formatPropKey(propKey)}], value is [${formatLogValue(value)}]`);
      return Reflect.set(target, propKey, value, receiver);
    },
    has(target, propKey) {
      const result = Reflect.has(target, propKey);
      console.log(`[${WatchName}] has propKey [${formatPropKey(propKey)}], result is [${result}]`);
      return result;
    },
    deleteProperty(target, propKey) {
      const result = Reflect.deleteProperty(target, propKey);
      console.log(`[${WatchName}] delete propKey [${formatPropKey(propKey)}], result is [${result}]`);
      return result;
    },
    getOwnPropertyDescriptor(target, propKey) {
      const result = Reflect.getOwnPropertyDescriptor(target, propKey);
      console.log(`[${WatchName}] getOwnPropertyDescriptor propKey [${formatPropKey(propKey)}] result is [${formatLogValue(result)}]`);
      return result;
    },
    defineProperty(target, propKey, attributes) {
      const result = Reflect.defineProperty(target, propKey, attributes);
      console.log(`[${WatchName}] defineProperty propKey [${formatPropKey(propKey)}] attributes is [${formatLogValue(attributes)}], result is [${result}]`);
      return result;
    },
    getPrototypeOf(target) {
      const result = Reflect.getPrototypeOf(target);
      console.log(`[${WatchName}] getPrototypeOf result is [${formatLogValue(result)}]`);
      return result;
    },
    setPrototypeOf(target, proto) {
      console.log(`[${WatchName}] setPrototypeOf proto is [${formatLogValue(proto)}]`);
      return Reflect.setPrototypeOf(target, proto);
    },
    preventExtensions(target) {
      console.log(`[${WatchName}] preventExtensions`);
      return Reflect.preventExtensions(target);
    },
    isExtensible(target) {
      const result = Reflect.isExtensible(target);
      console.log(`[${WatchName}] isExtensible, result is [${result}]`);
      return result;
    },
    ownKeys(target) {
      const result = Reflect.ownKeys(target);
      // Keys are formatted one by one: joining an array that holds Symbols
      // would throw.
      console.log(`[${WatchName}] invoke ownkeys, result is [${result.map((key) => formatPropKey(key))}]`);
      return result;
    },
    apply(target, thisArg, argArray) {
      const result = Reflect.apply(target, thisArg, argArray);
      console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${formatLogValue(argArray)}], result is [${formatLogValue(result)}].`);
      return result;
    },
    construct(target, argArray, newTarget) {
      const result = Reflect.construct(target, argArray, newTarget);
      console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${formatLogValue(argArray)}], result is [${formatLogValue(result)}].`);
      return result;
    },
  };
  return handler;
}
const navigator = new Proxy(Object.create(mynavigator), getObjhandler("navigator"));
const history = new Proxy(Object.create(myhistory), getObjhandler("history"))
const screen = new Proxy(Object.create(myscreen), getObjhandler("screen"));
const location = new Proxy(mylocation, getObjhandler("location"));
const document = new Proxy(mydocument, getObjhandler("document"));
const window = new Proxy(Object.assign(global, mywindow), getObjhandler("window"));
//checkproxy()
module.exports = {
window,
navigator,
screen,
location,
Image,
document,
history,
Document
}
Copy the code
Add this import at the top of the JS file being run:
// Pull the traced browser stand-ins in from the Proxy module. The leading
// "./" makes Node load Proxy.js from the directory of the requiring file; a
// bare 'Proxy.js' specifier would be looked up in node_modules instead.
let {
  window,
  navigator,
  location,
  screen,
  Image,
  document,
  history,
  Document
} = require('./Proxy.js');
Copy the code
Restoring the algorithm
Extract this code and restore it bit by bit.
\
Its key and the string to be encrypted can both be found above.
The result is the same.
You can see that this is the DES encryption algorithm.
The flow is: the first request returns the JS code containing the key and the string to be encrypted; Python then performs the encryption and sends the second request.
Code
from pyDes import des, ECB, PAD_PKCS5
import binascii
import requests
import re


class spider:
    """Fetch the target page by reproducing the site's DES cookie in Python.

    The first response embeds a key (JS variable ``a``) and a plaintext (JS
    variable ``b``); the cookie is the hex-encoded DES encryption of ``b``
    under ``a``.

    NOTE(review): this block was reconstructed from a copy whose quoting,
    casing and whitespace had been damaged; header names, the regular
    expressions and the cookie name below are best-effort restorations -
    confirm them against a real browser session.
    """

    def __init__(self):
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

    def des_encrypt(self, KEY, s):
        """DES-encrypt ``s`` with ``KEY``.

        :param KEY: encryption key taken from the page
        :param s: original string
        :return: the ciphertext as a hexadecimal byte string
        """
        secret_key = KEY  # password
        # Passed as in the original call; ECB mode does not use an IV.
        iv = secret_key
        # secret_key: encryption key, ECB: encryption mode, padmode: padding scheme
        des_obj = des(secret_key, ECB, iv, pad=None, padmode=PAD_PKCS5)
        # encrypt() returns raw bytes
        secret_bytes = des_obj.encrypt(s, padmode=PAD_PKCS5)
        # convert to hexadecimal
        return binascii.b2a_hex(secret_bytes)

    def run(self):
        """Request the page, compute the cookie from ``a`` and ``b``, and request again."""
        url = 'https://www.youzhicai.com/nv1/0101010001010101.html'
        response = self.session.get(url, headers=self.headers)
        # The exact spacing of the embedded `var a= '...'` / `var b = '...'`
        # statements was lost in the damaged source, so whitespace is matched
        # loosely here.
        a = re.findall(r";\s*var a\s*=\s*'(.*?)'\s*;", response.text)[0]
        b = re.findall(r";\s*var b\s*=\s*'(.*?)'\s*;", response.text)[0]
        print(a)
        print(b)
        cookie = self.des_encrypt(a, b).decode()
        print("first cookie:", cookie)
        # NOTE(review): the cookie name appeared as " spVRscode " in the damaged
        # source; the lower-case form is assumed - confirm against the browser.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(url, headers=self.headers)
        print(response.text)


if __name__ == '__main__':
    # Use a distinct instance name so the class itself is not rebound.
    crawler = spider()
    crawler.run()
The request succeeds.