The target site

‘aHR0cHM6Ly93d3cueW91emhpY2FpLmNvbS9udjEvMDEwMTAxMDAwMTAxMDEwMS5odG1s’

Analysis

Open the browser's developer tools and capture the traffic: the first request returns status code 203 and generates a cookie, and the second request carries that cookie.

Since the protection is an encrypted cookie, hook `document.cookie` to locate where the cookie is generated:

// Browser-console hook: pause in the debugger whenever the page writes document.cookie.
(function hookDocumentCookie() {
    // Keep the native accessor (defined on Document.prototype) so that reads and
    // writes still reach the real cookie store while the hook is installed.
    const nativeCookie = Object.getOwnPropertyDescriptor(Document.prototype, "cookie");
    Object.defineProperty(document, "cookie", {
        configurable: true, // lets the hook be redefined or deleted afterwards
        get: function () {
            return nativeCookie.get.call(document);
        },
        set: function (val) {
            debugger; // walk up the call stack from here to find the code that builds the cookie
            nativeCookie.set.call(document, val);
        }
    });
})();

When the breakpoint is hit, move up one frame in the call stack.

The parameter `b` is the value we want.

Black box call

Copy the entire JS code and run it under the proxy to see what environment it needs

For those unfamiliar with proxies, see these articles

zhuanlan.zhihu.com/p/30299114

www.cnblogs.com/tugenhua070…

zhuanlan.zhihu.com/p/60791215

The Proxy code is included at the end of this article.

And then print the value of b

Because the JS code is generated dynamically on every request, we have to patch each returned script in order to read the value of `b`.

Fill in the missing browser environment.

Here is the code:

import requests
import execjs


class spider:
    """Fetch the target page by running the site's cookie-generating JS through execjs.

    Flow: the first request returns a script that computes the cookie value `b`;
    the script is executed inside a minimal fake browser environment, the value is
    stored in the session cookie jar, and the page is requested a second time.
    """

    def __init__(self):
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        # js environment: the minimal set of browser globals the site script touches.
        # NOTE(review): the module name inside require() was garbled in the article
        # text ("crypto "); "crypto-js" is assumed from the variable name - confirm.
        self.js_env = """
var CryptoJS = require("crypto-js");
let navigator = {
    userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    platform: "Win32",
    appCodeName: "Mozilla",
    language: "zh-CN",
    webdriver: false,
    cookieEnabled: true,
    appVersion: "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
};
Object.defineProperties(navigator, {
    [Symbol.toStringTag]: {
        value: "Navigator"
    }
})
location = {
    pathname: 'www.youzhicai.com',
    href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
    host: 'www.youzhicai.com',
    reload: function () {},
}
document = {}
window = {
    navigator: navigator,
    location: location,
    document: document,
};
"""
        # call cookie: appended after the site script so `b` can be read back.
        self.get_cookie = """
function getcookie() {
    return b
}
"""

    def run(self):
        """Request the page, compute the cookie with the site's JS, and request again."""
        url = 'https://www.youzhicai.com/nv1/0101010001010101.html'
        response = self.session.get(url, headers=self.headers)
        # NOTE(review): part of this statement was lost in the article text; if the
        # original applied further replacements to expose `b`, restore them here.
        js_code = response.text.replace("<script>", "").replace("</script>", "")
        all_code = self.js_env + js_code + self.get_cookie
        context = execjs.compile(all_code)
        # execute function
        cookie = context.call("getcookie")
        print("first cookie:", cookie)
        # NOTE(review): the cookie name reads "spVRscode" in the garbled article text;
        # verify the exact (case-sensitive) name against the browser's request.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(url, headers=self.headers)
        print(response.text)


if __name__ == '__main__':
    spider = spider()
    spider.run()

Successfully get the page

The Proxy code is:

// Tracing hook: log every String.prototype.indexOf call made by the script under analysis.
const rawindexof = String.prototype.indexOf
/**
 * Logging wrapper around the native indexOf.
 * @param {*} str - the search value, exactly as passed by the caller
 * @param {...*} rest - remaining native arguments (the optional start position)
 * @returns {number} whatever the native indexOf returns
 */
String.prototype.indexOf = function (str, ...rest) {
    // Forward every argument so that indexOf(search, position) keeps its native result.
    const res = rawindexof.call(this, str, ...rest)
    console.log(`[String] "${this}" is indexof "${str}", res is ${res}`)
    return res
}

// Stand-in for the browser's `navigator` object (Chrome 92 on Windows profile).
// Object.defineProperty returns its target, so the Symbol.toStringTag needed for
// "[object Navigator]" is attached in the same expression that builds the object.
let mynavigator = Object.defineProperty(
    {
        userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
        platform: "Win32",
        appCodeName: "Mozilla",
        language: "zh-CN",
        webdriver: false,
        cookieEnabled: true,
        appVersion: "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    },
    Symbol.toStringTag,
    { value: "Navigator" }
);

// Placeholder for window.history; it carries no properties.
let myhistory = {};

// Stand-in for window.screen: a 1366x768 display with a colour depth of 24.
let myscreen = { height: 768, width: 1366, colorDepth: 24 };
// Stand-in for window.location, describing the page being fetched.
let mylocation = {
    // pathname is the path component of href (a browser never reports the host name here)
    pathname: '/nv1/0101010001010101.html',
    href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
    host: 'www.youzhicai.com',
};
// Empty constructors standing in for the DOM's Document / HTMLDocument interfaces.
let Document = function Document(){}
let HTMLDocument = function HTMLDocument(){}
// Chain both levels: the constructor inherits from Document (so it keeps
// Function.prototype methods such as call/apply/bind), and HTMLDocument.prototype
// inherits from Document.prototype so instances satisfy `instanceof Document`.
Object.setPrototypeOf(HTMLDocument, Document)
Object.setPrototypeOf(HTMLDocument.prototype, Document.prototype)
// Make Object.prototype.toString report "[object HTMLDocument]" for instances.
Object.defineProperties(HTMLDocument.prototype, {
    [Symbol.toStringTag]: {
        value: "HTMLDocument"
    }
})
// Stand-in for the browser's `document`, with only the two methods defined below.
let mydocument = {
    /**
     * Return a fresh empty object in place of a DOM element.
     * @returns {Object} a new empty object on every call
     */
    createElement: function (){
        return {};
    },
    /**
     * Return the fake collection for a tag name.
     * @param {*} str - the tag name requested by the caller (logged as received)
     * @returns {Array} for "meta" (any letter case) an array carrying a "meta-pro"
     *     entry whose content has length 6; for every other tag an empty array,
     *     so callers reading `.length` or indexing the result do not crash
     */
    getElementsByTagName: function (str){
        console.log(str)
        if (String(str).toLowerCase() === "meta") {
            let metaRes = []
            metaRes["meta-pro"] = {
                "content": {
                    "length": 6
                }
            }
            return metaRes
        }
        // No elements are modelled for other tags: behave like an empty collection.
        return []
    },
};
// Point the document stub's prototype at HTMLDocument.prototype
Object.setPrototypeOf(mydocument, HTMLDocument.prototype)
// Empty constructor exported under the name Image
let Image = function (){}

// Properties that are later copied onto Node's `global` (see the Object.assign call
// where the `window` proxy is created) so that it can stand in for a browser window.
let mywindow = {
    // Function stubs with empty bodies: they accept any arguments and return undefined.
    XMLHttpRequest: function () {},
    // Empty objects in place of the storage areas.
    sessionStorage: {},
    localStorage: {},
    navigator: mynavigator,
    scrollTo: function (){},
    addEventListener: function () {},
    attachEvent: function () {},
    screen: myscreen,
    location: mylocation,
    // Empty object in place of window.chrome.
    chrome: {},
    document: mydocument,
    history: myhistory
};
// Have Object.prototype.toString report Node's global object as "[object Window]".
Object.defineProperty(global, Symbol.toStringTag, { value: "Window" });
// Keep the native implementation so ordinary values are still serialised normally.
let rawstringify = JSON.stringify;
/**
 * JSON.stringify replacement that short-circuits on the global object.
 * @param {*} value - value to serialise
 * @param {Function|Array|null} [replacer] - forwarded unchanged to the native stringify
 * @param {string|number} [space] - forwarded unchanged to the native stringify
 * @returns {string|undefined} "global" when `value` (or its `.value` property) is the
 *     global object; otherwise the native JSON.stringify result
 */
JSON.stringify = function (value, replacer, space) {
    if ((value?.value ?? value) === global) {
        return "global"
    }
    // Pass replacer/space through so filtering and pretty-printing keep working.
    return rawstringify(value, replacer, space)
}


/**
 * Build a Proxy handler that logs calls to, and constructions of, a wrapped function.
 * @param {string} WatchName - label written at the start of every log line
 * @returns {{apply: Function, construct: Function}} handler with `apply` and `construct` traps
 */
function getMethodHandler(WatchName) {
    return {
        // Plain call: run the target first, then log its name, arguments and result.
        apply(target, thisArg, argArray) {
            const outcome = Reflect.apply(target, thisArg, argArray);
            console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${argArray}], result is [${outcome}].`);
            return outcome;
        },
        // `new` call: construct the instance first, then log it the same way.
        construct(target, argArray, newTarget) {
            const instance = Reflect.construct(target, argArray, newTarget);
            console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${argArray}], result is [${instance}].`);
            return instance;
        },
    };
}

/**
 * Build a Proxy handler that logs every fundamental operation performed on the
 * wrapped object and wraps object/function property values in nested tracing proxies.
 *
 * Logging never throws: Symbol keys and values that cannot be converted to a string
 * (for example null-prototype objects) are rendered through safe helpers.
 *
 * @param {string} WatchName - label written at the start of every log line; nested
 *     proxies get `${WatchName}.${key}`
 * @returns {Object} a handler object implementing all thirteen Proxy traps
 */
function getObjhandler(WatchName) {
    // Symbols cannot be interpolated into a template literal; show their description.
    const showKey = (key) => (typeof key === "symbol" ? (key.description ?? key.toString()) : key)
    // Stringify any value for logging; fall back to the [object X] tag when the
    // value has no usable string conversion.
    const show = (value) => {
        try {
            return typeof value === "symbol" ? value.toString() : `${value}`
        } catch {
            return Object.prototype.toString.call(value)
        }
    }

    let handler = {
        get(target, propKey, receiver) {
            let result = Reflect.get(target, propKey, receiver)
            const keyText = showKey(propKey)
            if (result instanceof Object) {
                if (typeof result === "function") {
                    console.log(`[${WatchName}] getting propKey is [${keyText}] , it is function`)
                } else {
                    console.log(`[${WatchName}] getting propKey is [${keyText}], result is [${show(result)}]`);
                }
                // Proxy invariant: a read-only, non-configurable data property must be
                // reported with its actual value, so it cannot be wrapped in a new Proxy.
                const own = Reflect.getOwnPropertyDescriptor(target, propKey)
                if (own && own.configurable === false && own.writable === false) {
                    return result
                }
                return new Proxy(result, getObjhandler(`${WatchName}.${keyText}`))
            }
            console.log(`[${WatchName}] getting propKey is [${keyText}], result is [${show(result)}]`);
            return result;
        },
        set(target, propKey, value, receiver) {
            console.log(`[${WatchName}] setting propKey is [${showKey(propKey)}], value is [${show(value)}]`);
            return Reflect.set(target, propKey, value, receiver);
        },
        has(target, propKey) {
            var result = Reflect.has(target, propKey);
            console.log(`[${WatchName}] has propKey [${showKey(propKey)}], result is [${result}]`)
            return result;
        },
        deleteProperty(target, propKey) {
            var result = Reflect.deleteProperty(target, propKey);
            console.log(`[${WatchName}] delete propKey [${showKey(propKey)}], result is [${result}]`)
            return result;
        },
        getOwnPropertyDescriptor(target, propKey) {
            var result = Reflect.getOwnPropertyDescriptor(target, propKey);
            console.log(`[${WatchName}] getOwnPropertyDescriptor  propKey [${showKey(propKey)}] result is [${show(result)}]`)
            return result;
        },
        defineProperty(target, propKey, attributes) {
            var result = Reflect.defineProperty(target, propKey, attributes);
            console.log(`[${WatchName}] defineProperty propKey [${showKey(propKey)}] attributes is [${show(attributes)}], result is [${result}]`)
            return result
        },
        getPrototypeOf(target) {
            var result = Reflect.getPrototypeOf(target)
            console.log(`[${WatchName}] getPrototypeOf result is [${show(result)}]`)
            return result;
        },
        setPrototypeOf(target, proto) {
            console.log(`[${WatchName}] setPrototypeOf proto is [${show(proto)}]`)
            return Reflect.setPrototypeOf(target, proto);
        },
        preventExtensions(target) {
            console.log(`[${WatchName}] preventExtensions`)
            return Reflect.preventExtensions(target);
        },
        isExtensible(target) {
            var result = Reflect.isExtensible(target)
            console.log(`[${WatchName}] isExtensible, result is [${result}]`)
            return result;
        },
        ownKeys(target) {
            var result = Reflect.ownKeys(target)
            // Keys may include Symbols, so render each one before joining.
            console.log(`[${WatchName}] invoke ownkeys, result is [${result.map(showKey)}]`)
            return result
        },
        apply(target, thisArg, argArray) {
            let result = Reflect.apply(target, thisArg, argArray)
            console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${show(argArray)}], result is [${show(result)}].`)
            return result
        },
        construct(target, argArray, newTarget) {
            var result = Reflect.construct(target, argArray, newTarget)
            console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${show(argArray)}], result is [${show(result)}].`)
            return result;
        }
    }
    return handler;
}

const navigator = new Proxy(Object.create(mynavigator), getObjhandler("navigator"));
const history = new Proxy(Object.create(myhistory), getObjhandler("history"))
const screen = new Proxy(Object.create(myscreen), getObjhandler("screen"));
const location = new Proxy(mylocation, getObjhandler("location"));
const document = new Proxy(mydocument, getObjhandler("document"));
const window = new Proxy(Object.assign(global, mywindow), getObjhandler("window"));

//checkproxy()
module.exports = {
    window,
    navigator,
    screen,
    location,
    Image,
    document,
    history,
    Document
}
Add the import at the top of the JS file you are running:

// Load the tracing environment. The "./" prefix makes Node resolve Proxy.js relative
// to this file; a bare 'Proxy.js' specifier would be looked up in node_modules instead.
let {
    window,
    navigator,
    location,
    screen,
    Image,
    document,
    history,
    Document
} = require('./Proxy.js');
Restoring the algorithm

For the analysis, extract this code and restore it piece by piece.

Its key and the string that needs to be encrypted can be found above

The result is the same

You can see that this is a DES encryption algorithm

The flow is: the first request returns the JS code, from which we extract the key and the string to be encrypted; Python then performs the encryption and sends the second request with the resulting cookie.

code

from pyDes import des, ECB, PAD_PKCS5
import binascii
import requests
import re


class spider:
    """Fetch the target page by reproducing the site's DES cookie in pure Python.

    Flow: the first request returns a script containing the key (`a`) and the string
    to encrypt (`b`); the DES result is stored as a cookie and the page is requested
    a second time.
    """

    def __init__(self):
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

    def des_encrypt(self, KEY, s):
        """DES-encrypt `s` with `KEY` in ECB mode using PKCS5 padding.

        :param KEY: the DES key taken from the page script
        :param s: original string
        :return: the ciphertext as hexadecimal bytes
        """
        secret_key = KEY  # password
        iv = secret_key  # offset; passed through as in the original call
        # secret_key: encryption key, ECB: encryption mode, iv: offset, padmode: padding
        des_obj = des(secret_key, ECB, iv, pad=None, padmode=PAD_PKCS5)
        secret_bytes = des_obj.encrypt(s, padmode=PAD_PKCS5)
        return binascii.b2a_hex(secret_bytes)

    def _extract_var(self, name, text):
        """Return the single-quoted string assigned to JS variable `name` in `text`."""
        # Whitespace around the name and "=" is matched loosely because the exact
        # spacing of the page script was garbled in the article text.
        found = re.findall(r"var\s+" + re.escape(name) + r"\s*=\s*'(.*?)'", text)
        if not found:
            raise ValueError("variable %r not found in the response script" % name)
        return found[0]

    def run(self):
        """Request the page, compute the cookie from `a` and `b`, and request again."""
        url = 'https://www.youzhicai.com/nv1/0101010001010101.html'
        response = self.session.get(url, headers=self.headers)
        a = self._extract_var("a", response.text)
        b = self._extract_var("b", response.text)
        print(a)
        print(b)
        cookie = self.des_encrypt(a, b).decode()
        print("First cookie:", cookie)
        # NOTE(review): the cookie name reads "spVRscode" in the garbled article text;
        # verify the exact (case-sensitive) name against the browser's request.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(url, headers=self.headers)
        print(response.text)


if __name__ == '__main__':
    spider = spider()
    spider.run()

The request is successful