Start Selenium Grid in Docker and use a tunnel proxy through a Chrome extension: how to set a proxy with authentication in Selenium ChromeDriver (Python)

A proxy with authentication is not supported in headless Chrome, but it is supported with Docker Selenium or Selenium Grid clusters. Start the Selenium Docker container:

docker run -d -p 4444:4444 --shm-size=2g -m 800M --memory-swap=800M --name=chrome  --restart=always selenium/standalone-chrome
Copy the code

Using Selenium with a dynamic tunnel proxy (this generates a local ZIP extension file):

import os
import time
import zipfile

from selenium import webdriver
from scrapy.selector import Selector

# Proxy endpoint and credentials; they are substituted into background_js below.
PROXY_HOST = 'http-dyn.abuyun.com'  # rotating proxy or host
PROXY_PORT = 9020  # port
PROXY_USER = ' '  # username
PROXY_PASS = ' '  # password

REMOTE_SELENIUM = '111.22.111.11:4444'  # remote Docker Selenium address

# manifest.json of the generated Chrome extension (manifest version 2).
# "<all_urls>" is a Chrome match pattern, not markup: it must stay verbatim so
# the extension is allowed to intercept requests to every URL.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version": "22.0.0"
}
"""

# background.js of the generated extension: points Chrome at the fixed proxy
# server and answers the proxy's auth challenge with the credentials above.
# The four %s placeholders are filled in order: host, port, username, password.
background_js = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None, use_docker=True,
                     chromedriver_path='/usr/local/bin/chromedriver'):
    """Create a Chrome WebDriver, optionally using the authenticated proxy.

    Args:
        use_proxy: When true, write ``proxy_auth_plugin.zip`` (containing
            ``manifest_json`` and ``background_js``) into the current working
            directory and load it into Chrome as an extension.
        user_agent: Optional value passed to Chrome's ``--user-agent`` switch.
        use_docker: When true, connect to the Selenium server at
            ``REMOTE_SELENIUM``; otherwise start a local chromedriver.
        chromedriver_path: Path of the local chromedriver binary; only used
            when ``use_docker`` is false.

    Returns:
        The ``webdriver.Remote`` or ``webdriver.Chrome`` instance.
    """
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        # Package the two extension files into a zip that Chrome can load.
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    if use_docker:
        return webdriver.Remote(
            command_executor="http://{}/wd/hub".format(REMOTE_SELENIUM),
            options=chrome_options,
        )

    # The original joined the script directory with an absolute path, which
    # os.path.join resolves to the absolute path alone, so it is passed directly.
    # `options=` matches the Remote branch and replaces the deprecated
    # `chrome_options=` keyword.
    # NOTE(review): Selenium >= 4.10 no longer accepts `executable_path`; there
    # the path has to be supplied through a chrome `Service` object instead.
    return webdriver.Chrome(
        executable_path=chromedriver_path,
        options=chrome_options,
    )


def main():
    """Load https://www.cip.cc through the proxy 11 times and print the IP text.

    One driver session is reused for all requests and is always shut down,
    even if a page load or the extraction fails.
    """
    # Use docker with proxy
    driver = get_chromedriver(use_proxy=True, use_docker=True)
    print(driver)
    try:
        # 11 requests, the same count the original counter loop performed.
        for _ in range(11):
            driver.get('https://www.cip.cc')
            ip_text = Selector(text=driver.page_source).xpath(
                '//pre/text()').extract_first()
            # extract_first() yields None when the page has no <pre> text
            # (e.g. an error page), so guard before stripping.
            if ip_text is None:
                print('no <pre> text found in page')
            else:
                print(ip_text.strip())
            # Do not close() the window here: it is the session's only window
            # and the next get() would fail. quit() below cleans up once.
            time.sleep(3)
    finally:
        driver.quit()


if __name__ == '__main__':
    main()
Copy the code

Result:

Using Selenium with a plain HTTP proxy (HOST:PORT):

from selenium import webdriver

PROXY = "88.157.149.250:8080"  # IP:PORT or HOST:PORT

# Route all browser traffic through the proxy via Chrome's command-line switch.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)

# `options=` replaces the deprecated `chrome_options=` keyword.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("http://www.cip.cc")
    print(chrome.page_source)
finally:
    # Always end the session so the browser and chromedriver processes exit.
    chrome.quit()
Copy the code