Start Selenium Grid in Docker and route the browser through a tunnel (rotating) proxy by loading a Chrome extension. Topic: how to set a proxy with authentication in Selenium ChromeDriver (Python).
A proxy with authentication is not supported in headless Chrome, but it is supported with Docker Selenium or a Selenium Grid cluster. Start the Selenium Docker container:
docker run -d -p 4444:4444 --shm-size=2g -m 800M --memory-swap=800M --name=chrome --restart=always selenium/standalone-chrome
Copy the code
Selenium with a dynamic tunnel proxy (the script generates a local ZIP extension file):
import os
import time
import zipfile
from selenium import webdriver
from scrapy.selector import Selector
# Connection settings for the authenticated proxy and the remote Selenium
# server. PROXY_USER / PROXY_PASS are placeholders to be filled in.
PROXY_HOST = 'http-dyn.abuyun.com'  # rotating proxy or host
PROXY_PORT = 9020  # port
PROXY_USER = ' '  # username
PROXY_PASS = ' '  # password
REMOTE_SELENIUM = '111.22.111.11:4444'  # remote Docker Selenium address (host:port)
# Manifest (manifest.json) of the generated Chrome extension. It requests the
# "proxy" permission so background.js can set the proxy, and the
# "webRequest"/"webRequestBlocking" permissions plus the "<all_urls>" host
# permission so it can answer proxy authentication challenges.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version": "22.0.0"
}
"""
# Background script (background.js) of the generated Chrome extension.
# The four %s placeholders are filled, in order, with PROXY_HOST, PROXY_PORT,
# PROXY_USER and PROXY_PASS. The script sets a fixed HTTP proxy and supplies
# the credentials whenever an authentication challenge is raised for any URL.
background_js = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
def get_chromedriver(use_proxy=False, user_agent=None, use_docker=True):
    """Create a Chrome WebDriver, optionally using the authenticated proxy.

    Args:
        use_proxy: When true, pack ``manifest_json`` and ``background_js``
            into ``proxy_auth_plugin.zip`` (written to the current working
            directory) and load that archive as a Chrome extension.
        user_agent: Optional string passed to Chrome as ``--user-agent``.
        use_docker: When true, connect to the remote Selenium server at
            ``REMOTE_SELENIUM``; otherwise start a local Chrome driven by
            ``/usr/local/bin/chromedriver``.

    Returns:
        The created ``webdriver.Remote`` or ``webdriver.Chrome`` instance.
    """
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    if use_docker:
        return webdriver.Remote(
            command_executor="http://{}/wd/hub".format(REMOTE_SELENIUM),
            options=chrome_options,
        )

    # Local driver: pass the options through ``options=`` like the remote
    # branch does (``chrome_options=`` is the old, removed spelling) and give
    # the chromedriver location through a Service object (Selenium 4 API).
    # The original joined the script directory with this absolute path, which
    # always yields the absolute path itself, so the join was dropped.
    from selenium.webdriver.chrome.service import Service

    return webdriver.Chrome(
        service=Service('/usr/local/bin/chromedriver'),
        options=chrome_options,
    )
def main():
    """Load https://www.cip.cc through the proxy 11 times and print the result.

    Each round prints the text of the first ``<pre>`` element of the page
    (an empty string if there is none) and then sleeps for 3 seconds.
    """
    # Use docker with proxy
    driver = get_chromedriver(use_proxy=True, use_docker=True)
    print(driver)
    try:
        # Same number of rounds as the original counter loop (n = 0..10).
        for _ in range(11):
            driver.get('https://www.cip.cc')
            # default='' avoids calling .strip() on None when no <pre> exists.
            ip_text = Selector(text=driver.page_source).xpath(
                '//pre/text()').extract_first(default='').strip()
            print(ip_text)
            # The driver is deliberately not closed here: close() on the only
            # open window would make the next driver.get() fail.
            time.sleep(3)
    finally:
        # Always end the browser session, even if a round raised.
        driver.quit()
# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
    main()
Copy the code
Result:
Selenium with a plain HTTP proxy (HOST:PORT, no authentication):
from selenium import webdriver
# Open a page through an unauthenticated HTTP proxy and print its HTML.
PROXY = "88.157.149.250:8080"  # IP:PORT or HOST:PORT

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=%s' % PROXY)

# ``options=`` replaces the deprecated ``chrome_options=`` keyword.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get("http://www.cip.cc")
    print(chrome.page_source)
finally:
    # Shut the browser down even if loading the page fails.
    chrome.quit()
Copy the code