1. Cookie login

  • Take advantage of cookie persistence: cookies remain valid for a long time, so you can avoid logging in frequently
  • Cookies are generally generated by front-end JavaScript, and the generation logic can be reverse-engineered by capturing network packets. This is fairly difficult, but reverse-engineering JS is something crawler developers have to face
  • 2. OCR library: Tesseract (optical character recognition) can solve most traditional captchas
  • Install the tesseract-ocr software first, and then install the pytesseract Python library

Note:

  1. For Windows, you need to download the software installation package and configure environment variables
  2. For Linux (Debian/Ubuntu), install Tesseract with: sudo apt-get install tesseract-ocr

Captcha-solving platforms

  • Captcha-solving platforms such as "Code Rabbit" and "QQ Superman" both provide Python interfaces; these manual (human-powered) solving platforms charge a fee.
  • For the QQ Superman captcha-solving platform, you need to register a developer account first, and then fill in your personal account in the recognition program for authentication and billing. Billing starts once you are logged in (6 cents per captcha).

Using Selenium to simulate dragging and solve a sliding captcha

This was written a long time ago: the site's slider captcha has since been changed and this approach no longer works, so the code below is for reference only

import random
from time import sleep

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Browser-like request headers. Not referenced by the code visible in this
# section; kept for callers that may use it.
headers = {"User-Agent": "Mozilla / 5.0 (Windows NT 10.0; Win64; X64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36"
}

# Chrome options: turn off W3C mode via the experimental 'w3c' option.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option('w3c', False)

# Copy the capability template before editing it: DesiredCapabilities.CHROME
# is a shared class-level dict, and mutating it in place would leak the
# logging preference into every other user of that template.
caps = DesiredCapabilities.CHROME.copy()
caps['loggingPrefs'] = {'performance': 'ALL'}
class SliderVerificationCode(object):
    """Drive a Chrome session through a login page and its slider captcha.

    The flow (see ``main``) is: open the login page, type the credentials,
    click the login button, screenshot two captcha canvases, find the first
    column where they differ, build a drag trajectory and drag the slider.

    The code uses the Selenium 3 style API (``find_element_by_*``,
    ``executable_path``).
    """

    def __init__(self):
        """Start Chrome and set the defaults used by the other steps."""
        # Columns left of this x offset are skipped when searching for the
        # notch; the original author's note says the notch usually sits some
        # distance from the left edge, where the slider piece is drawn.
        self.left = 60
        self.url = 'https://passport.bilibili.com/login'
        # Raw string: the Windows path contains backslashes that must not be
        # read as escape sequences.
        self.driver = webdriver.Chrome(executable_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
        # Explicit-wait helper with a 20 second timeout.
        self.wait = WebDriverWait(self.driver, 20)
        self.phone = "17369251763"
        self.passwd = "abcdefg"

    def input_name_password(self):
        """Open the login page and type the account name and password."""
        self.driver.get(self.url)
        self.driver.maximize_window()
        input_name = self.driver.find_element_by_xpath("//input[@id='login-username']")
        input_pwd = self.driver.find_element_by_xpath("//input[@id='login-passwd']")
        # Type the credentials configured in __init__.
        input_name.send_keys(self.phone)
        input_pwd.send_keys(self.passwd)

    def click_login_button(self):
        """Click the login button, after which the captcha image appears."""
        login_btn = self.driver.find_element_by_class_name("btn-login")
        # Random 3-6 second pause before clicking.
        sleep(random.randint(3, 6))
        login_btn.click()

    def get_geetest_image(self):
        """Save the two captcha screenshots used for the pixel comparison.

        ``./captcha1.png`` is a screenshot of the ``geetest_canvas_bg``
        element. ``./captcha2.png`` is a screenshot of the
        ``geetest_canvas_slice`` element, taken after the
        ``geetest_canvas_fullbg`` element has been forced visible.
        """
        notched_canvas = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_bg')))
        sleep(2)
        notched_canvas.screenshot(r'./captcha1.png')
        # Change the element style with JS so the second picture is shown.
        # NOTE(review): presumably the now-visible full background covers the
        # slice canvas, so the second screenshot shows the un-notched image;
        # confirm against the live page.
        js = 'var change = document.getElementsByClassName("geetest_canvas_fullbg"); change[0].style = "display:block;" '
        self.driver.execute_script(js)
        sleep(2)
        overlay_canvas = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_slice')))
        overlay_canvas.screenshot(r'./captcha2.png')

    def is_similar(self, image1, image2, x, y):
        """Return True when both images have a similar pixel at ``(x, y)``.

        :param image1: image with the notch
        :param image2: image without the notch
        :param x: pixel column
        :param y: pixel row
        :return: True if each of the first three channels differs by less
            than the threshold, otherwise False
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        # Per-channel tolerance, so small rendering differences are ignored.
        threshold = 60
        # Only the first three channels of each pixel are compared.
        return all(abs(c1 - c2) < threshold
                   for c1, c2 in zip(pixel1[:3], pixel2[:3]))

    def get_diff_location(self):
        """Find the x coordinate of the notch's left edge.

        Scans columns left to right starting at ``self.left`` (rows top to
        bottom within each column) and returns the first column that
        contains a pixel on which the two screenshots differ.

        :return: the x coordinate, or None when no differing pixel is found
        """
        with Image.open('captcha1.png') as captcha1, \
                Image.open('captcha2.png') as captcha2:
            # Compare only the area both screenshots share, so that images of
            # different sizes cannot cause an out-of-range pixel access.
            width = min(captcha1.size[0], captcha2.size[0])
            height = min(captcha1.size[1], captcha2.size[1])
            for x in range(self.left, width):
                for y in range(height):
                    if not self.is_similar(captcha1, captcha2, x, y):
                        return x
        return None

    def get_move_track(self, gap):
        """Build a list of integer x offsets that add up to ``gap``.

        The motion accelerates (+5) over the first 4/5 of the distance and
        decelerates (-5) over the last 1/5, sampled every 0.2 time units.
        Rounding error is carried from one step to the next, and the final
        step is clamped, so the offsets sum to ``round(gap)``.

        :param gap: total distance to move, in pixels
        :return: list of per-step integer offsets (empty when ``gap <= 0``)
        """
        track = []  # per-step integer offsets
        current = 0  # exact (fractional) displacement so far
        emitted = 0  # integer pixels already placed into the track
        mid = gap * 4 / 5  # switch from acceleration to deceleration here
        t = 0.2  # time interval of one step
        v = 0  # current velocity
        while current < gap:
            a = 5 if current < mid else -5
            v0 = v  # velocity at the start of this step
            v = v0 + a * t
            move = v0 * t + 1 / 2 * a * t * t
            # Never travel past the target on the final step.
            current = min(current + move, gap)
            # Emit the difference between rounded cumulative positions, so
            # per-step rounding errors do not accumulate.
            target = round(current)
            track.append(target - emitted)
            emitted = target
        return track

    def move_slider(self, track):
        """Press the slider button, drag it along ``track`` and release it.

        :param track: iterable of horizontal offsets, one per drag step
        """
        slider = self.wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.geetest_slider_button')))
        ActionChains(self.driver).click_and_hold(slider).perform()
        for x in track:  # movement is horizontal only
            ActionChains(self.driver).move_by_offset(xoffset=x, yoffset=0).perform()
        sleep(1)
        ActionChains(self.driver).release().perform()  # release the mouse

    def main(self):
        """Run the whole flow: log in, capture the captcha, drag the slider.

        :raises RuntimeError: when the two screenshots show no difference,
            so no notch position can be determined
        """
        self.input_name_password()
        self.click_login_button()
        self.get_geetest_image()
        gap = self.get_diff_location()  # x of the notch's left edge
        if gap is None:
            raise RuntimeError(
                "could not locate the captcha notch: the two screenshots "
                "show no differing pixel")
        # Subtract the distance from the slider's left side to the image's
        # left side, giving the distance the slider actually has to move.
        gap = gap - 6
        track = self.get_move_track(gap)
        self.move_slider(track)
if __name__ == "__main__":
    # Run the full login-and-slide flow when executed as a script.
    springAutumn = SliderVerificationCode()
    springAutumn.main()