1. Find the data source and get the URL
Ctrip travel website
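The listing URL used later in step 7 carries the destination as a `cityname` query parameter (the encoded value in that example is Beijing). Below is a minimal sketch of building such a URL for any destination; the base URL and parameter name are taken from step 7, the helper function itself is just an illustration:

```python
from urllib.parse import quote

# Base listing URL as it appears in step 7; startcity/salecity are left at 1
BASE = 'https://vacations.ctrip.com/?startcity=1&salecity=1&cityname='

def listing_url(destination: str) -> str:
    # URL-encode the destination name (e.g. a Chinese city name) and append it
    return BASE + quote(destination)

print(listing_url('北京'))  # reproduces the Beijing URL used in step 7
```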
2. Import required modules
from selenium import webdriver
import time
from pc import data_bool
import random
from selenium.webdriver.common.keys import Keys
3. Instantiate the object
```python
bool_ = data_bool()  # instantiate the helper class from pc.py
```
4. Write the list of regions and provinces to be typed into the search box
```python
# The set of regions and provinces to be typed into the search box
diqu = ['Beijing', 'Shanghai', 'Shenzhen', 'Chengdu', 'Guangzhou', 'Hangzhou', 'Sanya', 'Haikou',
        'Urumqi', 'Lhasa', 'Wuhan', 'Nanjing', "Xi'an", 'Southern Xinjiang', 'Dali', 'Shangri-La',
        'Harbin', 'Xishuangbanna', 'Gansu', 'Zhejiang', 'Tianjin', 'Jiangsu', 'Fujian', 'Chongqing',
        'Hunan', 'Liaoning', 'Shandong', 'Inner Mongolia', 'Anhui', 'Sichuan', 'Hubei', 'Hainan',
        'Hebei', 'Qinghai', 'Shaanxi', 'Jiangxi', 'Ningxia', 'Yunnan', 'Jilin', 'Henan', 'Guangxi',
        'Heilongjiang', 'Shanxi', 'Xinjiang', 'Guizhou', 'Tibet']
```
5. Set up the scraping function, with exception handling
Viewing Parameter Data
! [](https://p6-tt-ipv6.byteimg.com/origin/pgc-image/73d08fa140b24ca9ad6344db31161ecb)
```python
def result(driver):
    parent = driver.find_element_by_class_name('main_col')
    try:
        str_introduce = parent.find_elements_by_css_selector('p.list_product_title')
    except:
        str_introduce = []
        print('product titles came back empty')
    try:
        str_product = parent.find_elements_by_css_selector('p.list_product_tip')
    except:
        str_product = []
        print('product tips came back empty')
    try:
        list_box = parent.find_elements_by_css_selector('.list_label_box')
        list_box_item = [i.find_elements_by_css_selector('span') for i in list_box]
    except:
        list_box_item = []
        print('label list came back empty')
    try:
        list_retail = parent.find_elements_by_css_selector('p.list_product_retail')
    except:
        list_retail = []
        print('supplier info came back empty')
    try:
        str_grade = parent.find_elements_by_css_selector('p.list_change_grade')
    except:
        str_grade = []
        print('grades came back empty')
    try:
        str_number = parent.find_elements_by_css_selector('div.list_change_one')
    except:
        str_number = []
        print('review counts came back empty')
    try:
        str_remark = parent.find_elements_by_css_selector('div.list_change_two')
    except:
        str_remark = []
        print('remarks came back empty')
    try:
        str_price = parent.find_elements_by_css_selector('span.list_sr_price')
    except:
        str_price = []
        print('prices came back empty')

    rank = []
    for i in range(len(str_introduce)):
        try:
            introduce = str_introduce[i].text          # product title
        except:
            introduce = 'no'
        try:
            retail = list_retail[i].text               # supplier
        except:
            retail = 'no'
        try:
            grade = str_grade[i].text                  # rating information
        except:
            grade = '0'
        try:
            number = str_number[i].text                # number of reviews
        except:
            number = '0'
        try:
            remark = str_remark[i].text                # remark / comment info
        except:
            remark = '0'
        try:
            price = str_price[i].text                  # price
        except:
            price = '0'
        try:
            box = [span.text for span in list_box_item[i]]  # label tags
        except:
            box = []
        rank.append([introduce, retail, grade, number, remark, price, box])

    bool_.MemoryCsv(rank, 'electronic data.csv', 'a')  # append this page to the CSV
    try:
        parent.find_element_by_css_selector('a.down').click()  # click through to the next page
    except:
        print('data lost\n')
```
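Note that the `find_element_by_*` / `find_elements_by_*` helpers used throughout this post were removed in Selenium 4, so on a current Selenium release you need the `By` locators instead. A minimal sketch of the equivalent calls, reusing the same class names and CSS selectors as above:

```python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get('https://vacations.ctrip.com/?startcity=1&salecity=1&cityname=%E5%8C%97%E4%BA%AC')

# Selenium 4 equivalents of the deprecated find_element_by_* helpers
parent = driver.find_element(By.CLASS_NAME, 'main_col')
str_introduce = parent.find_elements(By.CSS_SELECTOR, 'p.list_product_title')
str_price = parent.find_elements(By.CSS_SELECTOR, 'span.list_sr_price')
print(len(str_introduce), 'products found on this page')
driver.quit()
```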
6. Define the function to grab the first few pages
```python
def range_(begin, end, driver):
    for i in range(begin, end):
        result(driver)                       # scrape the current page (result() also clicks to the next page)
        time.sleep(random.randint(20, 25))   # random pause between pages
        print(f'page {i} done')              # print the page number
```
7. Write the click events: automatically open the web page and search on Ctrip
```python
def t(x, x1, x2):
    for index, i in enumerate(x[x1:x2]):
        driver = webdriver.Chrome()
        time.sleep(3)
        driver.get('https://vacations.ctrip.com/?startcity=1&salecity=1&cityname=%E5%8C%97%E4%BA%AC')
        # type the destination into the search box and submit
        input_tag = driver.find_element_by_class_name('search_txt')
        input_tag.send_keys(i)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(3)
        range_(0, 35, driver)    # crawl up to 35 result pages for this destination
        driver.close()
        print(f'{i} complete')
        time.sleep(3)
```
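Fixed `time.sleep()` calls are fragile when the page is slow to render. An alternative is Selenium's explicit waits; here is a minimal sketch, assuming the same `main_col` container class used above (`WebDriverWait` and `expected_conditions` are part of the standard Selenium API):

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://vacations.ctrip.com/?startcity=1&salecity=1&cityname=%E5%8C%97%E4%BA%AC')
# block for up to 20 seconds until the product list container is present
WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'main_col'))
)
driver.quit()
```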
8. Import the threading module and split the work across threads
```python
import threading

# each thread works on its own slice of the destination list
t1 = threading.Thread(target=t, args=(diqu, 11, 23))
t2 = threading.Thread(target=t, args=(diqu, 23, 31))
t3 = threading.Thread(target=t, args=(diqu, 31, len(diqu)))
t1.start()
t2.start()
t3.start()
```
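If anything needs to run after the scraping finishes (closing files, post-processing the CSV), the main thread should wait for the workers with `join()`. A minimal, self-contained sketch, with a stand-in worker in place of the real `t` function:

```python
import threading
import time

def worker(name, start, end):
    # stand-in for the scraping function t(x, x1, x2)
    time.sleep(1)
    print(f'{name} finished slice {start}:{end}')

threads = [
    threading.Thread(target=worker, args=('t1', 11, 23)),
    threading.Thread(target=worker, args=('t2', 23, 31)),
    threading.Thread(target=worker, args=('t3', 31, 47)),
]
for th in threads:
    th.start()
for th in threads:
    th.join()   # block until every thread is done
print('all scraping threads finished')
```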
pc.py is a separate Python file. It contains the helper code for building request headers and for storing the scraped data.
! [](https://p1.pstatp.com/origin/pgc-image/1e38f123ac854858acbe8ed3698bb294)
```python
import requests
import chardet
import random
import csv
from openpyxl import Workbook
from pandas import DataFrame, Series


class data_bool():
    def __init__(self):
        pass

    def get_html(self, url):
        # pick a random User-Agent so requests look less like a bot
        user_agent = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        ]
        headers = {'User-Agent': random.choice(user_agent)}
        response = requests.get(url=url, headers=headers)
        response.encoding = 'utf-8'
        return response.text

    def MemoryCsv(self, data: list, fileName: str, mode='w'):
        # write (or append) each row of data to a CSV file
        with open(fileName, mode=mode, encoding='utf-8', newline='') as f:
            csvfile = csv.writer(f)
            for each in data:
                csvfile.writerow(each)
        print(fileName, 'saved successfully')

    def MemoryExcel(self, data, fileName):
        wb = Workbook()
        sheet = wb.active
        for each in data:
            sheet.append(each)
        wb.save(fileName)
        print(fileName, 'saved successfully')

    def Memonry_pandas_csv(self, data: list, fileName, mode='w'):
        DataFrame(data).to_csv(fileName, mode=mode, header=False, index=False)
```
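A quick usage sketch of these helpers; the file names and row values below are just placeholders, chosen to mirror how `result()` stores each page:

```python
from pc import data_bool

bool_ = data_bool()

# fetch a page of HTML with a randomized User-Agent
html = bool_.get_html('https://vacations.ctrip.com/')

# append a row to a CSV, in the same shape result() produces
rows = [['title', 'supplier', '4.9', '120', '98%', '1999', ['tag1', 'tag2']]]
bool_.MemoryCsv(rows, 'demo.csv', 'a')

# or write the same rows through pandas
bool_.Memonry_pandas_csv(rows, 'demo_pandas.csv', 'a')
```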
Did you learn it? If you run into any problems, feel free to leave a comment and discuss.