import requests
from lxml import etree, html


# HTTP request headers sent with every page fetch.
header = {
    # Browser-like User-Agent (Chrome 90 on 64-bit Windows).
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/90.0.4430.212 Safari/537.36',
    # NOTE(review): this is a captured anonymous session cookie (logged_in=no).
    # The value contains embedded spaces that look like copy/paste damage and
    # the session has presumably expired -- confirm whether it is needed at all.
    'cookie': '_octo = GH1.1.1603785005.1622016794; logged_in=no; _gh_sess=Y9sNKguvJvVZBhj5Hy8%2BYEjMct2okitWOD%2BH8LLc%2FcFRnwLnTC%2BbZa9%2BxHAq5l1n%2FJ4uQrx4Vk2vS8JxzbE%2F6%2FeAIGKgr9t y%2Byz%2FRaD1SFH1YdqKh23FyR8gorjxXDjG2Z6U8kmW9iG61c0P8arKwSSylKpCV8aN6U1ApjCqSURVjV9ic9pSVucAVUw%2FoFesTuKQQqmNh3RlOYXEk rBecHFJj2vYXx%2B768Sxo%2FM6sxJ0pnavDSDIDWWHIIh%2FNeWosGcMAgd3BivBWhIfgbIDDw%3D%3D--1BqbR%2BZukQlv2cZf--byyWOrkNOr5SxXtdt %2BdUvw%3D%3D; tz=Asia%2FShanghai',
}


def get_data():
    """Scrape issue-list pages 1 through 29 and write the results to ``./1.txt``.

    Each page is fetched with ``get_data_from_url``; the collected items are
    then written one per line in the form
    ``(<n>) < <id>> [<href>]] : <text>`` with a CRLF line ending.

    The output file is emptied before any page is fetched, so a run that
    fails part-way leaves an empty file rather than stale results.
    """
    output_path = "./1.txt"

    # Opening in "w" mode already truncates; no explicit truncate() is needed.
    with open(output_path, "w", encoding="utf-8"):
        pass

    data = []
    for num in range(1, 30):
        print('the first' + str(num) + 'page... ')
        data.extend(get_data_from_url(num))

    print('Get data done, write to file... ')
    # Open once for the whole batch instead of once per item.
    # newline="" disables newline translation so the explicit '\r\n' is
    # written verbatim on every platform (otherwise Windows emits '\r\r\n').
    with open(output_path, "a", encoding="utf-8", newline="") as f:
        for index, item in enumerate(data, start=1):
            # lxml reports a missing text node as None; write it as empty.
            text = item['text'] or ''
            f.write('(' + str(index) + ') < ' + item['id'] + '> [' + item['href'] + ']] : ' + text + '\r\n')
    print('Write file complete... ')


def get_data_from_url(page):
    """Fetch one issue-list page and extract the anchors matched by the XPath.

    Args:
        page: Page number, forwarded to ``get_url`` to build the request URL.

    Returns:
        A list of dicts with keys ``'text'`` (the anchor's text, possibly
        ``None``), ``'id'`` (the anchor's ``id`` attribute) and ``'href'``
        (the anchor's ``href`` prefixed with ``https://github.com``).

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        KeyError: If a matched anchor has no ``id`` or ``href`` attribute.
    """
    url = get_url(page)
    # A timeout keeps a stalled connection from hanging the whole run.
    res = requests.get(url, headers=header, timeout=30)
    # Parse the raw bytes; lxml determines the encoding from the document,
    # so setting res.encoding (which only affects res.text) is unnecessary.
    tree = html.fromstring(res.content)
    tags_a = tree.xpath('//*[@id="repo-content-pjax-container"]/div/div[4]/div[2]/div/div/div/div[2]/a')

    data_list = []
    # NOTE(review): the first matched anchor is skipped, as in the original
    # loop (which started at index 1) -- confirm that this is intentional.
    for tag in tags_a[1:]:
        detail = tag.attrib
        data_list.append({
            'text': tag.text,
            'id': detail['id'],
            'href': 'https://github.com' + detail['href'],
        })
    return data_list


def get_url(page):
    """Return the issue-list URL for the given page number.

    Query parameters are appended as ``key=value&`` pairs, so the returned
    URL always ends with a trailing ``&``. Values are converted with
    ``str()`` and are not URL-encoded.
    """
    base = 'https://github.com/NervJS/taro/issues?'
    query = {'page': page}
    pairs = [key + '=' + str(value) + '&' for key, value in query.items()]
    return base + ''.join(pairs)


# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    get_data()