LOL爬虫小工具

初学Python,根据网上的爬虫教程结合爱好写了个爬虫,能爬取LOL国服官网资料库里的图片
运行截图

LOL爬虫小工具

运行结果

LOL爬虫小工具

LOL爬虫小工具

LOL爬虫小工具

LOL爬虫小工具

下面是源码

# coding=utf-8
import requests
import re
import json
import os
import urllib
import time
import threading  
from queue import Queue  


def getHttpStatusCode(url):
    try:
        request = requests.get(url)
        httpStatusCode = request.status_code
        return httpStatusCode
    except requests.exceptions.HTTPError as e:
        return e


def Get_Hero_Jsurl(queue):
    url = 'https://lol.qq.com/biz/hero/champion.js'
    response = requests.get(url).content.decode('gb2312')
    regular = r'data":(.*?),"version":"'
    dict_js = json.loads(re.findall(regular, response)[0])
    for _item in dict_js:
        EN = dict_js[_item]['id']
        queue.put(EN.format(id=_item))


def Stitching_JS_links(name_en):
    link = 'https://lol.qq.com/biz/hero/{}.js'.format(name_en)
    return link


def Parsing_hero_JS(url):
    response = requests.get(url).content.decode('gb2312')
    regular = r'data":(.*?),"version":"'
    js_data = json.loads(re.findall(regular, response)[0])
    result = {
        'name_CN': js_data['name'] + ' ' + js_data['title'],
        'name_EN': js_data['id'],
        'skins': js_data['skins'],
        'skill': js_data['spells'],
        'passive': js_data['passive'],
    }
    return result


def Download_hero_resources(detail_url_list: Queue, id):
    while len(detail_url_list.queue):
        hero_name = detail_url_list.get()
        Metadata = Parsing_hero_JS(Stitching_JS_links(hero_name))
        path = os.getcwd() + '\英雄相关\' + Metadata['name_CN']
        if not os.path.exists(path + '\头像'):
            os.makedirs(path + '\头像')
        if not os.path.exists(path + '\皮肤'):
            os.makedirs(path + '\皮肤')
        if not os.path.exists(path + '\技能'):
            os.makedirs(path + '\技能')
        # 下载皮肤和头像图  [img]https://game.gtimg.cn/images/lol/act/img/skin/big1000.jpg[/img]
        for item_skin in Metadata['skins']:
            url_0 = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}.jpg'.format(
                item_skin['id'])
            url_1 = 'https://game.gtimg.cn/images/lol/act/img/skin/small{}.jpg'.format(
                item_skin['id'])
            urllib.request.urlretrieve(
                url_0,
                path + '\皮肤\' + item_skin['name'].replace('/', '') + '.jpg')
            urllib.request.urlretrieve(
                url_1,
                path + '\头像\' + item_skin['name'].replace('/', '') + '.jpg')
            pass
        # 下载技能图  [img]https://game.gtimg.cn/images/lol/act/img/spell/AnnieR.png[/img]
        for item_skill in Metadata['skill']:
            url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}.png'.format(
                item_skill['id'])
            save_path = path + '\技能\' + item_skill[
                'name'] + '_' + item_skill['id'].replace(hero_name,
                                                         '') + '.png'
            if (getHttpStatusCode(url_3) != 404):
                urllib.request.urlretrieve(url_3, save_path.replace('/', '-'))
        url_3 = 'https://game.gtimg.cn/images/lol/act/img/passive/{}'.format(
            Metadata['passive']['image']['full'])
        if (getHttpStatusCode(url_3) == 404):
            url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}'.format(
                Metadata['passive']['image']['full'])
        save_path = path + '\技能\' + Metadata['passive'][
            'name'] + '_P' + '.png'
        # test = getHttpStatusCode(url_3)
        urllib.request.urlretrieve(url_3, save_path.replace('/', '-'))


def Features_1():
    detail_url_queue = Queue(maxsize=1000)
    thread = threading.Thread(target=Get_Hero_Jsurl, args=(detail_url_queue, ))
    html_thread = []
    thread.start()
    for i in range(20):
        thread2 = threading.Thread(target=Download_hero_resources,
                                   args=(detail_url_queue, i))
        html_thread.append(thread2)
    start_time = time.time()
    print('将在2秒后启动多线程下载')
    time.sleep(2)
    for i in range(20):
        html_thread[i].start()
    sum_num = len(detail_url_queue.queue)
    thread.join()
    for i in range(20):
        html_thread[i].join()
    time_ = time.time() - start_time
    print('共下载了{}位英雄资源'.format(sum_num))
    print("用时: {}分{}秒".format(int(time_ // 60), int(time_ % 60) + 1))


def Features_2():
    hero_js = 'https://lol.qq.com/biz/hero/item.js'
    response = requests.get(hero_js).content.decode('gb2312')
    正则 = r'data":(.*?),"tree'
    list_js = re.findall(正则, response)
    dict_js = json.loads(list_js[0])
    Item_name = {}
    Item_url = {}
    if not os.path.exists(os.getcwd() + '\装备'):
        os.makedirs(os.getcwd() + '\装备')
    for i in dict_js:
        Item_name[i] = dict_js[i]['name']
        url0 = 'https://game.gtimg.cn/images/lol/act/img/item/'
        url1 = '.png'
        Item_url[i] = url0 + i + url1
    p = 0
    for i in Item_url:
        urllib.request.urlretrieve(
            Item_url[i],
            os.getcwd() + '\装备\' + Item_name[i] + '.png')
        p += 1
        _JD = (p / len(Item_url)) * 100
        print('已完成 ', end='')
        print('%.2lf' % _JD, end=' %
')


def main():
    print('LOL爬虫小工具 作者:艾斯托维亚')
    print('———————————————————————————————————————')
    print(' 1 |  从官网下载最新英雄头像、皮肤原画、技能图标')
    print(' 2 |    从官网下载最新最新装备图    ')
    print('———————————————————————————————————————')
    n = input('请输入数字以选择功能
')
    n = int(n)
    if n > 0 and n < 3:
        swicth = {
            1: Features_1,
            2: Features_2,
        }
        swicth[n]()
        input('数据保存在软件同目录
按任意键退出
')
    else:
        print('错误输入
程序即将退出')
        time.sleep(2)


if __name__ == "__main__":
    main()

欢迎各位大佬指点
成品下载链接:

https://www.lanzoux.com/iNUnbf71utc

© 版权声明
THE END
如果内容对您有所帮助,就支持一下吧!
点赞0 分享
评论 共47条

请登录后发表评论