Python3爬虫使用requests爬取lol英雄皮肤
此次爬取lol英雄皮肤一共有两个版本,分别是多线程版本和非多线程版本。
多线程版本
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2/25/2020 2:24 PM
# @Author : XiaoXia
# @Blog : https://xiaoxiablogs.top
# @File : lol_hero_photo.py
import datetime
import requests
import simplejson
import os
import threading
# 多线程版本
class HeroImage(threading.Thread):
    """Worker thread that downloads every skin image of a single LoL hero.

    The thread fetches the hero's skin list, keeps the skins that have a
    main image, and stores each one as ``./hero/<hero_name>/<skin>.jpg``.
    """

    # Base URL of the per-hero skin list; "<hero_id>.js" is appended to it.
    url_demo = "https://game.gtimg.cn/images/lol/act/img/js/hero/"
    # Browser-like User-Agent sent with every request.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36"
    headers = {
        'User-Agent': ua
    }
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the thread (and the whole program) forever.
    request_timeout = 10

    def __init__(self, hero_id, hero_name):
        """Create a worker for one hero.

        :param hero_id: hero identifier used to build the skin-list URL
        :param hero_name: display name; "/" is stripped because the name is
            used as a directory name
        """
        super().__init__()
        self.hero_id = hero_id
        self.hero_name = hero_name.replace("/", "")

    def run(self):
        """Download all skins of this hero into its own directory."""
        print("{}的皮肤爬取开始了!!!".format(self.hero_name))
        hero_images_list = self.getImagesUrl()
        self.dirIsExist()
        for hero_images in hero_images_list:
            # Skin names may contain "/" (e.g. "K/DA"), which is not valid
            # inside a file name.
            self.saveImage(hero_images["url"], hero_images['name'].replace("/", ""))
        print("{}皮肤爬取完成!!!".format(self.hero_name))

    def dirIsExist(self):
        """Make sure ``./hero/<hero_name>/`` exists, creating it if needed.

        ``exist_ok=True`` is required because many threads run this at the
        same time; a separate exists-check followed by ``mkdir`` can raise
        ``FileExistsError`` when two threads race on ``./hero/``.
        """
        os.makedirs("./hero/{}/".format(self.hero_name), exist_ok=True)

    def getImagesUrl(self) -> list:
        """Fetch the skin list of this hero.

        :return: list of ``{"name": ..., "url": ...}`` dicts, one per skin
            whose ``mainImg`` is non-empty
        """
        response = self.getJson("{}{}.js".format(self.url_demo, self.hero_id))
        skins = simplejson.loads(response.text)['skins']
        # Entries with an empty mainImg (chroma variants, per the original
        # author's note) have no image to download, so they are dropped.
        return [
            {"name": skin['name'], "url": skin['mainImg']}
            for skin in skins
            if skin['mainImg']
        ]

    def saveImage(self, url: str, image_name: str):
        """Download one image and store it in this hero's directory.

        A failed download is reported and skipped so that one bad image does
        not abort the remaining skins of the hero.

        :param url: image address
        :param image_name: file name (without extension) to save the image as
        """
        image_path = "./hero/{}/{}.jpg".format(self.hero_name, image_name)
        try:
            with requests.get(url, headers=self.headers,
                              timeout=self.request_timeout) as response:
                # Do not write an HTTP error page to disk as a ".jpg".
                response.raise_for_status()
                image_file = response.content
        except requests.RequestException as exc:
            print("{}下载失败: {}".format(image_name, exc))
            return
        with open(image_path, "wb") as f:
            f.write(image_file)

    @staticmethod
    def getJson(hero_url: str) -> "requests.models.Response":
        """Send a GET request and return the raw response.

        :param hero_url: URL of the hero list or of a hero's skin list
        :return: the ``requests`` response object (body not yet parsed)
        """
        return requests.get(hero_url, headers=HeroImage.headers,
                            timeout=HeroImage.request_timeout)
if __name__ == "__main__":
    # Start timestamp, used only to report the total run time.
    start_time = datetime.datetime.now()
    # URL of the LoL hero list.
    hero_list = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
    jsons = HeroImage.getJson(hero_list)
    heros = simplejson.loads(jsons.text)["hero"]
    threads = []
    for hero in heros:
        # Hero record fields used here:
        #   heroId: id, name: epithet, alias: English name, title: Chinese name
        name = hero['name'] + '-' + hero['title']
        name = name.replace("/", "")
        thread = HeroImage(hero['heroId'], name)
        thread.start()
        threads.append(thread)
        print(threading.active_count())
    # Block until every worker has finished. join() sleeps while waiting,
    # unlike polling threading.active_count() in a tight loop, which keeps
    # one CPU core busy for the whole crawl.
    for thread in threads:
        thread.join()
    print("全部爬取完毕")
    end_time = datetime.datetime.now()
    print("总用时为:", end_time-start_time)
非多线程版本
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2/25/2020 2:24 PM
# @Author : XiaoXia
# @Blog : https://xiaoxiablogs.top
# @File : lol_hero_photo.py
import datetime
import requests
from lxml import etree
from pprint import pprint
import simplejson
import os
# Base URL of the per-hero skin list ("<hero_id>.js" is appended to it).
url_demo = "https://game.gtimg.cn/images/lol/act/img/js/hero/"
# URL of the list of all heroes.
hero_list = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
# Browser-like User-Agent string, split across lines for readability.
ua = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.100 Safari/537.36"
)
# Request headers built from the User-Agent above.
headers = {'User-Agent': ua}
def dirIsExist(dir_name: str):
    """Make sure ``./hero/<dir_name>/`` exists, creating it if needed.

    ``os.makedirs`` creates the parent ``./hero/`` directory as well, and
    ``exist_ok=True`` makes the call a no-op when the directory is already
    there, so no separate existence check is needed.

    :param dir_name: name of the directory to create below ``./hero/``
    """
    os.makedirs("./hero/{}/".format(dir_name), exist_ok=True)
def getJson(hero_url: str) -> requests.models.Response:
    """Send a GET request and return the raw response.

    The module-level ``headers`` are sent so this request uses the same
    User-Agent as the image downloads, and a timeout keeps a stalled
    connection from blocking the script indefinitely.

    :param hero_url: URL of the hero list or of a hero's skin list
    :return: the ``requests`` response object (body not yet parsed)
    """
    return requests.get(hero_url, headers=headers, timeout=10)
def getImagesUrl(hero_id: str) -> list:
    """Return the downloadable skins of one hero.

    :param hero_id: hero identifier, appended to ``url_demo`` to build the URL
    :return: list of ``{"name": ..., "url": ...}`` dicts, one per skin whose
        ``mainImg`` is non-empty
    """
    payload = getJson(url_demo + hero_id + ".js")
    skins = simplejson.loads(payload.text)['skins']
    # Fields read from each skin record: name (skin name), mainImg (image URL).
    candidates = (
        {"name": skin['name'], "url": skin['mainImg']}
        for skin in skins
    )
    # Entries with an empty URL (chroma variants, per the original author's
    # note) have nothing to download and are filtered out.
    return [entry for entry in candidates if entry['url']]
def saveImage(url: str, image_name: str, path: str):
    """Download one image and save it as ``<path>/<image_name>.jpg``.

    A failed download (network error, timeout or HTTP error status) is
    reported and skipped, so an error page is never written to disk as an
    image and one bad URL does not abort the whole crawl.

    :param url: image address
    :param image_name: file name (without extension) to save the image as
    :param path: target directory, with or without a trailing separator
    """
    # os.path.join works whether or not ``path`` ends with a separator.
    image_path = os.path.join(path, image_name + ".jpg")
    try:
        with requests.get(url, headers=headers, timeout=10) as response:
            response.raise_for_status()
            image_file = response.content
    except requests.RequestException as exc:
        print("{}下载失败: {}".format(image_name, exc))
        return
    with open(image_path, "wb") as f:
        f.write(image_file)
if __name__ == "__main__":
    # Start timestamp, used only to report the total run time.
    start_time = datetime.datetime.now()
    hero_index = getJson(hero_list)
    heros = simplejson.loads(hero_index.text)["hero"]
    for hero in heros:
        # Hero record fields used here:
        #   heroId: id, name: epithet, alias: English name, title: Chinese name
        name = (hero['name'] + '-' + hero['title']).replace("/", "")
        # Fetch the skin names and URLs before creating the hero's directory.
        image_lists = getImagesUrl(hero['heroId'])
        dirIsExist(name)
        target_dir = './hero/{}/'.format(name)
        for img in image_lists:
            # Some skins are named "K/DA ..."; "/" cannot be part of a file name.
            skin_name = img["name"].replace("/", "")
            print(skin_name)
            saveImage(img['url'], skin_name, target_dir)
    print("全部爬取完毕")
    # Report the elapsed wall-clock time.
    end_time = datetime.datetime.now()
    print("总用时为:", end_time - start_time)
本作品采用 知识共享署名-相同方式共享 4.0 国际许可协议 进行许可。
It is with sad regret to inform you StarDataGroup.com is shutting down.
It has been a tough year all round and we decided to go out with a bang!
Any group of databases listed below is $49 or $149 for all 16 databases in this one time offer.
LinkedIn DatabaseYou can purchase it at www.StarDataGroup.com and view samples.
43,535,433 LinkedIn RecordsUSA B2B Companies Database
28,147,835 CompaniesForex
Forex South Africa 113,550 Forex Traders
Forex Australia 135,696 Forex Traders
UK Companies DatabaseForex UK 779,674 Forex Traders
521,303 CompaniesGerman Databases
German Companies Database: 2,209,191 Companies
German Executives Database: 985,048 Executives
Australian Companies Database1,806,596 CompaniesUAE Companies Database
950,652 CompaniesAffiliate Marketers Database
494,909 recordsSouth African Databases
B2B Companies Database: 1,462,227 Companies
Directors Database: 758,834 Directors
Healthcare Database: 376,599 Medical Professionals
Wholesalers Database: 106,932 Wholesalers
Real Estate Agent Database: 257,980 Estate Agents
Forex South Africa: 113,550 Forex Traders
Visit www.stardatagroup.com or contact us with any queries.
Kind Regards,
StarDataGroup.com
It is with sad regret to inform you StarDataGroup.com is shutting down.
Fire sale till the 7th of Feb.
Any group of databases listed below is $49 or $149 for all 16 databases in this one time offer.
LinkedIn DatabaseYou can purchase it at www.StarDataGroup.com and view samples.
43,535,433 LinkedIn RecordsUSA B2B Companies Database
28,147,835 CompaniesForex
Forex South Africa 113,550 Forex Traders
Forex Australia 135,696 Forex Traders
UK Companies DatabaseForex UK 779,674 Forex Traders
521,303 CompaniesGerman Databases
German Companies Database: 2,209,191 Companies
German Executives Database: 985,048 Executives
Australian Companies Database1,806,596 CompaniesUAE Companies Database
950,652 CompaniesAffiliate Marketers Database
494,909 recordsSouth African Databases
B2B Companies Database: 1,462,227 Companies
Directors Database: 758,834 Directors
Healthcare Database: 376,599 Medical Professionals
Wholesalers Database: 106,932 Wholesalers
Real Estate Agent Database: 257,980 Estate Agents
Forex South Africa: 113,550 Forex Traders
Visit www.stardatagroup.com or contact us with any queries.
Kind Regards,
StarDataGroup.com
Use SendBulkMails.com to run email campaigns from your own private dashboard.
Cold emails are allowed and won't get you blocked :)
1Mil emails / mo @ $99 USDDedicated IP and Domain IncludedDetailed statistical reports (delivery, bounce, clicks etc.)Quick and easy setup with extended support at no extra cost.Cancel anytime!Regards,
www.SendBulkMails.com
Do you need more clients?
We have amazing databases starting at $9.99 until the end of the Month!
Visit us at StarDataGroup.com
SendBulkMails.com allows you to reach out to clients via cold email marketing.
1Mil emails starter packageDedicated IP and Domain IncludedDetailed statistical reports (delivery, bounce, clicks etc.)Quick and easy setup with extended support at no extra cost.Cancel anytime!SendBulkMails.com