|
本帖最后由 Stone_00x 于 2024-4-13 17:25 编辑
我想根据ID爬取LOL对应的皮肤数量跟最后战绩,但是爬不到数据,路过的大婶帮忙看下
网址:https://fa8.pw/
import json
import sys
import winreg
from datetime import datetime

import pandas as pd
import requests
from fake_useragent import UserAgent

# --- Resolve the current user's Desktop folder from the registry (Windows only).
key = winreg.OpenKey(
    winreg.HKEY_CURRENT_USER,
    r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders',
)
deskPath = winreg.QueryValueEx(key, "Desktop")[0]
winreg.CloseKey(key)  # close the handle instead of leaking it

url_id = "https://luck.92.edri.mobi/shop/shop/getAccount"
myheaders = {
    "User-Agent": UserAgent().random
}

data_list = []      # one dict per accepted account row
lastpage = 0        # count of non-empty result pages fetched across all goods
# Set (not list): O(1) membership tests inside the scrape loop.
allowed_regions = {"卡拉曼达", "暗影岛", "征服之海", "诺克萨斯", "战争学院", "雷瑟守备", "艾欧尼亚", "黑色玫瑰"}
# Maps the site's agent_goodsid -> goodsid query parameters, one pair per region/listing.
dic_goods = {"67310": "1807", "67311": "1808", "67320": "1817", "67328": "1825", "67321": "1818",
             "67327": "1824", "67313": "1810", "67316": "1813", "67318": "1815"}
# dic_goods={"67305":"1802"}#皮肤

for agent_goodsid, goodsid in dic_goods.items():
    # First request only discovers how many pages this listing has.
    mydata = {"agent_goodsid": agent_goodsid, "goodsid": goodsid,
              "page": "1", "userid": "959", "type": "new"}
    first = json.loads(requests.post(url_id, headers=myheaders, data=mydata).text)
    page = -(-first['count'] // 10)  # ceiling division: API serves 10 items per page

    if page == 0:
        continue
    daqu = first['data'][0]['number']['2']
    print(f"{daqu},page:{page}")

    for page_num in range(page):
        try:
            mydata = {"agent_goodsid": agent_goodsid, "goodsid": goodsid,
                      "page": page_num + 1, "userid": "959", "type": "new"}
            text = json.loads(requests.post(url_id, headers=myheaders, data=mydata).text)['data']
            if text:
                lastpage += 1
            for item in text:
                area = item['number']['2']
                if area not in allowed_regions:
                    continue  # skip regions we are not interested in
                name = item['number']['3']
                # The detail field is '|'-separated "key:value" pairs.
                detail = item['number']['4'].replace('|', '----')
                fields = detail.split('----')
                # Third field sometimes lacks its "英雄:" key prefix; normalize it.
                if "英雄:" not in fields[2]:
                    fields[2] = "英雄:" + fields[2]
                # split(':', 1): split on the FIRST colon only, so values that
                # themselves contain ':' (e.g. "12:30" timestamps) stay intact.
                pairs = [f.split(':', 1) for f in fields if ":" in f]
                data_dict = {"大区": area, "ID": name, **dict(pairs)}
                data_dict['页码'] = page_num + 1  # remember which page the row came from
                data_list.append(data_dict)
        except Exception as e:
            # Best-effort scrape: report and move on to the next page.
            print(f"Error occurred: {e}")
            continue

df = pd.DataFrame(data_list)
if df.empty:
    print("没有数据,程序退出。")
    sys.exit()

# Convert each fully-numeric column to numbers; leave mixed/text columns alone.
# (Replaces pd.to_numeric(..., errors='ignore'), which is deprecated.)
for col in df.columns:
    try:
        df[col] = pd.to_numeric(df[col])
    except (ValueError, TypeError):
        pass  # e.g. 大区 / ID stay as strings

df = df[df['皮肤'] > 400]  # keep only accounts with more than 400 skins
if '最后游戏' in df.columns:
    df = df.sort_values(by=["皮肤", "最后游戏", "等级"], ascending=[False, True, True])
else:
    df = df.sort_values(by=["皮肤", "等级"], ascending=[False, True])
df = df[["页码", "大区", "ID", "等级", "英雄", "皮肤", "单", "组"]]

# NOTE(review): goodsid here holds the LAST value from the loop above; the
# "1802" comparison only matters when dic_goods is the commented-out skin map.
exl_name = "EZ__PF" if goodsid == "1802" else "EZ"
current_time = datetime.now().strftime("%m%d__%H.%M")
with pd.ExcelWriter(f"{deskPath}\\{exl_name}__{current_time}.xlsx", engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name='Sheet1')
print(f"共{lastpage}页,数据已保存到桌面。")

import winsound
winsound.Beep(440, 1000)  # audible "done" signal
import hashlib
import json
import os
import re
import sys
import tkinter as tk
from tkinter import filedialog

import pandas as pd
import requests
from fake_useragent import UserAgent


def md5_hash(text):
    """Return the hex MD5 digest of *text* (UTF-8 encoded).

    Used to build the site's `sc` request-signature parameter.
    """
    return hashlib.md5(text.encode('utf-8')).hexdigest()


url_cx = "https://fa8.pw/api/api.php?act=cx"
head_cx = {
    "User-Agent": UserAgent().random,
    # NOTE(review): session cookie copied from a browser; it will expire.
    "Cookie": "__51vcke__KEAGaASdi4vVsbMk=1efc3001-fb11-50f9-9147-77f4f6eb3599; __51vuft__KEAGaASdi4vVsbMk=1712214416217; sign=8b8db337e75948fe5bb408040a5d9618; __51uvsct__KEAGaASdi4vVsbMk=15; swl=true; __vtins__KEAGaASdi4vVsbMk=%7B%22sid%22%3A%20%227009469d-4a54-519c-bd22-7eb46204f9e8%22%2C%20%22vd%22%3A%203%2C%20%22stt%22%3A%20199586%2C%20%22dr%22%3A%203335%2C%20%22expires%22%3A%201712976322096%2C%20%22ct%22%3A%201712974522096%7D",
    "Referer": "https://fa8.pw/"
}

# Region (大区) name -> the site's numeric region code.
dic_dq = {
    '艾欧尼亚': '1',
    '比尔吉沃特': '2',
    '祖安': '3',
    '诺克萨斯': '4',
    '德玛西亚': '6',
    '班德尔城': '5',
    '皮尔特沃夫': '7',
    '战争学院': '8',
    '弗雷尔卓德': '9',
    '巨神峰': '10',
    '雷瑟守备': '11',
    '无畏先锋': '12',
    '裁决之地': '13',
    '黑色玫瑰': '14',
    '暗影岛': '15',
    '钢铁烈阳': '17',
    '恕瑞玛': '16',
    '水晶之痕': '18',
    '影流': '22',
    '守望之海': '23',
    '扭曲丛林': '20',
    '征服之海': '24',
    '卡拉曼达': '25',
    '皮城警备': '27',
    '巨龙之巢': '26',
    '均衡教派': '19',
    '男爵领域': '30',
    '峡谷之巅': '31'
}

# Ask the user for the Excel file produced by the scraper script.
root = tk.Tk()
root.withdraw()  # hide the tkinter main window; only the dialog is needed
desktop_path = os.path.join(os.environ['USERPROFILE'], 'Desktop')
file_path = filedialog.askopenfilename(initialdir=desktop_path)
root.destroy()
if file_path == "":
    sys.exit()  # user cancelled the dialog

df = pd.DataFrame(columns=["页码", "大区", "ID", "等级", "英雄", "皮肤", "单", "组", "最后游戏"])
df1 = pd.read_excel(file_path)

with pd.ExcelWriter(file_path) as writer:
    # Keep the untouched input on a second sheet.
    df1.to_excel(writer, sheet_name='原始数据', index=False)

    # Compile once; matches "YYYY-MM-DD HH:MM" timestamps in the match-history blob.
    time_pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}')
    for index, row in df1.iterrows():
        if row["皮肤"] < 450:
            continue  # only query accounts with at least 450 skins
        mydq_num = dic_dq.get(row["大区"], "")  # region code, "" when unknown
        myname = row["ID"]
        mysc = md5_hash(f"name={myname}dq-{mydq_num}")  # request signature
        mydata = {
            "name": myname,
            "dq": mydq_num,
            "start": "0",
            "end": "10",
            "type": "lol",
            "tag": "1",
            "sc": mysc
        }
        html = requests.post(url_cx, data=mydata, headers=head_cx)
        data = json.loads(html.text)
        if data['zhanji'] is None:
            continue

        if '过期' in data['zhanji']:
            # Free-query window is over; nothing more can be fetched today.
            print("已过免费查询时间段,程序退出。")
            sys.exit()

        if '没有' in data['zhanji']:
            continue  # no match history for this account

        # Guard: re.search returns None when no timestamp is present
        # (the original `re.search(...)[0]` raised TypeError here).
        m = time_pattern.search(data['zhanji'])
        if m is None:
            continue
        match = m.group(0)  # first (most recent) game timestamp

        # Skin count is embedded in an HTML fragment like "...皮 肤:123</font>|...".
        skin_count = int(data['skin'].split("|")[1].split("</font>")[0].split("皮 肤:")[1])
        ds = data['dsdj']  # solo/duo rank
        zp = data['lhdj']  # flex rank
        # Same index as df1, so the bulk column copies below align by index.
        df.at[index, '皮肤'] = skin_count
        df.at[index, '单'] = ds
        df.at[index, '组'] = zp
        df.at[index, '最后游戏'] = match

    # Bulk-copy the pass-through columns once (index-aligned with the rows above).
    df["页码"] = df1["页码"]
    df["大区"] = df1["大区"]
    df["ID"] = df1["ID"]
    df["等级"] = df1["等级"]
    df["英雄"] = df1["英雄"]

    if df.empty:
        print("没有数据,程序退出。")
        sys.exit()
    df = df[df['皮肤'] > 400]
    # Keep only the date part and normalize separators for Excel display.
    df['最后游戏'] = df['最后游戏'].fillna('').str.slice(0, 10)
    df['最后游戏'] = df['最后游戏'].str.replace('-', '/')
    df.to_excel(writer, sheet_name='处理后数据', index=False)

import winsound
winsound.Beep(440, 1000)  # audible "done" signal
// Click the link element with id "getE" `numClicks` times, waiting
// `delay` milliseconds between consecutive clicks.
function clickLinkMultipleTimesWithDelay(numClicks, delay) {
  var linkElement = document.getElementById('getE');

  // Guard clause: nothing to do if the link is missing from the page.
  if (!linkElement) {
    console.log('找不到链接元素。');
    return;
  }

  var remaining = numClicks;
  // Self-invoking stepper: click, then reschedule itself until done.
  (function step() {
    if (remaining > 0) {
      linkElement.click();
      remaining--;
      setTimeout(step, delay);
    }
  })();
}

// Click the link 10 times, 100 ms (0.1 s) apart.
clickLinkMultipleTimesWithDelay(10, 100);
复制代码
|
|
|