# Note: requests with Limit above 4500 cannot be scraped. Page URL: https://www.sceea.cn/Information/Sunshine?id=210&nf=2023#
import requests
import json
# Endpoint queried with a form-encoded POST; the rows are read from the
# "gsb" key of its JSON response.
web_url = "https://www.sceea.cn/Information/GetSunshineList"

# The server reportedly cannot serve a request whose Limit exceeds 4500, so
# the rows are fetched page by page instead of in one oversized request.
PAGE_SIZE = 4500
# Upper bound on collected rows. This was the Limit of the former single
# request; it also bounds how many requests the loop below can make.
MAX_ROWS = 200000
# Seconds to wait on the server before a request is abandoned.
REQUEST_TIMEOUT = 30

payload = {
    "Limit": str(PAGE_SIZE),
    "Page": "1",
    "gslbid": "210",
    "domParam": "",
    "Key": "",
}
headers = {
    "Origin": "https://www.sceea.cn",
    # The host used to be written without its dots ("wwwsceeacn").
    "Referer": "https://www.sceea.cn/Information/Sunshine?id=210&nf=2023",
    "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
}

json_data = []
page = 1
while len(json_data) < MAX_ROWS:
    payload["Page"] = str(page)
    response = requests.post(
        web_url, data=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # Debug output; printed before parsing so the raw body is visible even
    # when it turns out not to be valid JSON.
    print(response.text)
    page_rows = response.json()["gsb"]
    json_data.extend(page_rows)
    # A short (or empty) page means there is nothing left to fetch.
    # NOTE(review): assumes "Page" is 1-based and honoured by the server —
    # confirm against the live endpoint.
    if len(page_rows) < PAGE_SIZE:
        break
    page += 1
# The last page may overshoot the cap; keep at most MAX_ROWS rows.
del json_data[MAX_ROWS:]
# Flatten every record of json_data into a list of cell values. The order of
# the keys below is the order of the columns in each resulting row.
ar_data = [
    [
        record[key]
        for key in (
            "GSXM1",
            "GSXM2",
            "GSXM3",
            "GSXM4",
            "GSXM5",
            "GSXM6",
            "GSXM7",
            "GSLBID",
            "ID",
        )
    ]
    for record in json_data
]
# Write the collected rows to the active sheet of a new workbook and save it
# as "output.xlsx". Data starts at spreadsheet row 2, so row 1 stays empty.
import openpyxl

workbook = openpyxl.Workbook()
sheet = workbook.active
for row_number, record in enumerate(ar_data, start=2):
    for column_number, cell_value in enumerate(record, start=1):
        sheet.cell(row=row_number, column=column_number).value = cell_value
workbook.save("output.xlsx")