【某趣阁】小说下载，纯python脚本，无需selenium等自动化工具

脚本说明
某趣阁小说下载，纯python脚本，无需selenium等自动化工具
支持在命令行中搜索和下载小说
只需要requests和bs4两个依赖
pip install requests bs4 -i https://pypi.tuna.tsinghua.edu.cn/simple
完整代码
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @file : main.py
import time
import urllib.parse
import requests
import copy
import os
from bs4 import BeautifulSoup
# Request headers attached to every HTTP call made by this script.
HEADERS = {
    "authority": "www.biqg.cc",
    "accept": "application/json",
    "accept-language": "zh,en;q=0.9,zh-CN;q=0.8",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
    "x-requested-with": "XMLHttpRequest",
}

# Absolute directory that contains this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Downloaded novels are stored under "<script dir>/result".
DOWNLOAD_PATH = os.path.join(BASE_DIR, "result")
def get_hm_cookie(url):
    """Create a fresh session and issue one GET to *url* with it.

    The response is discarded; the request is made only so that the
    returned session carries whatever state that visit produced.

    :param url: address to visit before searching
    :return: the ``requests.Session`` that performed the visit
    """
    warmed_session = requests.Session()
    warmed_session.get(url, headers=HEADERS, timeout=10)
    return warmed_session
def search(key_word):
    """Query the site's search endpoint for *key_word*.

    A warm-up request to ``/user/hm.html`` is made first (via
    ``get_hm_cookie``) and the same session is then used for the actual
    search call.

    Every step that can fail -- the warm-up request, the search request
    and decoding the JSON body -- is guarded, so a network error or a
    non-JSON reply is reported and turned into an empty result instead of
    crashing the caller.

    :param key_word: search keyword entered by the user
    :return: ``(data, session)`` where ``data`` is the decoded JSON body;
        on failure ``data`` is ``[]`` and ``session`` is still a usable
        ``requests.Session``
    """
    referer = urllib.parse.quote(
        f"https://www.biqg.cc/s?q={key_word}", safe="/&=:?"
    )
    hm_url = urllib.parse.quote(
        f"https://www.biqg.cc/user/hm.html?q={key_word}", safe="/&=:?"
    )
    # HEADERS is a flat dict of strings, so a shallow copy is sufficient.
    new_header = {**HEADERS, "referer": referer}

    session = None
    try:
        session = get_hm_cookie(hm_url)
        response = session.get(
            "https://www.biqg.cc/user/search.html",
            params={"q": key_word},
            headers=new_header,
            timeout=10,
        )
        # .json() raises when the body is not JSON (e.g. an HTML error page).
        data = response.json()
    except Exception as e:
        print(f"搜索{key_word}时失败，错误信息:{e}")
        # Keep the return shape stable even when the warm-up itself failed.
        return [], session if session is not None else requests.Session()
    return data, session
def download_by_tag(tag, href, result_path, session):
    """Download one chapter and save it as ``<title>.txt`` in *result_path*.

    The page is fetched and parsed before the output file is opened, so a
    failed request no longer leaves an empty file behind. If the page has
    no ``#chaptercontent`` element the chapter is reported and skipped.

    :param tag: the chapter's ``<a>`` element; its text is the chapter title
    :param href: site-relative link of the chapter page
    :param result_path: existing directory the chapter file is written into
    :param session: requests session used for the HTTP call
    :return: None
    """
    title = f"{tag.text}"
    url = f"https://www.biqg.cc{href}"
    print(f"开始下载第{title} url: {url}")

    # Same timeout as every other request in this script.
    content_response = session.get(url, headers=HEADERS, timeout=10)
    content_soup = BeautifulSoup(content_response.content, "html.parser")
    text = content_soup.find(id="chaptercontent")

    if text is None:
        print(f"下载{url}失败，错误信息:未找到章节正文")
    else:
        # Paragraphs are separated by two full-width spaces. The first piece
        # and the last two pieces are dropped, exactly as before.
        paragraphs = text.get_text().split("　　")[1:-2]
        # Characters that are illegal in file names on common file systems.
        unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
        safe_title = title.translate(unsafe_chars)
        result_file_name = os.path.join(result_path, f"{safe_title}.txt")
        with open(result_file_name, "w", encoding="utf-8") as f:
            f.writelines(f"{paragraph}\n" for paragraph in paragraphs)

    # Short pause between chapter requests.
    time.sleep(0.2)
def download_txt(download_url, path_name, session):
    """Download every chapter linked from a novel's chapter-list page.

    Chapters are saved one file each under ``DOWNLOAD_PATH/path_name``.
    Any error while fetching the list or a chapter is printed (with its
    traceback) and ends the download; nothing is raised to the caller.

    :param download_url: URL of the novel's chapter-list page
    :param path_name: sub-directory name under DOWNLOAD_PATH for this novel
    :param session: requests session used for all HTTP calls
    :return: None
    """
    result_path = os.path.join(DOWNLOAD_PATH, path_name)
    # exist_ok=True already covers the "directory exists" case.
    os.makedirs(result_path, exist_ok=True)
    try:
        response = session.get(download_url, headers=HEADERS, timeout=10)
        # Fail loudly on HTTP errors instead of parsing an error page and
        # silently finding zero chapters.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # NOTE(review): the hidden chapter links may also be matched by the
        # main selector (not verifiable from here); de-duplicating by href
        # ensures each chapter is fetched only once either way.
        seen_hrefs = set()
        for tag in soup.select("div[class='listmain'] dl dd a"):
            if tag["href"] == "javascript:dd_show()":
                # The "expand" link stands in for the collapsed chapter list.
                chapter_tags = soup.select("span[class='dd_hide'] dd a")
            else:
                chapter_tags = [tag]
            for chapter_tag in chapter_tags:
                href = chapter_tag["href"]
                if href in seen_hrefs:
                    continue
                seen_hrefs.add(href)
                download_by_tag(chapter_tag, href, result_path, session)
    except Exception as e:
        import traceback

        print(traceback.format_exc())
        print(f"下载{download_url}失败，错误信息:{e}")
def run():
    """Interactive command-line loop: search, list results, pick, download.

    Repeats until the user enters ``q``. Each round asks for a keyword,
    prints the numbered search results, asks for the number of the book to
    download and then downloads it.

    :return: None
    """
    while True:
        keyword = input("请输入搜索的小说名or输入q退出:")
        if keyword.replace(" ", "").lower() == "q":
            break
        # Ignore empty and whitespace-only input instead of searching for it.
        if not keyword.strip():
            continue

        data_list, session = search(keyword)
        # The code below indexes data_list by number, so anything that is
        # not a non-empty list (e.g. [] or the value 1) means "try again".
        if not isinstance(data_list, list) or not data_list:
            print("请重试.......")
            continue

        # Each entry must be read individually; binding the whole list here
        # made item.get() fail on every search.
        for i, item in enumerate(data_list):
            articlename = item.get("articlename")
            author = item.get("author")
            print(f"编号：{i} 书名：{articlename}----->{author}")

        while True:
            try:
                num_book = int(input("请输入需要下载的编号:"))
            except ValueError:
                # Only a non-numeric entry is retried; EOF on stdin now
                # propagates instead of looping forever.
                print("请输入正确的编号")
                continue
            # Reject negative numbers too, which plain indexing would accept.
            if not 0 <= num_book < len(data_list):
                print("编号超出了预期，请请重新输入")
                continue
            item = data_list[num_book]
            break

        url_list = f"https://www.biqg.cc{item.get('url_list')}"
        print(f"开始下载{url_list}")
        path_name = f"{item.get('articlename', '')}___{item.get('author', '')}"
        download_txt(url_list, path_name, session)
# Start the interactive loop only when executed as a script, not on import.
if __name__ == "__main__":
    run()
运行
python main.py
# 输入小说名
# 选择要下载的编号
# 等待下载就行了
常见问题
1、偶尔会出现下载失败的情况，通常是由于网络问题导致，失败后重新运行即可
运行截图

[email protected] (919.99 KB, 下载次数: 0)
下载附件
2024-11-23 15:49 上传

脚本, 编号

【某趣阁】小说下载，纯python脚本，无需selenium等自动化

相关帖子

热门主题

国产英伟达，摩尔把上市融资的75亿元拿去买

✅DMIT 三网 GIA CMIN2 MALIBU EB 维多利亚

有MJJ遇到过TG号全部设备都被登出了吗？

【快讯】HostHatch Seoul HH 新节点首尔

Hk-One-0.5G-52-LS 少量放貨速度

公司项目分享：硅谷人工智能公司 Nexa AI

拿到了 300 来部短剧的海外发行版权，下一

长话短说大家觉得花三十万结婚，存款花完

建议拉黑 IObit 旗下所有软件

重度苹果用户投华做了两面派

热门板块

公告

网站帮助 - Yoo趣儿

我们的愿景

在 Yoo趣儿投放广告

Yoo趣儿网站用户应遵守规则