import asyncio
import aiohttp
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import os
import edge_tts
import asyncio
class biQuGe:
    """Download a novel chapter by chapter and narrate each chapter to MP3.

    Typical use: call ``get_novel_info()`` to collect the chapter list from
    the index page, then ``main()`` to fetch every chapter, synthesize it
    with edge-tts and store the chapter text alongside the audio.
    """

    def __init__(self):
        """Set request defaults and empty state; performs no I/O."""
        self.browser_header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36 Edg/',
        }
        self.novel_url = 'http://www.ibiquzw.org/24_24218/'
        self.timeout = 5            # seconds, applied to every HTTP request
        self.chapter_info = []      # list of [file_stem, chapter_url] pairs
        self.novel_name = None      # filled in by get_novel_info()
        self.novel_txt_path = None  # filled in by main()
        self.novel_mp3_path = None  # filled in by main()

    def get_novel_info(self):
        """Fetch the index page and populate ``novel_name`` and ``chapter_info``.

        ``chapter_info`` becomes a list of ``[file_stem, url]`` pairs where
        ``file_stem`` is a 4-digit running number, an underscore and the
        whitespace-normalized chapter title.

        Raises:
            IndexError: if the index page contains no ``<h1>`` text node.
            Any exception raised by ``requests.get`` propagates unchanged.
        """
        response = requests.get(url=self.novel_url, headers=self.browser_header, timeout=self.timeout)
        response.encoding = response.apparent_encoding
        page = etree.HTML(response.text)

        titles = page.xpath('//div[@id="list"]//a/text()')
        hrefs = page.xpath('//div[@id="list"]//a/@href')
        self.novel_name = page.xpath('//h1/text()')[0]

        # Put a space after every '章' and collapse runs of whitespace so the
        # titles make uniform file names.
        titles = [' '.join(str(title).replace('章', '章 ').split()) for title in titles]
        # NOTE(review): the host is hard-coded instead of derived from
        # novel_url; hrefs are presumably site-root-relative - confirm.
        links = ['http://www.ibiquzw.org/' + href for href in hrefs]

        # The first 12 links are skipped - presumably a "latest chapters"
        # teaser that duplicates entries of the full list; confirm on the site.
        self.chapter_info = [
            [f'{number:04d}_{title}', link]
            for number, (title, link) in enumerate(zip(titles[12:], links[12:]), start=1)
        ]

    def get_chapter_content(self, info):
        """Download one chapter, synthesize it to MP3 and save its text.

        Args:
            info: ``[file_stem, url]`` pair as produced by ``get_novel_info()``.

        A chapter whose ``.txt`` file already exists is skipped. The text
        file is written only after the MP3 has been saved, so it doubles as
        a completion marker: a chapter whose conversion failed is retried on
        the next run. Chapters with no extractable text are left untouched.
        """
        async def run_tts(text: str, output: str, voice: str = 'zh-CN-YunxiNeural') -> None:
            communicate = edge_tts.Communicate(text, voice)
            await communicate.save(output)

        chapter_name = info[0]
        chapter_link = info[1]
        chapter_txt_path = self.novel_txt_path + chapter_name + '.txt'
        chapter_mp3_path = self.novel_mp3_path + chapter_name + '.mp3'

        # Already processed in an earlier run: nothing to do.
        if os.path.isfile(chapter_txt_path):
            return

        print('正在下载:', chapter_name)
        response = requests.get(url=chapter_link, headers=self.browser_header, timeout=self.timeout)
        response.encoding = response.apparent_encoding
        page = etree.HTML(response.text)

        # Strip all whitespace inside each text node and drop empty nodes.
        lines = [''.join(str(node).split()) for node in page.xpath('//div[@id="content"]/text()')]
        lines = [line for line in lines if line != '']
        if not lines:
            # Writing an empty .txt would mark the chapter as done.
            return

        text = ''.join(line + '\n' for line in lines)

        print('\t正在转换:', chapter_name)
        # Each worker thread gets its own event loop via asyncio.run().
        asyncio.run(run_tts(text, chapter_mp3_path))
        with open(file=chapter_txt_path, mode='w', encoding='utf-8') as f:
            f.write(text)
        print('\t\t转换成功:', chapter_name)

    def main(self):
        """Create the output folders and process all chapters on a thread pool.

        Fetches the novel info first when it has not been loaded yet, so
        calling ``main()`` on a fresh instance works. Any exception raised
        while processing a chapter is re-raised here.
        """
        if self.novel_name is None:
            self.get_novel_info()

        self.novel_txt_path = './' + self.novel_name + '/txt/'
        self.novel_mp3_path = './' + self.novel_name + '/mp3/'
        os.makedirs(self.novel_txt_path, exist_ok=True)
        os.makedirs(self.novel_mp3_path, exist_ok=True)

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(self.get_chapter_content, item) for item in self.chapter_info]
            for future in as_completed(futures):
                # result() surfaces exceptions raised inside the worker.
                future.result()
if __name__ == "__main__":
novel = biQuGe()