From fad289c245913084355f14e28ac3e3be4a07655d Mon Sep 17 00:00:00 2001 From: SigureMo Date: Mon, 3 May 2021 16:04:09 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20batch=20get?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 - .vscode/settings.json | 6 ++ README.md | 19 ++++- justfile | 10 +++ yutto/__main__.py | 11 ++- yutto/api/acg_video.py | 2 + yutto/api/bangumi.py | 2 + yutto/cli/batch_get.py | 132 +++++++++++++++++++++++++++++++ yutto/cli/check_options.py | 32 +++++++- yutto/cli/get.py | 15 ++-- yutto/processor/downloader.py | 15 +++- yutto/processor/filter.py | 58 ++++++++++++++ yutto/processor/path_resolver.py | 0 yutto/processor/progressor.py | 6 +- yutto/utils/console/logger.py | 2 +- yutto/utils/ffmpeg.py | 2 + 16 files changed, 288 insertions(+), 27 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 yutto/cli/batch_get.py create mode 100644 yutto/processor/path_resolver.py diff --git a/.gitignore b/.gitignore index 610e32f..30e5f03 100644 --- a/.gitignore +++ b/.gitignore @@ -117,9 +117,6 @@ dmypy.json # macOS .DS_Store -# Editor/IDE Configures -.vscode - # Media files *.aac *.mp3 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..15703f7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], + "python.pythonPath": "/opt/homebrew/bin/python3", + "python.analysis.typeCheckingMode": "strict" +} diff --git a/README.md b/README.md index 2edc64a..649c5e7 100644 --- a/README.md +++ b/README.md @@ -14,22 +14,33 @@ yutto,一个可爱且任性的 B 站下载器(CLI) ## 可用程度 -现在只能下载单话番剧,但如果我想做的话很快各种功能就可以做好了,毕竟 baseline 都搭好了。 +安装预览版: + +```bash +pip install --pre yutto +``` + +现在只能简单地下载视频,字幕弹幕等功能还没有做。 由于 yutto 的弹幕支持方式可能需要考虑一段时间,暂时我不太想用 danmaku2ass,所以关于弹幕的支持会延后一段时间。 现在可以通过以下命令来尝试下载《転スラ日記》第一话 ```bash -pip install --pre yutto yutto -q 64 get https://www.bilibili.com/bangumi/play/ep395211 ``` +或者通过 batch get 命令也是可以的 + +```bash +yutto -q 64 batch get https://www.bilibili.com/bangumi/play/ep395211 -p 1 +``` + ## TODO List - [ ] 好多,不知道该写些啥,等剩余任务较少时候再来写吧…… ## References -- https://github.com/SigureMo/bilili -- https://github.com/changmenseng/AsyncBilibiliDownloader +- 基本结构: +- 协程下载: diff --git a/justfile b/justfile index 974b106..1481093 100644 --- a/justfile +++ b/justfile @@ -24,3 +24,13 @@ upgrade-pip: upgrade: python setup.py build python setup.py install + +clean: + rm *.m4s + rm *.mp4 + rm *.aac + +clean-builds: + rm -rf build/ + rm -rf dist/ + rm -rf yutto.egg-info/ diff --git a/yutto/__main__.py b/yutto/__main__.py index 23426fb..5a16b39 100644 --- a/yutto/__main__.py +++ b/yutto/__main__.py @@ -1,6 +1,6 @@ import argparse -from yutto.cli import get, info, check_options +from yutto.cli import get, info, batch_get, check_options from yutto.__version__ import __version__ from yutto.utils.ffmpeg import FFmpeg from yutto.utils.console.colorful import colored_string @@ -39,6 +39,8 @@ def main(): parser.add_argument("-c", "--sessdata", default="", help="Cookies 中的 SESSDATA 字段") parser.add_argument("--path-pattern", default="{auto}", help="多级目录的存储路径 Pattern") parser.add_argument("--no-subtitle", action="store_true", help="不生成字幕文件") + parser.add_argument("--embed-danmaku", action="store_true", help="将弹幕文件嵌入到视频中") + parser.add_argument("--embed-subtitle", default=None, help="将字幕文件嵌入到视频中(需输入语言代码)") parser.add_argument("--no-color", action="store_true", help="不使用颜色") parser.add_argument("--debug", action="store_true", help="启用 debug 模式") parser.set_defaults(action=run) @@ -50,8 +52,11 @@ def main(): # 子命令 info # TODO # 子命令 batch - # TODO - + parser_batch = subparsers.add_parser("batch", help="批量获取视频") + subparsers_batch = parser_batch.add_subparsers() + # 子命令 batch get + parser_batch_get = subparsers_batch.add_parser("get", help="批量获取视频") + batch_get.add_get_arguments(parser_batch_get) # 执行各自的 action args = parser.parse_args() check_options.check_basic_options(args) diff --git a/yutto/api/acg_video.py b/yutto/api/acg_video.py index b4dcbc6..4a10cdb 100644 --- a/yutto/api/acg_video.py +++ b/yutto/api/acg_video.py @@ -57,6 +57,8 @@ async def get_acg_video_playurl( if not resp.ok: raise NoAccessError("无法下载该视频(cid: {cid})".format(cid=cid)) resp_json = await resp.json() + if resp_json.get("data") is None: + raise NoAccessError("无法下载该视频(cid: {cid}),原因:{msg}".format(cid=cid, msg=resp_json.get("message"))) if resp_json["data"].get("dash") is None: raise UnSupportedTypeError("该视频(cid: {cid})尚不支持 DASH 格式".format(cid=cid)) return ( diff --git a/yutto/api/bangumi.py b/yutto/api/bangumi.py index d20995f..10fdb64 100644 --- a/yutto/api/bangumi.py +++ b/yutto/api/bangumi.py @@ -96,6 +96,8 @@ async def get_bangumi_playurl( if not resp.ok: raise NoAccessError("无法下载该视频(cid: {cid})".format(cid=cid)) resp_json = await resp.json() + if resp_json.get("result") is None: + raise NoAccessError("无法下载该视频(cid: {cid}),原因:{msg}".format(cid=cid, msg=resp_json.get("message"))) if resp_json["result"].get("dash") is None: raise UnSupportedTypeError("该视频(cid: {cid})尚不支持 DASH 格式".format(cid=cid)) if resp_json["result"]["is_preview"] == 1: diff --git a/yutto/cli/batch_get.py b/yutto/cli/batch_get.py new file mode 100644 index 0000000..1932019 --- /dev/null +++ b/yutto/cli/batch_get.py @@ -0,0 +1,132 @@ +import argparse +import os +import sys + +import aiohttp + +from yutto.api.acg_video import get_acg_video_list, get_acg_video_playurl, get_acg_video_title +from yutto.api.bangumi import ( + get_bangumi_list, + get_bangumi_playurl, + get_bangumi_title, + get_season_id_by_episode_id, + get_season_id_by_media_id, +) +from yutto.api.types import AId, AudioUrlMeta, AvId, BvId, CId, EpisodeId, MediaId, SeasonId, VideoUrlMeta +from yutto.cli import check_options +from yutto.processor.crawler import gen_cookies, gen_headers +from yutto.processor.downloader import download_video +from yutto.processor.filter import parse_episodes +from yutto.processor.urlparser import ( + regexp_acg_video_av, + regexp_acg_video_av_short, + regexp_acg_video_bv, + regexp_acg_video_bv_short, + regexp_bangumi_ep, + regexp_bangumi_ep_short, + regexp_bangumi_md, + regexp_bangumi_ss, + regexp_bangumi_ss_short, +) +from yutto.utils.console.formatter import repair_filename +from yutto.utils.console.logger import Badge, Logger +from yutto.utils.functiontools.sync import sync + + +def add_get_arguments(parser: argparse.ArgumentParser): + parser.add_argument("url", help="视频主页 url") + parser.add_argument("-p", "--episodes", default="^~$", help="选集") + parser.add_argument("-s", "--with-section", action="store_true", help="同时下载附加剧集(PV、预告以及特别篇等专区内容)") + parser.set_defaults(action=run) + + +@sync +async def run(args: argparse.Namespace): + check_options.check_batch_options(args) + async with aiohttp.ClientSession( + headers=gen_headers(), + cookies=gen_cookies(args.sessdata), + timeout=aiohttp.ClientTimeout(total=5), + ) as session: + download_list: list[tuple[list[VideoUrlMeta], list[AudioUrlMeta], str]] = [] + if ( + (match_obj := regexp_bangumi_ep.match(args.url)) + or (match_obj := regexp_bangumi_ep_short.match(args.url)) + or (match_obj := regexp_bangumi_ss.match(args.url)) + or (match_obj := regexp_bangumi_ss_short.match(args.url)) + or (match_obj := regexp_bangumi_md.match(args.url)) + ): + # 匹配为番剧 + if "episode_id" in match_obj.groupdict().keys(): + episode_id = EpisodeId(match_obj.group("episode_id")) + season_id = await get_season_id_by_episode_id(session, episode_id) + elif "season_id" in match_obj.groupdict().keys(): + season_id = SeasonId(match_obj.group("season_id")) + else: + media_id = MediaId(match_obj.group("media_id")) + season_id = await get_season_id_by_media_id(session, media_id) + title = await get_bangumi_title(session, season_id) + Logger.custom(title, Badge("番剧", fore="black", back="cyan")) + bangumi_list = await get_bangumi_list(session, season_id) + # 如果没有 with_section 则不需要专区内容 + bangumi_list = list(filter(lambda item: args.with_section or not item["is_section"], bangumi_list)) + # 选集过滤 + episodes = parse_episodes(args.episodes, len(bangumi_list)) + bangumi_list = list(filter(lambda item: item["id"] in episodes, bangumi_list)) + for i, bangumi_item in enumerate(bangumi_list): + Logger.info("正在努力解析第 {}/{} 个视频".format(i + 1, len(bangumi_list)), end="\r") + avid = bangumi_item["avid"] + cid = bangumi_item["cid"] + episode_id = bangumi_item["episode_id"] + filename = bangumi_item["name"] + videos, audios = await get_bangumi_playurl(session, avid, episode_id, cid) + # TODO: 根据 Path Pattern 动态决定位置 + download_list.append((videos, audios, filename)) + elif ( + (match_obj := regexp_acg_video_av.match(args.url)) + or (match_obj := regexp_acg_video_av_short.match(args.url)) + or (match_obj := regexp_acg_video_bv.match(args.url)) + or (match_obj := regexp_acg_video_bv_short.match(args.url)) + ): + # 匹配为投稿视频 + if "aid" in match_obj.groupdict().keys(): + avid = AId(match_obj.group("aid")) + else: + avid = BvId(match_obj.group("bvid")) + title = await get_acg_video_title(session, avid) + Logger.custom(title, Badge("投稿视频", fore="black", back="cyan")) + acg_video_list = await get_acg_video_list(session, avid) + # 选集过滤 + episodes = parse_episodes(args.episodes, len(acg_video_list)) + acg_video_list = list(filter(lambda item: item["id"] in episodes, acg_video_list)) + for i, acg_video_item in enumerate(acg_video_list): + Logger.info("正在努力解析第 {}/{} 个视频".format(i + 1, len(acg_video_list)), end="\r") + cid = acg_video_item["cid"] + filename = acg_video_item["name"] + videos, audios = await get_acg_video_playurl(session, avid, cid) + # TODO: 根据 Path Pattern 动态决定位置 + download_list.append((videos, audios, filename)) + else: + Logger.error("url 不正确~") + sys.exit(1) + for videos, audios, filename in download_list: + await download_video( + session, + videos, + audios, + args.dir, + repair_filename(filename), + { + "require_video": args.require_video, + "video_quality": args.video_quality, + "video_download_codec": args.vcodec.split(":")[0], + "video_save_codec": args.vcodec.split(":")[1], + "require_audio": args.require_audio, + "audio_quality": args.audio_quality, + "audio_download_codec": args.acodec.split(":")[0], + "audio_save_codec": args.acodec.split(":")[1], + "overwrite": args.overwrite, + "block_size": int(args.block_size * 1024 * 1024), + "num_workers": args.num_workers, + }, + ) diff --git a/yutto/cli/check_options.py b/yutto/cli/check_options.py index f0c3de5..750d0e1 100644 --- a/yutto/cli/check_options.py +++ b/yutto/cli/check_options.py @@ -12,6 +12,7 @@ from yutto.utils.asynclib import install_uvloop from yutto.utils.console.colorful import set_no_color from yutto.utils.console.logger import Badge, Logger, set_logger_debug from yutto.utils.ffmpeg import FFmpeg +from yutto.processor.filter import check_episodes def check_basic_options(args: argparse.Namespace): @@ -20,7 +21,7 @@ def check_basic_options(args: argparse.Namespace): ffmpeg = FFmpeg() # 在使用 --no-color 或者环境变量 NO_COLOR 非空时都应该不显示颜色 - # Also see: https://no-color.org/ + # See also: https://no-color.org/ if args.no_color or os.environ.get("NO_COLOR"): set_no_color() @@ -28,7 +29,7 @@ def check_basic_options(args: argparse.Namespace): if args.debug: set_logger_debug() else: - # 为保证协程任务的可读性,仅在非 debug 模式启用 uvloop + # 为保证协程错误栈的可读性,debug 模式不启用 uvloop install_uvloop() # vcodec 检查 @@ -76,13 +77,36 @@ def check_basic_options(args: argparse.Namespace): # TODO: proxy 检验 + # 不下载视频无法嵌入字幕 + if not args.require_video and args.embed_subtitle: + Logger.error("不下载视频时无法嵌入字幕") + sys.exit(1) + + # 不下载视频无法嵌入弹幕 + if not args.require_video and args.embed_danmaku: + Logger.error("不下载视频时无法嵌入弹幕") + sys.exit(1) + + # 嵌入弹幕功能仅支持 ASS 弹幕 + if args.embed_danmaku and args.danmaku != "ass": + Logger.error("嵌入弹幕功能仅支持 ASS 弹幕") + sys.exit(1) + # 大会员身份校验 if not args.sessdata: - Logger.warning("未提供 SESSDATA,无法下载会员专属剧集") + Logger.warning("未提供 SESSDATA,无法下载会员专享剧集") elif asyncio.run(check_is_vip(args.sessdata)): Logger.custom("成功以大会员身份登录~", badge=Badge("大会员", fore="white", back="magenta")) else: - Logger.warning("以非大会员身份登录,无法下载会员专属剧集") + Logger.warning("以非大会员身份登录,无法下载会员专享剧集") + + +def check_batch_options(args: argparse.Namespace): + """ 检查批量下载相关选项 """ + # 检查 episodes 格式(简单的正则检查,后续过滤剧集时还有完整检查) + if not check_episodes(args.episodes): + Logger.error("选集参数({})格式不正确".format(args.episodes)) + sys.exit(1) async def check_is_vip(sessdata: str = "") -> bool: diff --git a/yutto/cli/get.py b/yutto/cli/get.py index a7b0a63..5ade973 100644 --- a/yutto/cli/get.py +++ b/yutto/cli/get.py @@ -29,19 +29,17 @@ def add_get_arguments(parser: argparse.ArgumentParser): @sync async def run(args: argparse.Namespace): - # args.sessdata = "0a7f9758%2C1629361847%2Ca86ac*21" - # # args.sessdata = "" - # sessdata = "0a7f9758%2C1629361847%2Ca86ac*21" - # # sessdata = "dfasdlfsa" async with aiohttp.ClientSession( headers=gen_headers(), cookies=gen_cookies(args.sessdata), - cookie_jar=aiohttp.DummyCookieJar(), timeout=aiohttp.ClientTimeout(total=5), ) as session: if (match_obj := regexp_bangumi_ep.match(args.url)) or (match_obj := regexp_bangumi_ep_short.match(args.url)): + # 匹配为番剧 episode_id = EpisodeId(match_obj.group("episode_id")) season_id = await get_season_id_by_episode_id(session, episode_id) + title = await get_bangumi_title(session, season_id) + Logger.custom(title, Badge("番剧", fore="black", back="cyan")) bangumi_list = await get_bangumi_list(session, season_id) for bangumi_item in bangumi_list: if bangumi_item["episode_id"] == episode_id: @@ -53,14 +51,13 @@ async def run(args: argparse.Namespace): Logger.error("在列表中未找到该剧集") sys.exit(1) videos, audios = await get_bangumi_playurl(session, avid, episode_id, cid) - title = await get_bangumi_title(session, season_id) - Logger.custom(title, Badge("番剧", fore="black", back="cyan")) elif ( (match_obj := regexp_acg_video_av.match(args.url)) or (match_obj := regexp_acg_video_av_short.match(args.url)) or (match_obj := regexp_acg_video_bv.match(args.url)) or (match_obj := regexp_acg_video_bv_short.match(args.url)) ): + # 匹配为投稿视频 page: int = 1 if "aid" in match_obj.groupdict().keys(): avid = AId(match_obj.group("aid")) @@ -68,12 +65,12 @@ async def run(args: argparse.Namespace): avid = BvId(match_obj.group("bvid")) if match_obj.group("page") is not None: page = int(match_obj.group("page")) + title = await get_acg_video_title(session, avid) + Logger.custom(title, Badge("投稿视频", fore="black", back="cyan")) acg_video_list = await get_acg_video_list(session, avid) cid = acg_video_list[page - 1]["cid"] filename = acg_video_list[page - 1]["name"] videos, audios = await get_acg_video_playurl(session, avid, cid) - title = await get_acg_video_title(session, avid) - Logger.custom(title, Badge("投稿视频", fore="black", back="cyan")) else: Logger.error("url 不正确~") sys.exit(1) diff --git a/yutto/processor/downloader.py b/yutto/processor/downloader.py index 69269a8..e707837 100644 --- a/yutto/processor/downloader.py +++ b/yutto/processor/downloader.py @@ -42,6 +42,20 @@ def slice(start: int, total_size: Optional[int], block_size: Optional[int] = Non def combine(*l_list: list[Any]) -> list[Any]: + """将多个 list 「均匀」地合并到一个 list + + # example + + ``` + l_list = [ + [1, 2, 3, 4, 5], + [6, 7, 8], + [9, 10, 11, 12] + ] + combine(l_list) + # [1, 6, 9, 2, 7, 10, 3, 8, 11, 4, 12, 5] + ``` + """ results: list[Any] = [] for i in range(max([len(l) for l in l_list])): for l in l_list: @@ -154,7 +168,6 @@ async def download_video( args.extend(["-y"]) args.append(output_path) - Logger.debug("FFmpeg > ffmpeg {}".format(" ".join(args))) ffmpeg.exec(args) # fmt: on Logger.info("合并完成!") diff --git a/yutto/processor/filter.py b/yutto/processor/filter.py index 6884539..e2ed84e 100644 --- a/yutto/processor/filter.py +++ b/yutto/processor/filter.py @@ -1,8 +1,11 @@ +import re +import sys from typing import Optional, TypeVar from yutto.api.acg_video import AudioUrlMeta, VideoUrlMeta from yutto.media.codec import AudioCodec, VideoCodec, gen_acodec_priority, gen_vcodec_priority from yutto.media.quality import AudioQuality, VideoQuality, gen_audio_quality_priority, gen_video_quality_priority +from yutto.utils.console.logger import Logger def select_video( @@ -70,3 +73,58 @@ def filter_none_value(l: list[Optional[T]]) -> list[T]: return result # ? 不清楚直接这么写为什么类型不匹配 # return list(filter(lambda x: x is not None, l)) + + +def check_episodes(episodes_str: str) -> bool: + return bool(re.match(r"([\-\d\^\$]+(~[\-\d\^\$]+)?)(,[\-\d\^\$]+(~[\-\d\^\$]+)?)*", episodes_str)) + + +def parse_episodes(episodes_str: str, total: int) -> list[int]: + """ 将选集字符串转为列表(标号从 1 开始) """ + + def reslove_negetive(value: int) -> int: + if value == 0: + Logger.error("不可使用 0 作为剧集号(剧集号从 1 开始计算)") + sys.exit(1) + return value if value > 0 else value + total + 1 + + # 解析字符串为列表 + Logger.info("全 {} 话".format(total)) + if check_episodes(episodes_str): + episodes_str = episodes_str.replace("^", "1") + episodes_str = episodes_str.replace("$", "-1") + episode_list: list[int] = [] + for episode_item in episodes_str.split(","): + if "~" in episode_item: + start, end = episode_item.split("~") + start, end = int(start), int(end) + start, end = reslove_negetive(start), reslove_negetive(end) + if not (end >= start): + Logger.error("终点值({})应不小于起点值({})".format(end, start)) + sys.exit(1) + episode_list.extend(list(range(start, end + 1))) + else: + episode_item = int(episode_item) + episode_item = reslove_negetive(episode_item) + episode_list.append(episode_item) + else: + episode_list = [] + + episode_list = sorted(list(set(episode_list))) + + # 筛选满足条件的剧集 + out_of_range: list[int] = [] + episodes: list[int] = [] + for episode in episode_list: + if episode in range(1, total + 1): + if episode not in episodes: + episodes.append(episode) + else: + out_of_range.append(episode) + if out_of_range: + Logger.warning("剧集 {} 不存在".format(",".join(list(map(str, out_of_range))))) + + Logger.info("已选择第 {} 话".format(",".join(list(map(str, episodes))))) + if not episodes: + Logger.warning("没有选中任何剧集") + return episodes diff --git a/yutto/processor/path_resolver.py b/yutto/processor/path_resolver.py new file mode 100644 index 0000000..e69de29 diff --git a/yutto/processor/progressor.py b/yutto/processor/progressor.py index 3a53c4a..a1ac38c 100644 --- a/yutto/processor/progressor.py +++ b/yutto/processor/progressor.py @@ -3,6 +3,7 @@ import time from yutto.utils.console.formatter import size_format from yutto.utils.file_buffer import AsyncFileBuffer +from yutto.utils.console.logger import Logger async def show_progress(file_buffers: list[AsyncFileBuffer], total_size: int): @@ -19,14 +20,15 @@ async def show_progress(file_buffers: list[AsyncFileBuffer], total_size: int): size_now = size_written + size_in_buffer speed = (size_now - size) / (t_now - t + 10 ** -6) - print( - "{} {}({} 块) {} {}/s".format( + Logger.print( + "[File: {:>10} + Buffer: {:>10}({:>4} 块)]/{:>10} {:>10}/s".format( size_format(size_written), size_format(size_in_buffer), sum([len(file_buffer.buffer) for file_buffer in file_buffers]), size_format(total_size), size_format(speed), ), + end="\r", ) t, size = t_now, size_now await asyncio.sleep(0.5) diff --git a/yutto/utils/console/logger.py b/yutto/utils/console/logger.py index 8be5d81..a89397e 100644 --- a/yutto/utils/console/logger.py +++ b/yutto/utils/console/logger.py @@ -41,7 +41,7 @@ class Badge: WARNING_BADGE = Badge("WARN", fore="black", back="yellow") ERROR_BADGE = Badge("ERROR", fore="white", back="red") INFO_BADGE = Badge("INFO", fore="black", back="green") -DEBUG_BADGE = Badge("ERROR", fore="black", back="blue") +DEBUG_BADGE = Badge("DEBUG", fore="black", back="blue") class Logger(metaclass=Singleton): diff --git a/yutto/utils/ffmpeg.py b/yutto/utils/ffmpeg.py index fe03d3a..6ce18c7 100644 --- a/yutto/utils/ffmpeg.py +++ b/yutto/utils/ffmpeg.py @@ -5,6 +5,7 @@ import subprocess from functools import cached_property from yutto.utils.functiontools.singleton import Singleton +from yutto.utils.console.logger import Logger class FFmpegNotFoundError(Exception): @@ -25,6 +26,7 @@ class FFmpeg(object, metaclass=Singleton): def exec(self, args: list[str]): cmd = [self.path] cmd.extend(args) + Logger.debug(" ".join(cmd)) return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @cached_property