feat: support batch get

This commit is contained in:
SigureMo 2021-05-03 16:04:09 +08:00
parent a3ad5adf1b
commit fad289c245
No known key found for this signature in database
GPG Key ID: F99A3CD7BD76B247
16 changed files with 288 additions and 27 deletions

3
.gitignore vendored
View File

@ -117,9 +117,6 @@ dmypy.json
# macOS
.DS_Store
# Editor/IDE Configures
.vscode
# Media files
*.aac
*.mp3

6
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,6 @@
{
"python.formatting.provider": "black",
"python.formatting.blackArgs": ["--line-length", "120"],
"python.pythonPath": "/opt/homebrew/bin/python3",
"python.analysis.typeCheckingMode": "strict"
}

View File

@ -14,22 +14,33 @@ yutto一个可爱且任性的 B 站下载器CLI
## 可用程度
现在只能下载单话番剧,但如果我想做的话很快各种功能就可以做好了,毕竟 baseline 都搭好了。
安装预览版:
```bash
pip install --pre yutto
```
现在只能简单地下载视频,字幕弹幕等功能还没有做。
由于 yutto 的弹幕支持方式可能需要考虑一段时间,暂时我不太想用 danmaku2ass,所以关于弹幕的支持会延后一段时间。
现在可以通过以下命令来尝试下载《転スラ日記》第一话
```bash
pip install --pre yutto
yutto -q 64 get https://www.bilibili.com/bangumi/play/ep395211
```
或者通过 batch get 命令也是可以的
```bash
yutto -q 64 batch get https://www.bilibili.com/bangumi/play/ep395211 -p 1
```
## TODO List
- [ ] 好多,不知道该写些啥,等剩余任务较少时候再来写吧……
## References
- https://github.com/SigureMo/bilili
- https://github.com/changmenseng/AsyncBilibiliDownloader
- 基本结构:<https://github.com/SigureMo/bilili>
- 协程下载:<https://github.com/changmenseng/AsyncBilibiliDownloader>

View File

@ -24,3 +24,13 @@ upgrade-pip:
# Build the package with setup.py, then install the freshly built version.
upgrade:
	python setup.py build
	python setup.py install
# Remove media files (*.m4s, *.mp4, *.aac) from the current directory.
# `rm -f` is used so that a glob with no matches does not fail the recipe
# and prevent the remaining patterns from being cleaned up.
clean:
	rm -f *.m4s
	rm -f *.mp4
	rm -f *.aac
# Remove the packaging artifacts: build/, dist/ and the egg-info directory.
clean-builds:
	rm -rf build/ dist/ yutto.egg-info/

View File

@ -1,6 +1,6 @@
import argparse
from yutto.cli import get, info, check_options
from yutto.cli import get, info, batch_get, check_options
from yutto.__version__ import __version__
from yutto.utils.ffmpeg import FFmpeg
from yutto.utils.console.colorful import colored_string
@ -39,6 +39,8 @@ def main():
parser.add_argument("-c", "--sessdata", default="", help="Cookies 中的 SESSDATA 字段")
parser.add_argument("--path-pattern", default="{auto}", help="多级目录的存储路径 Pattern")
parser.add_argument("--no-subtitle", action="store_true", help="不生成字幕文件")
parser.add_argument("--embed-danmaku", action="store_true", help="将弹幕文件嵌入到视频中")
parser.add_argument("--embed-subtitle", default=None, help="将字幕文件嵌入到视频中(需输入语言代码)")
parser.add_argument("--no-color", action="store_true", help="不使用颜色")
parser.add_argument("--debug", action="store_true", help="启用 debug 模式")
parser.set_defaults(action=run)
@ -50,8 +52,11 @@ def main():
# 子命令 info
# TODO
# 子命令 batch
# TODO
parser_batch = subparsers.add_parser("batch", help="批量获取视频")
subparsers_batch = parser_batch.add_subparsers()
# 子命令 batch get
parser_batch_get = subparsers_batch.add_parser("get", help="批量获取视频")
batch_get.add_get_arguments(parser_batch_get)
# 执行各自的 action
args = parser.parse_args()
check_options.check_basic_options(args)

View File

@ -57,6 +57,8 @@ async def get_acg_video_playurl(
if not resp.ok:
raise NoAccessError("无法下载该视频cid: {cid}".format(cid=cid))
resp_json = await resp.json()
if resp_json.get("data") is None:
raise NoAccessError("无法下载该视频cid: {cid}),原因:{msg}".format(cid=cid, msg=resp_json.get("message")))
if resp_json["data"].get("dash") is None:
raise UnSupportedTypeError("该视频cid: {cid})尚不支持 DASH 格式".format(cid=cid))
return (

View File

@ -96,6 +96,8 @@ async def get_bangumi_playurl(
if not resp.ok:
raise NoAccessError("无法下载该视频cid: {cid}".format(cid=cid))
resp_json = await resp.json()
if resp_json.get("result") is None:
raise NoAccessError("无法下载该视频cid: {cid}),原因:{msg}".format(cid=cid, msg=resp_json.get("message")))
if resp_json["result"].get("dash") is None:
raise UnSupportedTypeError("该视频cid: {cid})尚不支持 DASH 格式".format(cid=cid))
if resp_json["result"]["is_preview"] == 1:

132
yutto/cli/batch_get.py Normal file
View File

@ -0,0 +1,132 @@
import argparse
import os
import sys
import aiohttp
from yutto.api.acg_video import get_acg_video_list, get_acg_video_playurl, get_acg_video_title
from yutto.api.bangumi import (
get_bangumi_list,
get_bangumi_playurl,
get_bangumi_title,
get_season_id_by_episode_id,
get_season_id_by_media_id,
)
from yutto.api.types import AId, AudioUrlMeta, AvId, BvId, CId, EpisodeId, MediaId, SeasonId, VideoUrlMeta
from yutto.cli import check_options
from yutto.processor.crawler import gen_cookies, gen_headers
from yutto.processor.downloader import download_video
from yutto.processor.filter import parse_episodes
from yutto.processor.urlparser import (
regexp_acg_video_av,
regexp_acg_video_av_short,
regexp_acg_video_bv,
regexp_acg_video_bv_short,
regexp_bangumi_ep,
regexp_bangumi_ep_short,
regexp_bangumi_md,
regexp_bangumi_ss,
regexp_bangumi_ss_short,
)
from yutto.utils.console.formatter import repair_filename
from yutto.utils.console.logger import Badge, Logger
from yutto.utils.functiontools.sync import sync
def add_get_arguments(parser: argparse.ArgumentParser):
    """Register the arguments of the ``batch get`` sub-command on ``parser``.

    Adds the positional ``url``, the episode selection option
    (``-p/--episodes``, default ``"^~$"``) and the ``-s/--with-section``
    flag, then sets ``run`` as the parser's default ``action``.
    """
    # Positional: the page url to resolve.
    parser.add_argument(
        "url",
        help="视频主页 url",
    )
    # Episode selection expression; validated by check_options.check_batch_options.
    parser.add_argument(
        "-p",
        "--episodes",
        dest="episodes",
        help="选集",
        default="^~$",
    )
    # Opt-in flag for section items.
    parser.add_argument(
        "-s",
        "--with-section",
        dest="with_section",
        help="同时下载附加剧集PV、预告以及特别篇等专区内容",
        action="store_true",
    )
    parser.set_defaults(action=run)
@sync
async def run(args: argparse.Namespace):
    """Resolve every selected episode behind ``args.url`` and download them one by one.

    The url is matched against the bangumi patterns first, then against the
    uploaded-video patterns. For the matched kind the title is logged, the
    episode list is fetched and filtered by ``args.episodes``, and the play
    urls of each remaining item are collected. All collected items are then
    passed to ``download_video`` sequentially. If the url matches no pattern,
    an error is logged and the process exits with status 1.
    """
    check_options.check_batch_options(args)

    bangumi_patterns = (
        regexp_bangumi_ep,
        regexp_bangumi_ep_short,
        regexp_bangumi_ss,
        regexp_bangumi_ss_short,
        regexp_bangumi_md,
    )
    acg_video_patterns = (
        regexp_acg_video_av,
        regexp_acg_video_av_short,
        regexp_acg_video_bv,
        regexp_acg_video_bv_short,
    )

    async with aiohttp.ClientSession(
        headers=gen_headers(),
        cookies=gen_cookies(args.sessdata),
        timeout=aiohttp.ClientTimeout(total=5),
    ) as session:
        download_list: list[tuple[list[VideoUrlMeta], list[AudioUrlMeta], str]] = []

        # First pattern that matches wins; uploaded-video patterns are only
        # tried when no bangumi pattern matched.
        bangumi_match = next((m for p in bangumi_patterns if (m := p.match(args.url))), None)
        acg_video_match = None
        if bangumi_match is None:
            acg_video_match = next((m for p in acg_video_patterns if (m := p.match(args.url))), None)

        if bangumi_match is not None:
            # The url refers to a bangumi
            named_groups = bangumi_match.groupdict()
            if "episode_id" in named_groups:
                episode_id = EpisodeId(bangumi_match.group("episode_id"))
                season_id = await get_season_id_by_episode_id(session, episode_id)
            elif "season_id" in named_groups:
                season_id = SeasonId(bangumi_match.group("season_id"))
            else:
                media_id = MediaId(bangumi_match.group("media_id"))
                season_id = await get_season_id_by_media_id(session, media_id)

            title = await get_bangumi_title(session, season_id)
            Logger.custom(title, Badge("番剧", fore="black", back="cyan"))

            bangumi_list = await get_bangumi_list(session, season_id)
            # Section items are only kept when --with-section was given
            bangumi_list = [item for item in bangumi_list if args.with_section or not item["is_section"]]
            # Episode selection
            episodes = parse_episodes(args.episodes, len(bangumi_list))
            bangumi_list = [item for item in bangumi_list if item["id"] in episodes]

            for index, bangumi_item in enumerate(bangumi_list, start=1):
                Logger.info("正在努力解析第 {}/{} 个视频".format(index, len(bangumi_list)), end="\r")
                avid = bangumi_item["avid"]
                cid = bangumi_item["cid"]
                episode_id = bangumi_item["episode_id"]
                filename = bangumi_item["name"]
                videos, audios = await get_bangumi_playurl(session, avid, episode_id, cid)
                # TODO: decide the output location dynamically from the path pattern
                download_list.append((videos, audios, filename))
        elif acg_video_match is not None:
            # The url refers to an uploaded video
            if "aid" in acg_video_match.groupdict():
                avid = AId(acg_video_match.group("aid"))
            else:
                avid = BvId(acg_video_match.group("bvid"))

            title = await get_acg_video_title(session, avid)
            Logger.custom(title, Badge("投稿视频", fore="black", back="cyan"))

            acg_video_list = await get_acg_video_list(session, avid)
            # Episode selection
            episodes = parse_episodes(args.episodes, len(acg_video_list))
            acg_video_list = [item for item in acg_video_list if item["id"] in episodes]

            for index, acg_video_item in enumerate(acg_video_list, start=1):
                Logger.info("正在努力解析第 {}/{} 个视频".format(index, len(acg_video_list)), end="\r")
                cid = acg_video_item["cid"]
                filename = acg_video_item["name"]
                videos, audios = await get_acg_video_playurl(session, avid, cid)
                # TODO: decide the output location dynamically from the path pattern
                download_list.append((videos, audios, filename))
        else:
            Logger.error("url 不正确~")
            sys.exit(1)

        # Downloads run sequentially, inside the session they depend on.
        for videos, audios, filename in download_list:
            # Codec options have the form "<download codec>:<save codec>".
            vcodec_parts = args.vcodec.split(":")
            acodec_parts = args.acodec.split(":")
            await download_video(
                session,
                videos,
                audios,
                args.dir,
                repair_filename(filename),
                {
                    "require_video": args.require_video,
                    "video_quality": args.video_quality,
                    "video_download_codec": vcodec_parts[0],
                    "video_save_codec": vcodec_parts[1],
                    "require_audio": args.require_audio,
                    "audio_quality": args.audio_quality,
                    "audio_download_codec": acodec_parts[0],
                    "audio_save_codec": acodec_parts[1],
                    "overwrite": args.overwrite,
                    # block_size is multiplied up from MiB to bytes
                    "block_size": int(args.block_size * 1024 * 1024),
                    "num_workers": args.num_workers,
                },
            )

View File

@ -12,6 +12,7 @@ from yutto.utils.asynclib import install_uvloop
from yutto.utils.console.colorful import set_no_color
from yutto.utils.console.logger import Badge, Logger, set_logger_debug
from yutto.utils.ffmpeg import FFmpeg
from yutto.processor.filter import check_episodes
def check_basic_options(args: argparse.Namespace):
@ -20,7 +21,7 @@ def check_basic_options(args: argparse.Namespace):
ffmpeg = FFmpeg()
# 在使用 --no-color 或者环境变量 NO_COLOR 非空时都应该不显示颜色
# Also see: https://no-color.org/
# See also: https://no-color.org/
if args.no_color or os.environ.get("NO_COLOR"):
set_no_color()
@ -28,7 +29,7 @@ def check_basic_options(args: argparse.Namespace):
if args.debug:
set_logger_debug()
else:
# 为保证协程任务的可读性,仅在非 debug 模式启用 uvloop
# 为保证协程错误栈的可读性debug 模式不启用 uvloop
install_uvloop()
# vcodec 检查
@ -76,13 +77,36 @@ def check_basic_options(args: argparse.Namespace):
# TODO: proxy 检验
# 不下载视频无法嵌入字幕
if not args.require_video and args.embed_subtitle:
Logger.error("不下载视频时无法嵌入字幕")
sys.exit(1)
# 不下载视频无法嵌入弹幕
if not args.require_video and args.embed_danmaku:
Logger.error("不下载视频时无法嵌入弹幕")
sys.exit(1)
# 嵌入弹幕功能仅支持 ASS 弹幕
if args.embed_danmaku and args.danmaku != "ass":
Logger.error("嵌入弹幕功能仅支持 ASS 弹幕")
sys.exit(1)
# 大会员身份校验
if not args.sessdata:
Logger.warning("未提供 SESSDATA无法下载会员专属剧集")
Logger.warning("未提供 SESSDATA无法下载会员专剧集")
elif asyncio.run(check_is_vip(args.sessdata)):
Logger.custom("成功以大会员身份登录~", badge=Badge("大会员", fore="white", back="magenta"))
else:
Logger.warning("以非大会员身份登录,无法下载会员专属剧集")
Logger.warning("以非大会员身份登录,无法下载会员专享剧集")
def check_batch_options(args: argparse.Namespace):
    """Validate the options that only apply to batch downloads.

    Currently this checks the format of ``args.episodes`` with
    ``check_episodes``; on failure an error is logged and the process exits
    with status 1.
    """
    # Only the format is checked here; the selection is parsed later, when
    # the episode list is filtered.
    if check_episodes(args.episodes):
        return
    Logger.error("选集参数({})格式不正确".format(args.episodes))
    sys.exit(1)
async def check_is_vip(sessdata: str = "") -> bool:

View File

@ -29,19 +29,17 @@ def add_get_arguments(parser: argparse.ArgumentParser):
@sync
async def run(args: argparse.Namespace):
# args.sessdata = "0a7f9758%2C1629361847%2Ca86ac*21"
# # args.sessdata = ""
# sessdata = "0a7f9758%2C1629361847%2Ca86ac*21"
# # sessdata = "dfasdlfsa"
async with aiohttp.ClientSession(
headers=gen_headers(),
cookies=gen_cookies(args.sessdata),
cookie_jar=aiohttp.DummyCookieJar(),
timeout=aiohttp.ClientTimeout(total=5),
) as session:
if (match_obj := regexp_bangumi_ep.match(args.url)) or (match_obj := regexp_bangumi_ep_short.match(args.url)):
# 匹配为番剧
episode_id = EpisodeId(match_obj.group("episode_id"))
season_id = await get_season_id_by_episode_id(session, episode_id)
title = await get_bangumi_title(session, season_id)
Logger.custom(title, Badge("番剧", fore="black", back="cyan"))
bangumi_list = await get_bangumi_list(session, season_id)
for bangumi_item in bangumi_list:
if bangumi_item["episode_id"] == episode_id:
@ -53,14 +51,13 @@ async def run(args: argparse.Namespace):
Logger.error("在列表中未找到该剧集")
sys.exit(1)
videos, audios = await get_bangumi_playurl(session, avid, episode_id, cid)
title = await get_bangumi_title(session, season_id)
Logger.custom(title, Badge("番剧", fore="black", back="cyan"))
elif (
(match_obj := regexp_acg_video_av.match(args.url))
or (match_obj := regexp_acg_video_av_short.match(args.url))
or (match_obj := regexp_acg_video_bv.match(args.url))
or (match_obj := regexp_acg_video_bv_short.match(args.url))
):
# 匹配为投稿视频
page: int = 1
if "aid" in match_obj.groupdict().keys():
avid = AId(match_obj.group("aid"))
@ -68,12 +65,12 @@ async def run(args: argparse.Namespace):
avid = BvId(match_obj.group("bvid"))
if match_obj.group("page") is not None:
page = int(match_obj.group("page"))
title = await get_acg_video_title(session, avid)
Logger.custom(title, Badge("投稿视频", fore="black", back="cyan"))
acg_video_list = await get_acg_video_list(session, avid)
cid = acg_video_list[page - 1]["cid"]
filename = acg_video_list[page - 1]["name"]
videos, audios = await get_acg_video_playurl(session, avid, cid)
title = await get_acg_video_title(session, avid)
Logger.custom(title, Badge("投稿视频", fore="black", back="cyan"))
else:
Logger.error("url 不正确~")
sys.exit(1)

View File

@ -42,6 +42,20 @@ def slice(start: int, total_size: Optional[int], block_size: Optional[int] = Non
def combine(*l_list: list[Any]) -> list[Any]:
"""将多个 list 「均匀」地合并到一个 list
# example
```
l_list = [
[1, 2, 3, 4, 5],
[6, 7, 8],
[9, 10, 11, 12]
]
combine(l_list)
# [1, 6, 9, 2, 7, 10, 3, 8, 11, 4, 12, 5]
```
"""
results: list[Any] = []
for i in range(max([len(l) for l in l_list])):
for l in l_list:
@ -154,7 +168,6 @@ async def download_video(
args.extend(["-y"])
args.append(output_path)
Logger.debug("FFmpeg > ffmpeg {}".format(" ".join(args)))
ffmpeg.exec(args)
# fmt: on
Logger.info("合并完成!")

View File

@ -1,8 +1,11 @@
import re
import sys
from typing import Optional, TypeVar
from yutto.api.acg_video import AudioUrlMeta, VideoUrlMeta
from yutto.media.codec import AudioCodec, VideoCodec, gen_acodec_priority, gen_vcodec_priority
from yutto.media.quality import AudioQuality, VideoQuality, gen_audio_quality_priority, gen_video_quality_priority
from yutto.utils.console.logger import Logger
def select_video(
@ -70,3 +73,58 @@ def filter_none_value(l: list[Optional[T]]) -> list[T]:
return result
# ? 不清楚直接这么写为什么类型不匹配
# return list(filter(lambda x: x is not None, l))
def check_episodes(episodes_str: str) -> bool:
    """Return whether ``episodes_str`` is a well-formed episode selection.

    A selection is a comma-separated list of items. Each item is either a
    single endpoint or a range ``start~end``. An endpoint is ``^``, ``$`` or
    an optionally negative integer.

    The whole string must match: a valid prefix followed by garbage
    (e.g. ``"1,abc"`` or ``"1~2~3"``) is rejected, as are endpoints that
    could not be converted to an integer later (e.g. ``"1-2"``).
    """
    endpoint = r"(?:\^|\$|-?\d+)"
    item = r"{0}(?:~{0})?".format(endpoint)
    selection = r"{0}(?:,{0})*".format(item)
    # fullmatch (not match) so that trailing junk invalidates the selection.
    return re.fullmatch(selection, episodes_str) is not None
def parse_episodes(episodes_str: str, total: int) -> list[int]:
    """Convert an episode selection string into a sorted list of episode numbers (1-based).

    ``episodes_str`` is a comma-separated list of single numbers or
    ``start~end`` ranges (both ends inclusive). ``^`` is replaced by ``1``
    and ``$`` by ``-1``; a negative number ``v`` resolves to
    ``v + total + 1``. A string rejected by ``check_episodes`` selects
    nothing.

    Numbers outside ``1..total`` are reported with a warning and dropped.
    The process exits with status 1 when ``0`` is used as an episode number,
    when a range ends before it starts, or when an item cannot be parsed.

    Args:
        episodes_str: the selection expression.
        total: number of available episodes.

    Returns:
        The selected episode numbers, de-duplicated and in ascending order.
    """

    def resolve_negative(value: int) -> int:
        # 0 is never valid because numbering starts at 1.
        if value == 0:
            Logger.error("不可使用 0 作为剧集号(剧集号从 1 开始计算)")
            sys.exit(1)
        return value if value > 0 else value + total + 1

    # Parse the string into a set of (not yet range-checked) episode numbers.
    selected: set[int] = set()
    if check_episodes(episodes_str):
        normalized = episodes_str.replace("^", "1").replace("$", "-1")
        for episode_item in normalized.split(","):
            try:
                bounds = [int(bound) for bound in episode_item.split("~")]
            except ValueError:
                bounds = []
            # An item is either one number or exactly two range bounds;
            # anything else is reported as a format error rather than
            # surfacing as a traceback.
            if len(bounds) not in (1, 2):
                Logger.error("选集参数({})格式不正确".format(episodes_str))
                sys.exit(1)
            if len(bounds) == 1:
                selected.add(resolve_negative(bounds[0]))
                continue
            start, end = resolve_negative(bounds[0]), resolve_negative(bounds[1])
            if end < start:
                Logger.error("终点值({})应不小于起点值({})".format(end, start))
                sys.exit(1)
            selected.update(range(start, end + 1))

    # Keep the numbers that actually exist and collect the rest for the warning.
    episodes: list[int] = []
    out_of_range: list[int] = []
    for episode in sorted(selected):
        if 1 <= episode <= total:
            episodes.append(episode)
        else:
            out_of_range.append(episode)

    if out_of_range:
        Logger.warning("剧集 {} 不存在".format(",".join(map(str, out_of_range))))

    Logger.info("已选择第 {}".format(",".join(map(str, episodes))))
    if not episodes:
        Logger.warning("没有选中任何剧集")
    return episodes

View File

View File

@ -3,6 +3,7 @@ import time
from yutto.utils.console.formatter import size_format
from yutto.utils.file_buffer import AsyncFileBuffer
from yutto.utils.console.logger import Logger
async def show_progress(file_buffers: list[AsyncFileBuffer], total_size: int):
@ -19,14 +20,15 @@ async def show_progress(file_buffers: list[AsyncFileBuffer], total_size: int):
size_now = size_written + size_in_buffer
speed = (size_now - size) / (t_now - t + 10 ** -6)
print(
"{} {}({} 块) {} {}/s".format(
Logger.print(
"[File: {:>10} + Buffer: {:>10}({:>4} 块)]/{:>10} {:>10}/s".format(
size_format(size_written),
size_format(size_in_buffer),
sum([len(file_buffer.buffer) for file_buffer in file_buffers]),
size_format(total_size),
size_format(speed),
),
end="\r",
)
t, size = t_now, size_now
await asyncio.sleep(0.5)

View File

@ -41,7 +41,7 @@ class Badge:
WARNING_BADGE = Badge("WARN", fore="black", back="yellow")
ERROR_BADGE = Badge("ERROR", fore="white", back="red")
INFO_BADGE = Badge("INFO", fore="black", back="green")
DEBUG_BADGE = Badge("ERROR", fore="black", back="blue")
DEBUG_BADGE = Badge("DEBUG", fore="black", back="blue")
class Logger(metaclass=Singleton):

View File

@ -5,6 +5,7 @@ import subprocess
from functools import cached_property
from yutto.utils.functiontools.singleton import Singleton
from yutto.utils.console.logger import Logger
class FFmpegNotFoundError(Exception):
@ -25,6 +26,7 @@ class FFmpeg(object, metaclass=Singleton):
def exec(self, args: list[str]):
    """Run the executable at ``self.path`` with ``args``.

    The full command line is written to the debug log first. stdout and
    stderr are captured, and the ``subprocess.CompletedProcess`` is returned.
    """
    command = [self.path, *args]
    Logger.debug(" ".join(command))
    # capture_output=True sets both stdout and stderr to subprocess.PIPE.
    return subprocess.run(command, capture_output=True)
@cached_property