feat: add chapter info support (#274)

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
This commit is contained in:
ZenTeaCC 2024-06-16 15:35:47 +08:00 committed by GitHub
parent fd3a4f60a7
commit 0164d53cd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 140 additions and 13 deletions

1
.gitignore vendored
View File

@ -131,6 +131,7 @@ dmypy.json
*.srt
*.nfo
*.jpg
*.ini
# test files
*.test.py

View File

@ -407,6 +407,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file -
- 参数 `--no-danmaku`
- 默认值 `False`
#### 不生成章节信息
- 参数 `--no-chapter-info`
- 默认值 `False`
不生成章节信息,包含 MetaData 和嵌入视频流的章节信息。
#### 仅生成弹幕文件
- 参数 `--danmaku-only`

View File

@ -60,6 +60,7 @@ clean:
find . -name "*.pb" -print0 | xargs -0 rm -f
find . -name "*.pyc" -print0 | xargs -0 rm -f
find . -name "*.jpg" -print0 | xargs -0 rm -f
find . -name "*.ini" -print0 | xargs -0 rm -f
rm -rf .pytest_cache/
rm -rf .mypy_cache/
find . -maxdepth 3 -type d -empty -print0 | xargs -0 -r rm -r

View File

@ -46,8 +46,16 @@ from yutto.validator import (
validate_user_info,
)
DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover"]
DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = ["video", "audio", "subtitle", "metadata", "danmaku", "cover"]
DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover", "chapter_info"]
DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = [
"video",
"audio",
"subtitle",
"metadata",
"danmaku",
"cover",
"chapter_info",
]
def main():
@ -191,6 +199,13 @@ def cli() -> argparse.ArgumentParser:
help="不生成封面",
)
group_common.add_argument(
"--no-chapter-info",
dest="require_chapter_info",
action=create_select_required_action(deselect=["chapter_info"]),
help="不封装章节信息",
)
group_common.set_defaults(
require_video=True,
require_audio=True,
@ -198,6 +213,7 @@ def cli() -> argparse.ArgumentParser:
require_metadata=False,
require_danmaku=True,
require_cover=True,
require_chapter_info=True,
)
group_common.add_argument("--no-color", action="store_true", help="不使用颜色")
group_common.add_argument("--no-progress", action="store_true", help="不显示进度条")
@ -260,7 +276,6 @@ async def run(args_list: list[argparse.Namespace]):
CheeseExtractor(), # 课程单集
]
)
url: str = args.url
# 将 shortcut 转为完整 url
for extractor in extractors:
@ -331,12 +346,15 @@ async def run(args_list: list[argparse.Namespace]):
episode_data,
{
"require_video": args.require_video,
"require_chapter_info": args.require_chapter_info,
"video_quality": args.video_quality,
"video_download_codec": args.vcodec.split(":")[0],
"video_save_codec": args.vcodec.split(":")[1],
"video_download_codec_priority": args.download_vcodec_priority.split(",")
if args.download_vcodec_priority != "auto"
else None,
"video_download_codec_priority": (
args.download_vcodec_priority.split(",")
if args.download_vcodec_priority != "auto"
else None
),
"require_audio": args.require_audio,
"audio_quality": args.audio_quality,
"audio_download_codec": args.acodec.split(":")[0],

View File

@ -5,7 +5,7 @@ from typing import NamedTuple, TypedDict
from yutto.bilibili_typing.codec import AudioCodec, VideoCodec
from yutto.bilibili_typing.quality import AudioQuality, VideoQuality
from yutto.utils.danmaku import DanmakuData
from yutto.utils.metadata import MetaData
from yutto.utils.metadata import ChapterInfoData, MetaData
from yutto.utils.subtitle import SubtitleData
@ -172,6 +172,7 @@ class EpisodeData(TypedDict):
metadata: MetaData | None
danmaku: DanmakuData
cover_data: bytes | None
chapter_info_data: list[ChapterInfoData]
output_dir: str
tmp_dir: str
filename: str
@ -179,6 +180,7 @@ class EpisodeData(TypedDict):
class DownloaderOptions(TypedDict):
require_video: bool
require_chapter_info: bool
video_quality: VideoQuality
video_download_codec: VideoCodec
video_save_codec: str

View File

@ -184,4 +184,5 @@ def _parse_bangumi_metadata(item: dict[str, Any]) -> MetaData:
tag=[], # TODO
website="", # TODO
original_filename="", # TODO
chapter_info_data=[], # There are no chapter info in bangumi for now
)

View File

@ -153,4 +153,5 @@ def _parse_cheese_metadata(item: dict[str, Any]) -> MetaData:
tag=[], # TODO
website="", # TODO
original_filename="", # TODO
chapter_info_data=[], # There are no chapter info in cheese for now
)

View File

@ -25,7 +25,8 @@ from yutto.exceptions import (
)
from yutto.utils.console.logger import Logger
from yutto.utils.fetcher import Fetcher
from yutto.utils.metadata import Actor, MetaData
from yutto.utils.funcutils.data_access import data_has_chained_keys
from yutto.utils.metadata import Actor, ChapterInfoData, MetaData
from yutto.utils.time import get_time_stamp_by_now
@ -259,6 +260,23 @@ async def get_ugc_video_subtitles(client: AsyncClient, avid: AvId, cid: CId) ->
return []
async def get_ugc_video_chapters(client: AsyncClient, avid: AvId, cid: CId) -> list[ChapterInfoData]:
    """Fetch the chapter list of a single UGC video page.

    Requests the player v2 endpoint for the given ``avid`` / ``cid`` and
    converts every entry of ``data.view_points`` in the response into a
    ``ChapterInfoData`` mapping (``content``, ``start``, ``end``).

    Returns an empty list when the request yields no JSON, or when the
    response does not contain ``data.view_points`` (a warning carrying the
    response's ``message`` field is logged in that case).
    """
    url_template = "https://api.bilibili.com/x/player/v2?avid={aid}&bvid={bvid}&cid={cid}"
    request_url = url_template.format(**avid.to_dict(), cid=cid)
    response = await Fetcher.fetch_json(client, request_url)

    # Guard clauses: nothing fetched, or the payload has no chapter section.
    if response is None:
        return []
    if not data_has_chained_keys(response, ["data", "view_points"]):
        Logger.warning(f"无法获取该视频的章节信息({format_ids(avid, cid)}),原因:{response.get('message')}")
        return []

    chapters: list[ChapterInfoData] = []
    for view_point in response["data"]["view_points"]:
        # The API names the boundaries "from" / "to"; they are stored as "start" / "end".
        chapters.append(
            {"content": view_point["content"], "start": view_point["from"], "end": view_point["to"]}
        )
    return chapters
def _parse_ugc_video_metadata(
video_info: _UgcVideoInfo,
page_info: _UgcVideoPageInfo,
@ -277,6 +295,7 @@ def _parse_ugc_video_metadata(
source="", # TODO
original_filename="", # TODO
website=video_info["bvid"].to_url(),
chapter_info_data=[],
)

View File

@ -15,6 +15,7 @@ from yutto.api.cheese import CheeseListItem, get_cheese_playurl, get_cheese_subt
from yutto.api.danmaku import get_danmaku
from yutto.api.ugc_video import (
UgcVideoListItem,
get_ugc_video_chapters,
get_ugc_video_playurl,
get_ugc_video_subtitles,
)
@ -32,6 +33,7 @@ from yutto.processor.path_resolver import (
from yutto.utils.console.logger import Logger
from yutto.utils.danmaku import EmptyDanmakuData
from yutto.utils.fetcher import Fetcher
from yutto.utils.metadata import attach_chapter_info
async def extract_bangumi_data(
@ -76,6 +78,7 @@ async def extract_bangumi_data(
metadata=metadata,
danmaku=danmaku,
cover_data=cover_data,
chapter_info_data=[],
output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir,
filename=filename,
@ -128,6 +131,7 @@ async def extract_cheese_data(
metadata=metadata,
danmaku=danmaku,
cover_data=cover_data,
chapter_info_data=[],
output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir,
filename=filename,
@ -153,8 +157,11 @@ async def extract_ugc_video_data(
await get_ugc_video_playurl(client, avid, cid) if args.require_video or args.require_audio else ([], [])
)
subtitles = await get_ugc_video_subtitles(client, avid, cid) if args.require_subtitle else []
chapter_info_data = await get_ugc_video_chapters(client, avid, cid) if args.require_chapter_info else []
danmaku = await get_danmaku(client, cid, args.danmaku_format) if args.require_danmaku else EmptyDanmakuData
metadata = ugc_video_info["metadata"] if args.require_metadata else None
if metadata and chapter_info_data:
attach_chapter_info(metadata, chapter_info_data)
cover_data = (
await Fetcher.fetch_bin(client, ugc_video_info["metadata"]["thumb"]) if args.require_cover else None
)
@ -184,6 +191,7 @@ async def extract_ugc_video_data(
metadata=metadata,
danmaku=danmaku,
cover_data=cover_data,
chapter_info_data=chapter_info_data,
output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir,
filename=filename,

View File

@ -21,7 +21,7 @@ from yutto.utils.fetcher import Fetcher
from yutto.utils.ffmpeg import FFmpeg, FFmpegCommandBuilder
from yutto.utils.file_buffer import AsyncFileBuffer
from yutto.utils.funcutils import filter_none_value, xmerge
from yutto.utils.metadata import write_metadata
from yutto.utils.metadata import ChapterInfoData, write_chapter_info, write_metadata
from yutto.utils.subtitle import write_subtitle
@ -176,6 +176,8 @@ def merge_video_and_audio(
audio_path: Path,
cover_data: bytes | None,
cover_path: Path,
chapter_info_data: list[ChapterInfoData],
chapter_info_path: Path,
output_path: Path,
options: DownloaderOptions,
):
@ -215,6 +217,10 @@ def merge_video_and_audio(
output.use(cover_input)
output.set_cover(cover_input)
if video is not None and chapter_info_data:
metadata_input = command_builder.add_metadata_input(chapter_info_path)
output.use(metadata_input)
# see also: https://www.reddit.com/r/ffmpeg/comments/qe7oq1/comment/hi0bmic/?utm_source=share&utm_medium=web2x&context=3
output.with_extra_options(["-strict", "unofficial"])
@ -237,6 +243,8 @@ def merge_video_and_audio(
audio_path.unlink()
if cover_data is not None:
cover_path.unlink()
if chapter_info_data:
chapter_info_path.unlink()
class DownloadState(Enum):
@ -257,6 +265,7 @@ async def start_downloader(
danmaku = episode_data["danmaku"]
metadata = episode_data["metadata"]
cover_data = episode_data["cover_data"]
chapter_info_data = episode_data["chapter_info_data"]
output_dir = Path(episode_data["output_dir"])
tmp_dir = Path(episode_data["tmp_dir"])
filename = episode_data["filename"]
@ -269,6 +278,7 @@ async def start_downloader(
video_path = tmp_dir.joinpath(filename + "_video.m4s")
audio_path = tmp_dir.joinpath(filename + "_audio.m4s")
cover_path = tmp_dir.joinpath(filename + "_cover.jpg")
chapter_info_path = tmp_dir.joinpath(filename + "_chapter_info.ini")
video = select_video(
videos, options["video_quality"], options["video_download_codec"], options["video_download_codec_priority"]
@ -345,6 +355,11 @@ async def start_downloader(
video = video if will_download_video else None
audio = audio if will_download_audio else None
# 保存章节信息
if chapter_info_data:
write_chapter_info(filename, chapter_info_data, chapter_info_path)
# 保存封面
if cover_data is not None:
cover_path.write_bytes(cover_data)
@ -352,5 +367,16 @@ async def start_downloader(
await download_video_and_audio(client, video, video_path, audio, audio_path, options)
# 合并视频 / 音频
merge_video_and_audio(video, video_path, audio, audio_path, cover_data, cover_path, output_path, options)
merge_video_and_audio(
video,
video_path,
audio,
audio_path,
cover_data,
cover_path,
chapter_info_data,
chapter_info_path,
output_path,
options,
)
return DownloadState.DONE

View File

@ -78,7 +78,10 @@ class FFmpegInput:
self.input_id = input_id
self.stream_id = stream_id
def build(self) -> list[str]:
def build_select_command(self) -> list[str]:
    """Return the output-side arguments that select this input (``-map <input_id>``)."""
    input_index = str(self.input_id)
    return ["-map", input_index]
def build_input_command(self) -> list[str]:
    """Return the arguments that declare this input file (``-i <path>``)."""
    input_path = str(self.path)
    return ["-i", input_path]
def __repr__(self):
@ -91,6 +94,11 @@ class FFmpegVideoInput(FFmpegInput): ...
class FFmpegAudioInput(FFmpegInput): ...
class FFmpegMetadataInput(FFmpegInput):
    """Input file that is selected as a metadata source.

    Unlike the base input, which is selected with ``-map``, this input is
    selected in the output with ``-map_metadata``.
    """

    def build_select_command(self) -> list[str]:
        """Return ``-map_metadata <input_id>`` for this input."""
        input_index = str(self.input_id)
        return ["-map_metadata", input_index]
class FFmpegOutput:
def __init__(self, path: Path | str):
self.path = path
@ -98,6 +106,7 @@ class FFmpegOutput:
self.vcodec: str | None = None
self.acodec: str | None = None
self.cover_input: FFmpegVideoInput | None = None
self.metadata_input: FFmpegMetadataInput | None = None
self.extra_commands: list[str] = []
def use(self, input: FFmpegInput):
@ -116,12 +125,16 @@ class FFmpegOutput:
self.cover_input = cover
return self
def set_metadata(self, metadata: FFmpegMetadataInput):
    """Record *metadata* as this output's metadata input.

    Returns ``self`` so calls can be chained.
    """
    self.metadata_input = metadata
    return self
def with_extra_options(self, command: list[str]):
    """Append raw command-line arguments to this output's extra commands.

    Returns ``self`` so calls can be chained.
    """
    # In-place concatenation keeps the same list object, just like ``extend``.
    self.extra_commands += command
    return self
def build(self) -> list[str]:
selected_inputs = concat_commands([["-map", str(input.input_id)] for input in self.used_inputs])
selected_inputs = concat_commands([input.build_select_command() for input in self.used_inputs])
vcodec = ["-vcodec", self.vcodec] if self.vcodec else []
acodec = ["-acodec", self.acodec] if self.acodec else []
# Refer to `-disposition` option in https://www.ffmpeg.org/ffmpeg.html#toc-Main-options
@ -166,6 +179,12 @@ class FFmpegCommandBuilder:
self.inputs.append(input)
return input
def add_metadata_input(self, path: Path | str):
    """Register *path* as a metadata input and return the created input object.

    The new input takes the current ``num_inputs`` value as its input id
    (stream id 0); the counter is then advanced and the input is appended
    to ``self.inputs``.
    """
    metadata_input = FFmpegMetadataInput(path, self.num_inputs, 0)
    self.inputs.append(metadata_input)
    self.num_inputs += 1
    return metadata_input
def with_extra_options(self, command: list[str]):
    """Append raw command-line arguments to the builder's extra commands.

    ``build`` places these between the input and the output arguments.
    Returns ``self`` so calls can be chained.
    """
    # In-place concatenation keeps the same list object, just like ``extend``.
    self.extra_commands += command
    return self
@ -176,7 +195,7 @@ class FFmpegCommandBuilder:
return output
def build(self):
input_commands = concat_commands([input.build() for input in self.inputs])
input_commands = concat_commands([input.build_input_command() for input in self.inputs])
output_commands = concat_commands([output.build() for output in self.outputs])
return input_commands + self.extra_commands + output_commands

View File

@ -16,6 +16,12 @@ class Actor(TypedDict):
order: int
class ChapterInfoData(TypedDict):
    """One chapter of a video: its time range and its title."""

    # Chapter start; written as START under TIMEBASE=1/1 in the chapter file
    start: int
    # Chapter end; written as END under TIMEBASE=1/1 in the chapter file
    end: int
    # Chapter title text
    content: str
class MetaData(TypedDict):
title: str
show_title: str
@ -29,6 +35,7 @@ class MetaData(TypedDict):
source: str
original_filename: str
website: str
chapter_info_data: list[ChapterInfoData]
def metadata_value_format(metadata: MetaData, metadata_format: dict[str, str]) -> dict[str, Any]:
@ -49,3 +56,20 @@ def write_metadata(metadata: MetaData, video_path: Path, metadata_format: dict[s
xml_content = dict2xml(user_formatted_metadata, wrap=custom_root, indent=" ") # type: ignore
with metadata_path.open("w", encoding="utf-8") as f: # type: ignore
f.write(xml_content) # type: ignore
def attach_chapter_info(metadata: MetaData, chapter_info_data: list[ChapterInfoData]):
    """Store the chapter list on *metadata* in place, under ``chapter_info_data``."""
    metadata.update({"chapter_info_data": chapter_info_data})
# Characters that must be backslash-escaped inside FFMETADATA keys and values.
_FFMETADATA_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        "=": "\\=",
        ";": "\\;",
        "#": "\\#",
        "\n": "\\\n",
    }
)


def _escape_ffmetadata(text: str) -> str:
    """Backslash-escape the characters that are special in an FFMETADATA file."""
    return str(text).translate(_FFMETADATA_ESCAPES)


# https://wklchris.github.io/blog/FFmpeg/FFmpeg.html#id26
def write_chapter_info(title: str, chapter_info_data: list[ChapterInfoData], chapter_path: Path):
    """Write an FFMETADATA file describing the chapters of a video.

    Args:
        title: Global ``title`` tag written to the file header.
        chapter_info_data: Chapters to write; each becomes one ``[CHAPTER]``
            section with ``START`` / ``END`` and a ``title`` tag.
        chapter_path: Destination file; it is created or overwritten and
            encoded as UTF-8.

    The global title and the chapter titles are escaped, because a raw
    ``=``, ``;``, ``#``, ``\\`` or newline inside a value would otherwise
    break the file's syntax.
    """
    lines = [";FFMETADATA1\n", f"title={_escape_ffmetadata(title)}\n"]
    for chapter in chapter_info_data:
        lines.extend(
            [
                "[CHAPTER]\n",
                # A 1/1 timebase means START / END are plain seconds.
                "TIMEBASE=1/1\n",
                f"START={chapter['start']}\n",
                f"END={chapter['end']}\n",
                f"title={_escape_ffmetadata(chapter['content'])}\n",
            ]
        )
    # Build everything first so a malformed chapter never leaves a half-written file.
    with chapter_path.open("w", encoding="utf-8") as f:
        f.writelines(lines)