feat: add chapter info support (#274)

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
This commit is contained in:
ZenTeaCC 2024-06-16 15:35:47 +08:00 committed by GitHub
parent fd3a4f60a7
commit 0164d53cd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 140 additions and 13 deletions

1
.gitignore vendored
View File

@ -131,6 +131,7 @@ dmypy.json
*.srt *.srt
*.nfo *.nfo
*.jpg *.jpg
*.ini
# test files # test files
*.test.py *.test.py

View File

@ -407,6 +407,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file -
- 参数 `--no-danmaku` - 参数 `--no-danmaku`
- 默认值 `False` - 默认值 `False`
#### 不生成章节信息
- 参数 `--no-chapter-info`
- 默认值 `False`
不生成章节信息,包含 MetaData 和嵌入视频流的章节信息。
#### 仅生成弹幕文件 #### 仅生成弹幕文件
- 参数 `--danmaku-only` - 参数 `--danmaku-only`

View File

@ -60,6 +60,7 @@ clean:
find . -name "*.pb" -print0 | xargs -0 rm -f find . -name "*.pb" -print0 | xargs -0 rm -f
find . -name "*.pyc" -print0 | xargs -0 rm -f find . -name "*.pyc" -print0 | xargs -0 rm -f
find . -name "*.jpg" -print0 | xargs -0 rm -f find . -name "*.jpg" -print0 | xargs -0 rm -f
find . -name "*.ini" -print0 | xargs -0 rm -f
rm -rf .pytest_cache/ rm -rf .pytest_cache/
rm -rf .mypy_cache/ rm -rf .mypy_cache/
find . -maxdepth 3 -type d -empty -print0 | xargs -0 -r rm -r find . -maxdepth 3 -type d -empty -print0 | xargs -0 -r rm -r

View File

@ -46,8 +46,16 @@ from yutto.validator import (
validate_user_info, validate_user_info,
) )
DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover"] DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover", "chapter_info"]
DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = ["video", "audio", "subtitle", "metadata", "danmaku", "cover"] DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = [
"video",
"audio",
"subtitle",
"metadata",
"danmaku",
"cover",
"chapter_info",
]
def main(): def main():
@ -191,6 +199,13 @@ def cli() -> argparse.ArgumentParser:
help="不生成封面", help="不生成封面",
) )
group_common.add_argument(
"--no-chapter-info",
dest="require_chapter_info",
action=create_select_required_action(deselect=["chapter_info"]),
help="不封装章节信息",
)
group_common.set_defaults( group_common.set_defaults(
require_video=True, require_video=True,
require_audio=True, require_audio=True,
@ -198,6 +213,7 @@ def cli() -> argparse.ArgumentParser:
require_metadata=False, require_metadata=False,
require_danmaku=True, require_danmaku=True,
require_cover=True, require_cover=True,
require_chapter_info=True,
) )
group_common.add_argument("--no-color", action="store_true", help="不使用颜色") group_common.add_argument("--no-color", action="store_true", help="不使用颜色")
group_common.add_argument("--no-progress", action="store_true", help="不显示进度条") group_common.add_argument("--no-progress", action="store_true", help="不显示进度条")
@ -260,7 +276,6 @@ async def run(args_list: list[argparse.Namespace]):
CheeseExtractor(), # 课程单集 CheeseExtractor(), # 课程单集
] ]
) )
url: str = args.url url: str = args.url
# 将 shortcut 转为完整 url # 将 shortcut 转为完整 url
for extractor in extractors: for extractor in extractors:
@ -331,12 +346,15 @@ async def run(args_list: list[argparse.Namespace]):
episode_data, episode_data,
{ {
"require_video": args.require_video, "require_video": args.require_video,
"require_chapter_info": args.require_chapter_info,
"video_quality": args.video_quality, "video_quality": args.video_quality,
"video_download_codec": args.vcodec.split(":")[0], "video_download_codec": args.vcodec.split(":")[0],
"video_save_codec": args.vcodec.split(":")[1], "video_save_codec": args.vcodec.split(":")[1],
"video_download_codec_priority": args.download_vcodec_priority.split(",") "video_download_codec_priority": (
args.download_vcodec_priority.split(",")
if args.download_vcodec_priority != "auto" if args.download_vcodec_priority != "auto"
else None, else None
),
"require_audio": args.require_audio, "require_audio": args.require_audio,
"audio_quality": args.audio_quality, "audio_quality": args.audio_quality,
"audio_download_codec": args.acodec.split(":")[0], "audio_download_codec": args.acodec.split(":")[0],

View File

@ -5,7 +5,7 @@ from typing import NamedTuple, TypedDict
from yutto.bilibili_typing.codec import AudioCodec, VideoCodec from yutto.bilibili_typing.codec import AudioCodec, VideoCodec
from yutto.bilibili_typing.quality import AudioQuality, VideoQuality from yutto.bilibili_typing.quality import AudioQuality, VideoQuality
from yutto.utils.danmaku import DanmakuData from yutto.utils.danmaku import DanmakuData
from yutto.utils.metadata import MetaData from yutto.utils.metadata import ChapterInfoData, MetaData
from yutto.utils.subtitle import SubtitleData from yutto.utils.subtitle import SubtitleData
@ -172,6 +172,7 @@ class EpisodeData(TypedDict):
metadata: MetaData | None metadata: MetaData | None
danmaku: DanmakuData danmaku: DanmakuData
cover_data: bytes | None cover_data: bytes | None
chapter_info_data: list[ChapterInfoData]
output_dir: str output_dir: str
tmp_dir: str tmp_dir: str
filename: str filename: str
@ -179,6 +180,7 @@ class EpisodeData(TypedDict):
class DownloaderOptions(TypedDict): class DownloaderOptions(TypedDict):
require_video: bool require_video: bool
require_chapter_info: bool
video_quality: VideoQuality video_quality: VideoQuality
video_download_codec: VideoCodec video_download_codec: VideoCodec
video_save_codec: str video_save_codec: str

View File

@ -184,4 +184,5 @@ def _parse_bangumi_metadata(item: dict[str, Any]) -> MetaData:
tag=[], # TODO tag=[], # TODO
website="", # TODO website="", # TODO
original_filename="", # TODO original_filename="", # TODO
chapter_info_data=[], # There are no chapter info in bangumi for now
) )

View File

@ -153,4 +153,5 @@ def _parse_cheese_metadata(item: dict[str, Any]) -> MetaData:
tag=[], # TODO tag=[], # TODO
website="", # TODO website="", # TODO
original_filename="", # TODO original_filename="", # TODO
chapter_info_data=[], # There are no chapter info in cheese for now
) )

View File

@ -25,7 +25,8 @@ from yutto.exceptions import (
) )
from yutto.utils.console.logger import Logger from yutto.utils.console.logger import Logger
from yutto.utils.fetcher import Fetcher from yutto.utils.fetcher import Fetcher
from yutto.utils.metadata import Actor, MetaData from yutto.utils.funcutils.data_access import data_has_chained_keys
from yutto.utils.metadata import Actor, ChapterInfoData, MetaData
from yutto.utils.time import get_time_stamp_by_now from yutto.utils.time import get_time_stamp_by_now
@ -259,6 +260,23 @@ async def get_ugc_video_subtitles(client: AsyncClient, avid: AvId, cid: CId) ->
return [] return []
async def get_ugc_video_chapters(client: AsyncClient, avid: AvId, cid: CId) -> list[ChapterInfoData]:
    """Fetch the chapter list ("view points") for one UGC video page.

    Requests the player v2 endpoint for the given ``avid`` / ``cid`` and turns
    every entry of ``data.view_points`` into a ``ChapterInfoData`` dict
    (``from`` -> ``start``, ``to`` -> ``end``, ``content`` kept as is).

    Returns an empty list when the fetch yields ``None`` or when the response
    does not carry ``data.view_points``; the latter case also logs a warning
    containing the response's ``message`` field.
    """
    request_url = "https://api.bilibili.com/x/player/v2?avid={aid}&bvid={bvid}&cid={cid}".format(
        **avid.to_dict(), cid=cid
    )
    chapter_json_info = await Fetcher.fetch_json(client, request_url)
    if chapter_json_info is None:
        return []

    has_view_points = data_has_chained_keys(chapter_json_info, ["data", "view_points"])
    if not has_view_points:
        Logger.warning(f"无法获取该视频的章节信息({format_ids(avid, cid)}),原因:{chapter_json_info.get('message')}")
        return []

    chapters: list[ChapterInfoData] = []
    for view_point in chapter_json_info["data"]["view_points"]:
        chapters.append(
            {"content": view_point["content"], "start": view_point["from"], "end": view_point["to"]}
        )
    return chapters
def _parse_ugc_video_metadata( def _parse_ugc_video_metadata(
video_info: _UgcVideoInfo, video_info: _UgcVideoInfo,
page_info: _UgcVideoPageInfo, page_info: _UgcVideoPageInfo,
@ -277,6 +295,7 @@ def _parse_ugc_video_metadata(
source="", # TODO source="", # TODO
original_filename="", # TODO original_filename="", # TODO
website=video_info["bvid"].to_url(), website=video_info["bvid"].to_url(),
chapter_info_data=[],
) )

View File

@ -15,6 +15,7 @@ from yutto.api.cheese import CheeseListItem, get_cheese_playurl, get_cheese_subt
from yutto.api.danmaku import get_danmaku from yutto.api.danmaku import get_danmaku
from yutto.api.ugc_video import ( from yutto.api.ugc_video import (
UgcVideoListItem, UgcVideoListItem,
get_ugc_video_chapters,
get_ugc_video_playurl, get_ugc_video_playurl,
get_ugc_video_subtitles, get_ugc_video_subtitles,
) )
@ -32,6 +33,7 @@ from yutto.processor.path_resolver import (
from yutto.utils.console.logger import Logger from yutto.utils.console.logger import Logger
from yutto.utils.danmaku import EmptyDanmakuData from yutto.utils.danmaku import EmptyDanmakuData
from yutto.utils.fetcher import Fetcher from yutto.utils.fetcher import Fetcher
from yutto.utils.metadata import attach_chapter_info
async def extract_bangumi_data( async def extract_bangumi_data(
@ -76,6 +78,7 @@ async def extract_bangumi_data(
metadata=metadata, metadata=metadata,
danmaku=danmaku, danmaku=danmaku,
cover_data=cover_data, cover_data=cover_data,
chapter_info_data=[],
output_dir=output_dir, output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir, tmp_dir=args.tmp_dir or output_dir,
filename=filename, filename=filename,
@ -128,6 +131,7 @@ async def extract_cheese_data(
metadata=metadata, metadata=metadata,
danmaku=danmaku, danmaku=danmaku,
cover_data=cover_data, cover_data=cover_data,
chapter_info_data=[],
output_dir=output_dir, output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir, tmp_dir=args.tmp_dir or output_dir,
filename=filename, filename=filename,
@ -153,8 +157,11 @@ async def extract_ugc_video_data(
await get_ugc_video_playurl(client, avid, cid) if args.require_video or args.require_audio else ([], []) await get_ugc_video_playurl(client, avid, cid) if args.require_video or args.require_audio else ([], [])
) )
subtitles = await get_ugc_video_subtitles(client, avid, cid) if args.require_subtitle else [] subtitles = await get_ugc_video_subtitles(client, avid, cid) if args.require_subtitle else []
chapter_info_data = await get_ugc_video_chapters(client, avid, cid) if args.require_chapter_info else []
danmaku = await get_danmaku(client, cid, args.danmaku_format) if args.require_danmaku else EmptyDanmakuData danmaku = await get_danmaku(client, cid, args.danmaku_format) if args.require_danmaku else EmptyDanmakuData
metadata = ugc_video_info["metadata"] if args.require_metadata else None metadata = ugc_video_info["metadata"] if args.require_metadata else None
if metadata and chapter_info_data:
attach_chapter_info(metadata, chapter_info_data)
cover_data = ( cover_data = (
await Fetcher.fetch_bin(client, ugc_video_info["metadata"]["thumb"]) if args.require_cover else None await Fetcher.fetch_bin(client, ugc_video_info["metadata"]["thumb"]) if args.require_cover else None
) )
@ -184,6 +191,7 @@ async def extract_ugc_video_data(
metadata=metadata, metadata=metadata,
danmaku=danmaku, danmaku=danmaku,
cover_data=cover_data, cover_data=cover_data,
chapter_info_data=chapter_info_data,
output_dir=output_dir, output_dir=output_dir,
tmp_dir=args.tmp_dir or output_dir, tmp_dir=args.tmp_dir or output_dir,
filename=filename, filename=filename,

View File

@ -21,7 +21,7 @@ from yutto.utils.fetcher import Fetcher
from yutto.utils.ffmpeg import FFmpeg, FFmpegCommandBuilder from yutto.utils.ffmpeg import FFmpeg, FFmpegCommandBuilder
from yutto.utils.file_buffer import AsyncFileBuffer from yutto.utils.file_buffer import AsyncFileBuffer
from yutto.utils.funcutils import filter_none_value, xmerge from yutto.utils.funcutils import filter_none_value, xmerge
from yutto.utils.metadata import write_metadata from yutto.utils.metadata import ChapterInfoData, write_chapter_info, write_metadata
from yutto.utils.subtitle import write_subtitle from yutto.utils.subtitle import write_subtitle
@ -176,6 +176,8 @@ def merge_video_and_audio(
audio_path: Path, audio_path: Path,
cover_data: bytes | None, cover_data: bytes | None,
cover_path: Path, cover_path: Path,
chapter_info_data: list[ChapterInfoData],
chapter_info_path: Path,
output_path: Path, output_path: Path,
options: DownloaderOptions, options: DownloaderOptions,
): ):
@ -215,6 +217,10 @@ def merge_video_and_audio(
output.use(cover_input) output.use(cover_input)
output.set_cover(cover_input) output.set_cover(cover_input)
if video is not None and chapter_info_data:
metadata_input = command_builder.add_metadata_input(chapter_info_path)
output.use(metadata_input)
# see also: https://www.reddit.com/r/ffmpeg/comments/qe7oq1/comment/hi0bmic/?utm_source=share&utm_medium=web2x&context=3 # see also: https://www.reddit.com/r/ffmpeg/comments/qe7oq1/comment/hi0bmic/?utm_source=share&utm_medium=web2x&context=3
output.with_extra_options(["-strict", "unofficial"]) output.with_extra_options(["-strict", "unofficial"])
@ -237,6 +243,8 @@ def merge_video_and_audio(
audio_path.unlink() audio_path.unlink()
if cover_data is not None: if cover_data is not None:
cover_path.unlink() cover_path.unlink()
if chapter_info_data:
chapter_info_path.unlink()
class DownloadState(Enum): class DownloadState(Enum):
@ -257,6 +265,7 @@ async def start_downloader(
danmaku = episode_data["danmaku"] danmaku = episode_data["danmaku"]
metadata = episode_data["metadata"] metadata = episode_data["metadata"]
cover_data = episode_data["cover_data"] cover_data = episode_data["cover_data"]
chapter_info_data = episode_data["chapter_info_data"]
output_dir = Path(episode_data["output_dir"]) output_dir = Path(episode_data["output_dir"])
tmp_dir = Path(episode_data["tmp_dir"]) tmp_dir = Path(episode_data["tmp_dir"])
filename = episode_data["filename"] filename = episode_data["filename"]
@ -269,6 +278,7 @@ async def start_downloader(
video_path = tmp_dir.joinpath(filename + "_video.m4s") video_path = tmp_dir.joinpath(filename + "_video.m4s")
audio_path = tmp_dir.joinpath(filename + "_audio.m4s") audio_path = tmp_dir.joinpath(filename + "_audio.m4s")
cover_path = tmp_dir.joinpath(filename + "_cover.jpg") cover_path = tmp_dir.joinpath(filename + "_cover.jpg")
chapter_info_path = tmp_dir.joinpath(filename + "_chapter_info.ini")
video = select_video( video = select_video(
videos, options["video_quality"], options["video_download_codec"], options["video_download_codec_priority"] videos, options["video_quality"], options["video_download_codec"], options["video_download_codec_priority"]
@ -345,6 +355,11 @@ async def start_downloader(
video = video if will_download_video else None video = video if will_download_video else None
audio = audio if will_download_audio else None audio = audio if will_download_audio else None
# 保存章节信息
if chapter_info_data:
write_chapter_info(filename, chapter_info_data, chapter_info_path)
# 保存封面
if cover_data is not None: if cover_data is not None:
cover_path.write_bytes(cover_data) cover_path.write_bytes(cover_data)
@ -352,5 +367,16 @@ async def start_downloader(
await download_video_and_audio(client, video, video_path, audio, audio_path, options) await download_video_and_audio(client, video, video_path, audio, audio_path, options)
# 合并视频 / 音频 # 合并视频 / 音频
merge_video_and_audio(video, video_path, audio, audio_path, cover_data, cover_path, output_path, options) merge_video_and_audio(
video,
video_path,
audio,
audio_path,
cover_data,
cover_path,
chapter_info_data,
chapter_info_path,
output_path,
options,
)
return DownloadState.DONE return DownloadState.DONE

View File

@ -78,7 +78,10 @@ class FFmpegInput:
self.input_id = input_id self.input_id = input_id
self.stream_id = stream_id self.stream_id = stream_id
def build(self) -> list[str]: def build_select_command(self) -> list[str]:
return ["-map", str(self.input_id)]
def build_input_command(self) -> list[str]:
return ["-i", str(self.path)] return ["-i", str(self.path)]
def __repr__(self): def __repr__(self):
@ -91,6 +94,11 @@ class FFmpegVideoInput(FFmpegInput): ...
class FFmpegAudioInput(FFmpegInput): ... class FFmpegAudioInput(FFmpegInput): ...
class FFmpegMetadataInput(FFmpegInput):
    """FFmpeg input whose select command uses ``-map_metadata`` with the input index."""

    def build_select_command(self) -> list[str]:
        """Return the ``-map_metadata <input_id>`` argument pair for this input."""
        input_index = str(self.input_id)
        return ["-map_metadata", input_index]
class FFmpegOutput: class FFmpegOutput:
def __init__(self, path: Path | str): def __init__(self, path: Path | str):
self.path = path self.path = path
@ -98,6 +106,7 @@ class FFmpegOutput:
self.vcodec: str | None = None self.vcodec: str | None = None
self.acodec: str | None = None self.acodec: str | None = None
self.cover_input: FFmpegVideoInput | None = None self.cover_input: FFmpegVideoInput | None = None
self.metadata_input: FFmpegMetadataInput | None = None
self.extra_commands: list[str] = [] self.extra_commands: list[str] = []
def use(self, input: FFmpegInput): def use(self, input: FFmpegInput):
@ -116,12 +125,16 @@ class FFmpegOutput:
self.cover_input = cover self.cover_input = cover
return self return self
def set_metadata(self, metadata: FFmpegMetadataInput):
    """Record ``metadata`` as this output's metadata input.

    Returns ``self`` so calls can be chained.
    """
    # Plain attribute store; nothing else is derived from it here.
    self.metadata_input = metadata
    return self
def with_extra_options(self, command: list[str]): def with_extra_options(self, command: list[str]):
self.extra_commands.extend(command) self.extra_commands.extend(command)
return self return self
def build(self) -> list[str]: def build(self) -> list[str]:
selected_inputs = concat_commands([["-map", str(input.input_id)] for input in self.used_inputs]) selected_inputs = concat_commands([input.build_select_command() for input in self.used_inputs])
vcodec = ["-vcodec", self.vcodec] if self.vcodec else [] vcodec = ["-vcodec", self.vcodec] if self.vcodec else []
acodec = ["-acodec", self.acodec] if self.acodec else [] acodec = ["-acodec", self.acodec] if self.acodec else []
# Refer to `-disposition` option in https://www.ffmpeg.org/ffmpeg.html#toc-Main-options # Refer to `-disposition` option in https://www.ffmpeg.org/ffmpeg.html#toc-Main-options
@ -166,6 +179,12 @@ class FFmpegCommandBuilder:
self.inputs.append(input) self.inputs.append(input)
return input return input
def add_metadata_input(self, path: Path | str):
    """Register ``path`` as a metadata input and return the created input object.

    The input is given the next free input index (the current value of
    ``self.num_inputs``) and stream id 0; the counter is then advanced by one.
    """
    metadata_input = FFmpegMetadataInput(path, self.num_inputs, 0)
    self.inputs.append(metadata_input)
    self.num_inputs += 1
    return metadata_input
def with_extra_options(self, command: list[str]): def with_extra_options(self, command: list[str]):
self.extra_commands.extend(command) self.extra_commands.extend(command)
return self return self
@ -176,7 +195,7 @@ class FFmpegCommandBuilder:
return output return output
def build(self): def build(self):
input_commands = concat_commands([input.build() for input in self.inputs]) input_commands = concat_commands([input.build_input_command() for input in self.inputs])
output_commands = concat_commands([output.build() for output in self.outputs]) output_commands = concat_commands([output.build() for output in self.outputs])
return input_commands + self.extra_commands + output_commands return input_commands + self.extra_commands + output_commands

View File

@ -16,6 +16,12 @@ class Actor(TypedDict):
order: int order: int
class ChapterInfoData(TypedDict):
    """A single chapter of a video: a start/end position and a title."""

    # Chapter start position. write_chapter_info emits it as START under
    # TIMEBASE=1/1, so it is presumably in whole seconds (TODO confirm against the API).
    start: int
    # Chapter end position, written as END under the same timebase.
    end: int
    # Chapter title text, written as the chapter's `title` entry.
    content: str
class MetaData(TypedDict): class MetaData(TypedDict):
title: str title: str
show_title: str show_title: str
@ -29,6 +35,7 @@ class MetaData(TypedDict):
source: str source: str
original_filename: str original_filename: str
website: str website: str
chapter_info_data: list[ChapterInfoData]
def metadata_value_format(metadata: MetaData, metadata_format: dict[str, str]) -> dict[str, Any]: def metadata_value_format(metadata: MetaData, metadata_format: dict[str, str]) -> dict[str, Any]:
@ -49,3 +56,20 @@ def write_metadata(metadata: MetaData, video_path: Path, metadata_format: dict[s
xml_content = dict2xml(user_formatted_metadata, wrap=custom_root, indent=" ") # type: ignore xml_content = dict2xml(user_formatted_metadata, wrap=custom_root, indent=" ") # type: ignore
with metadata_path.open("w", encoding="utf-8") as f: # type: ignore with metadata_path.open("w", encoding="utf-8") as f: # type: ignore
f.write(xml_content) # type: ignore f.write(xml_content) # type: ignore
def attach_chapter_info(metadata: MetaData, chapter_info_data: list[ChapterInfoData]):
    """Store ``chapter_info_data`` on ``metadata`` under the ``"chapter_info_data"`` key.

    The metadata dict is modified in place and the list is stored by reference
    (no copy is made). Returns ``None``.
    """
    metadata.update({"chapter_info_data": chapter_info_data})
# Characters that FFmpeg's metadata text format requires to be backslash-escaped
# inside keys and values: '=', ';', '#', '\' and newline.
_FFMETADATA_ESCAPES = str.maketrans({char: "\\" + char for char in "=;#\\\n"})


def _escape_ffmetadata(value: object) -> str:
    """Return ``value`` as text with FFMETADATA special characters backslash-escaped."""
    return str(value).translate(_FFMETADATA_ESCAPES)


# https://wklchris.github.io/blog/FFmpeg/FFmpeg.html#id26
def write_chapter_info(title: str, chapter_info_data: list[ChapterInfoData], chapter_path: Path):
    """Write an FFmpeg metadata (``;FFMETADATA1``) file describing the chapters.

    Args:
        title: Global ``title`` entry of the metadata file.
        chapter_info_data: Chapters to emit; each becomes one ``[CHAPTER]``
            section with ``TIMEBASE=1/1``, so ``start`` / ``end`` are written
            unscaled.
        chapter_path: Destination file; created or overwritten as UTF-8 text.

    The global title and every chapter title are escaped, because an unescaped
    ``=``, ``;``, ``#``, ``\\`` or line break in a value would otherwise be
    parsed by FFmpeg as syntax and corrupt the file.
    """
    lines = [";FFMETADATA1\n", f"title={_escape_ffmetadata(title)}\n"]
    for chapter in chapter_info_data:
        lines.extend(
            (
                "[CHAPTER]\n",
                "TIMEBASE=1/1\n",
                f"START={chapter['start']}\n",
                f"END={chapter['end']}\n",
                f"title={_escape_ffmetadata(chapter['content'])}\n",
            )
        )
    with chapter_path.open("w", encoding="utf-8") as f:
        f.writelines(lines)