✨ feat: add chapter info support (#274)

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2024-06-16 15:35:47 +08:00 · 2024-06-16 15:35:47 +08:00 · 0164d53cd6
parent fd3a4f60a7
commit 0164d53cd6
12 changed files with 140 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@ -131,6 +131,7 @@ dmypy.json
 *.srt
 *.nfo
 *.jpg
+*.ini

 # test files
 *.test.py
--- a/README.md
+++ b/README.md
@ -407,6 +407,13 @@ cat ~/.yutto_alias | yutto tensura-nikki --batch --alias-file -
 -  参数 `--no-danmaku`
 -  默认值 `False`

+#### 不生成章节信息
+
+-  参数 `--no-chapter-info`
+-  默认值 `False`
+
+不生成章节信息，包含 MetaData 和嵌入视频流的章节信息。
+
 #### 仅生成弹幕文件

 -  参数 `--danmaku-only`
--- a/1
+++ b/1
@ -60,6 +60,7 @@ clean:
  find . -name "*.pb" -print0 | xargs -0 rm -f
  find . -name "*.pyc" -print0 | xargs -0 rm -f
  find . -name "*.jpg" -print0 | xargs -0 rm -f
+  find . -name "*.ini" -print0 | xargs -0 rm -f
  rm -rf .pytest_cache/
  rm -rf .mypy_cache/
  find . -maxdepth 3 -type d -empty -print0 | xargs -0 -r rm -r
--- a/yutto/main.py
+++ b/yutto/main.py
@ -46,8 +46,16 @@ from yutto.validator import (
    validate_user_info,
 )

-DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover"]
-DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = ["video", "audio", "subtitle", "metadata", "danmaku", "cover"]
+DownloadResourceType: TypeAlias = Literal["video", "audio", "subtitle", "metadata", "danmaku", "cover", "chapter_info"]
+DOWNLOAD_RESOURCE_TYPES: list[DownloadResourceType] = [
+    "video",
+    "audio",
+    "subtitle",
+    "metadata",
+    "danmaku",
+    "cover",
+    "chapter_info",
+]


 def main():
@ -191,6 +199,13 @@ def cli() -> argparse.ArgumentParser:
        help="不生成封面",
    )

+    group_common.add_argument(
+        "--no-chapter-info",
+        dest="require_chapter_info",
+        action=create_select_required_action(deselect=["chapter_info"]),
+        help="不封装章节信息",
+    )
+
    group_common.set_defaults(
        require_video=True,
        require_audio=True,
@ -198,6 +213,7 @@ def cli() -> argparse.ArgumentParser:
        require_metadata=False,
        require_danmaku=True,
        require_cover=True,
+        require_chapter_info=True,
    )
    group_common.add_argument("--no-color", action="store_true", help="不使用颜色")
    group_common.add_argument("--no-progress", action="store_true", help="不显示进度条")
@ -260,7 +276,6 @@ async def run(args_list: list[argparse.Namespace]):
                    CheeseExtractor(),  # 课程单集
                ]
            )
-
            url: str = args.url
            # 将 shortcut 转为完整 url
            for extractor in extractors:
@ -331,12 +346,15 @@ async def run(args_list: list[argparse.Namespace]):
                    episode_data,
                    {
                        "require_video": args.require_video,
+                        "require_chapter_info": args.require_chapter_info,
                        "video_quality": args.video_quality,
                        "video_download_codec": args.vcodec.split(":")[0],
                        "video_save_codec": args.vcodec.split(":")[1],
-                        "video_download_codec_priority": args.download_vcodec_priority.split(",")
-                        if args.download_vcodec_priority != "auto"
-                        else None,
+                        "video_download_codec_priority": (
+                            args.download_vcodec_priority.split(",")
+                            if args.download_vcodec_priority != "auto"
+                            else None
+                        ),
                        "require_audio": args.require_audio,
                        "audio_quality": args.audio_quality,
                        "audio_download_codec": args.acodec.split(":")[0],
--- a/yutto/_typing.py
+++ b/yutto/_typing.py
@ -5,7 +5,7 @@ from typing import NamedTuple, TypedDict
 from yutto.bilibili_typing.codec import AudioCodec, VideoCodec
 from yutto.bilibili_typing.quality import AudioQuality, VideoQuality
 from yutto.utils.danmaku import DanmakuData
-from yutto.utils.metadata import MetaData
+from yutto.utils.metadata import ChapterInfoData, MetaData
 from yutto.utils.subtitle import SubtitleData


@ -172,6 +172,7 @@ class EpisodeData(TypedDict):
    metadata: MetaData | None
    danmaku: DanmakuData
    cover_data: bytes | None
+    chapter_info_data: list[ChapterInfoData]
    output_dir: str
    tmp_dir: str
    filename: str
@ -179,6 +180,7 @@ class EpisodeData(TypedDict):

 class DownloaderOptions(TypedDict):
    require_video: bool
+    require_chapter_info: bool
    video_quality: VideoQuality
    video_download_codec: VideoCodec
    video_save_codec: str
--- a/yutto/api/bangumi.py
+++ b/yutto/api/bangumi.py
@ -184,4 +184,5 @@ def _parse_bangumi_metadata(item: dict[str, Any]) -> MetaData:
        tag=[],  # TODO
        website="",  # TODO
        original_filename="",  # TODO
+        chapter_info_data=[],  # There is no chapter info in bangumi for now
    )
--- a/yutto/api/cheese.py
+++ b/yutto/api/cheese.py
@ -153,4 +153,5 @@ def _parse_cheese_metadata(item: dict[str, Any]) -> MetaData:
        tag=[],  # TODO
        website="",  # TODO
        original_filename="",  # TODO
+        chapter_info_data=[],  # There is no chapter info in cheese for now
    )
--- a/yutto/api/ugc_video.py
+++ b/yutto/api/ugc_video.py
@ -25,7 +25,8 @@ from yutto.exceptions import (
 )
 from yutto.utils.console.logger import Logger
 from yutto.utils.fetcher import Fetcher
-from yutto.utils.metadata import Actor, MetaData
+from yutto.utils.funcutils.data_access import data_has_chained_keys
+from yutto.utils.metadata import Actor, ChapterInfoData, MetaData
 from yutto.utils.time import get_time_stamp_by_now


@ -259,6 +260,23 @@ async def get_ugc_video_subtitles(client: AsyncClient, avid: AvId, cid: CId) ->
    return []


+async def get_ugc_video_chapters(client: AsyncClient, avid: AvId, cid: CId) -> list[ChapterInfoData]:
+    """Fetch the chapter list of one video part from the player API.
+
+    Returns one ``ChapterInfoData`` per entry of ``data.view_points`` in the
+    response, mapping ``content``/``from``/``to`` to ``content``/``start``/``end``.
+    Returns an empty list when the fetch yields ``None`` or when the response
+    has no ``data.view_points`` (a warning is logged in the latter case).
+    """
+    # NOTE(review): the first query key is spelled `avid`; confirm the endpoint
+    # accepts it (the bvid is sent in the same request).
+    request_url = "https://api.bilibili.com/x/player/v2?avid={aid}&bvid={bvid}&cid={cid}".format(
+        **avid.to_dict(), cid=cid
+    )
+    response = await Fetcher.fetch_json(client, request_url)
+    if response is None:
+        return []
+
+    if data_has_chained_keys(response, ["data", "view_points"]):
+        return [
+            {"content": view_point["content"], "start": view_point["from"], "end": view_point["to"]}
+            for view_point in response["data"]["view_points"]
+        ]
+
+    Logger.warning(f"无法获取该视频的章节信息（{format_ids(avid, cid)}），原因：{response.get('message')}")
+    return []
+
+
 def _parse_ugc_video_metadata(
    video_info: _UgcVideoInfo,
    page_info: _UgcVideoPageInfo,
@ -277,6 +295,7 @@ def _parse_ugc_video_metadata(
        source="",  # TODO
        original_filename="",  # TODO
        website=video_info["bvid"].to_url(),
+        chapter_info_data=[],
    )


--- a/yutto/extractor/common.py
+++ b/yutto/extractor/common.py
@ -15,6 +15,7 @@ from yutto.api.cheese import CheeseListItem, get_cheese_playurl, get_cheese_subt
 from yutto.api.danmaku import get_danmaku
 from yutto.api.ugc_video import (
    UgcVideoListItem,
+    get_ugc_video_chapters,
    get_ugc_video_playurl,
    get_ugc_video_subtitles,
 )
@ -32,6 +33,7 @@ from yutto.processor.path_resolver import (
 from yutto.utils.console.logger import Logger
 from yutto.utils.danmaku import EmptyDanmakuData
 from yutto.utils.fetcher import Fetcher
+from yutto.utils.metadata import attach_chapter_info


 async def extract_bangumi_data(
@ -76,6 +78,7 @@ async def extract_bangumi_data(
            metadata=metadata,
            danmaku=danmaku,
            cover_data=cover_data,
+            chapter_info_data=[],
            output_dir=output_dir,
            tmp_dir=args.tmp_dir or output_dir,
            filename=filename,
@ -128,6 +131,7 @@ async def extract_cheese_data(
            metadata=metadata,
            danmaku=danmaku,
            cover_data=cover_data,
+            chapter_info_data=[],
            output_dir=output_dir,
            tmp_dir=args.tmp_dir or output_dir,
            filename=filename,
@ -153,8 +157,11 @@ async def extract_ugc_video_data(
            await get_ugc_video_playurl(client, avid, cid) if args.require_video or args.require_audio else ([], [])
        )
        subtitles = await get_ugc_video_subtitles(client, avid, cid) if args.require_subtitle else []
+        chapter_info_data = await get_ugc_video_chapters(client, avid, cid) if args.require_chapter_info else []
        danmaku = await get_danmaku(client, cid, args.danmaku_format) if args.require_danmaku else EmptyDanmakuData
        metadata = ugc_video_info["metadata"] if args.require_metadata else None
+        if metadata and chapter_info_data:
+            attach_chapter_info(metadata, chapter_info_data)
        cover_data = (
            await Fetcher.fetch_bin(client, ugc_video_info["metadata"]["thumb"]) if args.require_cover else None
        )
@ -184,6 +191,7 @@ async def extract_ugc_video_data(
            metadata=metadata,
            danmaku=danmaku,
            cover_data=cover_data,
+            chapter_info_data=chapter_info_data,
            output_dir=output_dir,
            tmp_dir=args.tmp_dir or output_dir,
            filename=filename,
--- a/yutto/processor/downloader.py
+++ b/yutto/processor/downloader.py
@ -21,7 +21,7 @@ from yutto.utils.fetcher import Fetcher
 from yutto.utils.ffmpeg import FFmpeg, FFmpegCommandBuilder
 from yutto.utils.file_buffer import AsyncFileBuffer
 from yutto.utils.funcutils import filter_none_value, xmerge
-from yutto.utils.metadata import write_metadata
+from yutto.utils.metadata import ChapterInfoData, write_chapter_info, write_metadata
 from yutto.utils.subtitle import write_subtitle


@ -176,6 +176,8 @@ def merge_video_and_audio(
    audio_path: Path,
    cover_data: bytes | None,
    cover_path: Path,
+    chapter_info_data: list[ChapterInfoData],
+    chapter_info_path: Path,
    output_path: Path,
    options: DownloaderOptions,
 ):
@ -215,6 +217,10 @@ def merge_video_and_audio(
        output.use(cover_input)
        output.set_cover(cover_input)

+    if video is not None and chapter_info_data:
+        metadata_input = command_builder.add_metadata_input(chapter_info_path)
+        output.use(metadata_input)
+
    # see also: https://www.reddit.com/r/ffmpeg/comments/qe7oq1/comment/hi0bmic/?utm_source=share&utm_medium=web2x&context=3
    output.with_extra_options(["-strict", "unofficial"])

@ -237,6 +243,8 @@ def merge_video_and_audio(
        audio_path.unlink()
    if cover_data is not None:
        cover_path.unlink()
+    if chapter_info_data:
+        chapter_info_path.unlink()


 class DownloadState(Enum):
@ -257,6 +265,7 @@ async def start_downloader(
    danmaku = episode_data["danmaku"]
    metadata = episode_data["metadata"]
    cover_data = episode_data["cover_data"]
+    chapter_info_data = episode_data["chapter_info_data"]
    output_dir = Path(episode_data["output_dir"])
    tmp_dir = Path(episode_data["tmp_dir"])
    filename = episode_data["filename"]
@ -269,6 +278,7 @@ async def start_downloader(
    video_path = tmp_dir.joinpath(filename + "_video.m4s")
    audio_path = tmp_dir.joinpath(filename + "_audio.m4s")
    cover_path = tmp_dir.joinpath(filename + "_cover.jpg")
+    chapter_info_path = tmp_dir.joinpath(filename + "_chapter_info.ini")

    video = select_video(
        videos, options["video_quality"], options["video_download_codec"], options["video_download_codec_priority"]
@ -345,6 +355,11 @@ async def start_downloader(
    video = video if will_download_video else None
    audio = audio if will_download_audio else None

+    # 保存章节信息
+    if chapter_info_data:
+        write_chapter_info(filename, chapter_info_data, chapter_info_path)
+
+    # 保存封面
    if cover_data is not None:
        cover_path.write_bytes(cover_data)

@ -352,5 +367,16 @@ async def start_downloader(
    await download_video_and_audio(client, video, video_path, audio, audio_path, options)

    # 合并视频 / 音频
-    merge_video_and_audio(video, video_path, audio, audio_path, cover_data, cover_path, output_path, options)
+    merge_video_and_audio(
+        video,
+        video_path,
+        audio,
+        audio_path,
+        cover_data,
+        cover_path,
+        chapter_info_data,
+        chapter_info_path,
+        output_path,
+        options,
+    )
    return DownloadState.DONE
--- a/yutto/utils/ffmpeg.py
+++ b/yutto/utils/ffmpeg.py
@ -78,7 +78,10 @@ class FFmpegInput:
        self.input_id = input_id
        self.stream_id = stream_id

-    def build(self) -> list[str]:
+    def build_select_command(self) -> list[str]:
+        return ["-map", str(self.input_id)]
+
+    def build_input_command(self) -> list[str]:
        return ["-i", str(self.path)]

    def __repr__(self):
@ -91,6 +94,11 @@ class FFmpegVideoInput(FFmpegInput): ...
 class FFmpegAudioInput(FFmpegInput): ...


+class FFmpegMetadataInput(FFmpegInput):
+    """An FFmpeg input that is selected as a metadata source.
+
+    Overrides the select command so the output refers to this input with
+    ``-map_metadata <input_id>`` rather than the base class's ``-map <input_id>``.
+    """
+
+    def build_select_command(self) -> list[str]:
+        """Return the output-side arguments that select this input's metadata."""
+        return ["-map_metadata", f"{self.input_id}"]
+
+
 class FFmpegOutput:
    def __init__(self, path: Path | str):
        self.path = path
@ -98,6 +106,7 @@ class FFmpegOutput:
        self.vcodec: str | None = None
        self.acodec: str | None = None
        self.cover_input: FFmpegVideoInput | None = None
+        self.metadata_input: FFmpegMetadataInput | None = None
        self.extra_commands: list[str] = []

    def use(self, input: FFmpegInput):
@ -116,12 +125,16 @@ class FFmpegOutput:
        self.cover_input = cover
        return self

+    def set_metadata(self, metadata: FFmpegMetadataInput):
+        """Record ``metadata`` as this output's metadata input and return ``self`` for chaining."""
+        # NOTE(review): the visible part of build() selects inputs through
+        # used_inputs only; confirm whether metadata_input is read anywhere.
+        self.metadata_input = metadata
+        return self
+
    def with_extra_options(self, command: list[str]):
        self.extra_commands.extend(command)
        return self

    def build(self) -> list[str]:
-        selected_inputs = concat_commands([["-map", str(input.input_id)] for input in self.used_inputs])
+        selected_inputs = concat_commands([input.build_select_command() for input in self.used_inputs])
        vcodec = ["-vcodec", self.vcodec] if self.vcodec else []
        acodec = ["-acodec", self.acodec] if self.acodec else []
        # Refer to `-disposition` option in https://www.ffmpeg.org/ffmpeg.html#toc-Main-options
@ -166,6 +179,12 @@ class FFmpegCommandBuilder:
        self.inputs.append(input)
        return input

+    def add_metadata_input(self, path: Path | str):
+        """Register ``path`` as a metadata input and return the new input object.
+
+        The input takes the next free input index (``self.num_inputs``) and
+        stream id 0; the counter is then advanced by one.
+        """
+        metadata_input = FFmpegMetadataInput(path, self.num_inputs, 0)
+        self.inputs.append(metadata_input)
+        self.num_inputs += 1
+        return metadata_input
+
    def with_extra_options(self, command: list[str]):
        self.extra_commands.extend(command)
        return self
@ -176,7 +195,7 @@ class FFmpegCommandBuilder:
        return output

    def build(self):
-        input_commands = concat_commands([input.build() for input in self.inputs])
+        input_commands = concat_commands([input.build_input_command() for input in self.inputs])
        output_commands = concat_commands([output.build() for output in self.outputs])
        return input_commands + self.extra_commands + output_commands

--- a/yutto/utils/metadata.py
+++ b/yutto/utils/metadata.py
@ -16,6 +16,12 @@ class Actor(TypedDict):
    order: int


+class ChapterInfoData(TypedDict):
+    """One chapter of a video: a titled time range."""
+
+    # Chapter start; write_chapter_info emits it as START under TIMEBASE=1/1.
+    start: int
+    # Chapter end; write_chapter_info emits it as END under TIMEBASE=1/1.
+    end: int
+    # Chapter name; write_chapter_info emits it as the chapter's title.
+    content: str
+
+
 class MetaData(TypedDict):
    title: str
    show_title: str
@ -29,6 +35,7 @@ class MetaData(TypedDict):
    source: str
    original_filename: str
    website: str
+    chapter_info_data: list[ChapterInfoData]


 def metadata_value_format(metadata: MetaData, metadata_format: dict[str, str]) -> dict[str, Any]:
@ -49,3 +56,20 @@ def write_metadata(metadata: MetaData, video_path: Path, metadata_format: dict[s
    xml_content = dict2xml(user_formatted_metadata, wrap=custom_root, indent="  ")  # type: ignore
    with metadata_path.open("w", encoding="utf-8") as f:  # type: ignore
        f.write(xml_content)  # type: ignore
+
+
+def attach_chapter_info(metadata: MetaData, chapter_info_data: list[ChapterInfoData]) -> None:
+    """Store ``chapter_info_data`` on ``metadata`` under the ``chapter_info_data`` key.
+
+    ``metadata`` is mutated in place (the list is stored as-is, not copied);
+    nothing is returned.
+    """
+    metadata["chapter_info_data"] = chapter_info_data
+
+
+def _escape_ffmetadata_value(value: str) -> str:
+    """Backslash-escape the characters that are special in an FFmetadata file.
+
+    FFmpeg's metadata format requires an equals sign, semicolon, hash sign,
+    backslash or newline inside a value to be preceded by a backslash;
+    unescaped, they are parsed as separators/comments or break the line.
+    """
+    return "".join("\\" + char if char in "=;#\\\n" else char for char in str(value))
+
+
+# https://wklchris.github.io/blog/FFmpeg/FFmpeg.html#id26
+def write_chapter_info(title: str, chapter_info_data: list[ChapterInfoData], chapter_path: Path):
+    """Write an FFmetadata file holding the title and one section per chapter.
+
+    Args:
+        title: Value of the global ``title`` tag.
+        chapter_info_data: Chapters to emit, in order; each becomes a
+            ``[CHAPTER]`` section with ``START``/``END`` and a ``title``.
+        chapter_path: Destination file; it is created or overwritten (UTF-8).
+
+    Titles are escaped so that names containing ``=``, ``;``, ``#``, a
+    backslash or a newline do not corrupt the file.
+    """
+    lines = [";FFMETADATA1\n", f"title={_escape_ffmetadata_value(title)}\n"]
+    for chapter in chapter_info_data:
+        lines.extend(
+            (
+                "[CHAPTER]\n",
+                # A 1/1 timebase makes START/END plain seconds.
+                "TIMEBASE=1/1\n",
+                f"START={chapter['start']}\n",
+                f"END={chapter['end']}\n",
+                f"title={_escape_ffmetadata_value(chapter['content'])}\n",
+            )
+        )
+    with chapter_path.open("w", encoding="utf-8") as f:
+        f.writelines(lines)