douyin-downloader/DouYinCommand.py

466 lines
17 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import os
import sys
import json
import yaml
import time
from dataclasses import dataclass, field
from typing import List, Dict, Optional
from pathlib import Path
import logging
# Configure logging once for the whole script: INFO level, "[LEVEL] message".
logging.basicConfig(
    format='[%(levelname)s] %(message)s',
    level=logging.INFO,
)
# The logger is bound to the name ``douyin_logger`` to avoid a name conflict
# (per the original author's note), and is usable from this point on.
douyin_logger = logging.getLogger("DouYin")
# aiohttp is an optional dependency; ASYNC_SUPPORT records whether it imported.
try:
    import asyncio
    import aiohttp
except ImportError:
    ASYNC_SUPPORT = False
    douyin_logger.warning("aiohttp 未安装,异步下载功能不可用")
else:
    ASYNC_SUPPORT = True
from apiproxy.douyin.douyin import Douyin
from apiproxy.douyin.download import Download
from apiproxy.douyin import douyin_headers
from apiproxy.common import utils
@dataclass
class DownloadConfig:
    """Typed container for the downloader settings.

    Instances can be built directly, from a YAML file (``from_yaml``) or from
    parsed command-line options (``from_args``); ``validate`` performs basic
    type checks on the fields the downloader cannot run without.
    """

    # Share links / browser URLs to process.
    link: List[str]
    # Directory the downloads are saved under.
    path: Path
    # Whether to download the music of a video.
    music: bool = True
    # Whether to download the cover of a video.
    cover: bool = True
    # Whether to download the author's avatar.
    avatar: bool = True
    # Whether to save the fetched metadata.
    json: bool = True
    # Optional date bounds; "now" as end_time is expanded by from_yaml().
    start_time: str = ""
    end_time: str = ""
    # File layout style flag.
    folderstyle: bool = True
    # What to fetch for a user home page link, e.g. "post", "like", "mix".
    mode: List[str] = field(default_factory=lambda: ["post"])
    # Number of download threads.
    thread: int = 5
    # Raw Cookie header value ("name1=value1; name2=value2;"), if any.
    cookie: Optional[str] = None
    # Whether the database is used (the CLI help states incremental updates need it).
    database: bool = True
    # Per-category download limits; 0 means "download everything".
    number: Dict[str, int] = field(default_factory=lambda: {
        "post": 0, "like": 0, "allmix": 0, "mix": 0, "music": 0
    })
    # Per-category incremental-download switches.
    increase: Dict[str, bool] = field(default_factory=lambda: {
        "post": False, "like": False, "allmix": False, "mix": False, "music": False
    })

    @classmethod
    def from_yaml(cls, yaml_path: Path) -> "DownloadConfig":
        """Build a configuration from a YAML file.

        Unknown keys and keys with a null value are ignored, so the dataclass
        defaults apply. ``number`` / ``increase`` are merged over their
        defaults, a ``cookies`` mapping is flattened into ``cookie``, and
        ``end_time: now`` is replaced by today's date (``YYYY-MM-DD``).

        Args:
            yaml_path: Location of the YAML document.

        Returns:
            The populated configuration.

        Raises:
            OSError: The file cannot be opened.
            ValueError: The document root is not a mapping.
        """
        with open(yaml_path, "r", encoding="utf-8") as stream:
            # safe_load returns None for an empty document.
            raw = yaml.safe_load(stream) or {}
        if not isinstance(raw, dict):
            raise ValueError(f"configuration root must be a mapping: {yaml_path}")

        known = cls.__dataclass_fields__
        settings = {
            name: value
            for name, value in raw.items()
            if name in known and value is not None
        }

        # A single link written as a bare string is treated as a one-item list.
        links = settings.get("link", [])
        settings["link"] = [links] if isinstance(links, str) else list(links)
        settings["path"] = Path(settings.get("path", os.getcwd()))

        # Partial tables only override the categories they mention.
        for table in ("number", "increase"):
            merged = known[table].default_factory()
            merged.update(settings.get(table, {}))
            settings[table] = merged

        cookies = raw.get("cookies")
        if cookies:
            settings["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
        if settings.get("end_time") == "now":
            settings["end_time"] = time.strftime("%Y-%m-%d", time.localtime())
        return cls(**settings)

    @classmethod
    def from_args(cls, args) -> "DownloadConfig":
        """Build a configuration from parsed command-line options.

        Args:
            args: Namespace carrying the attributes produced by this script's
                argument parser (``link``, ``path``, ``postnumber``, ...).

        Returns:
            The populated configuration. An empty ``mode`` falls back to
            ``["post"]`` and an empty ``cookie`` becomes ``None``.
        """
        categories = ("post", "like", "allmix", "mix", "music")
        return cls(
            link=list(args.link),
            path=Path(args.path),
            music=args.music,
            cover=args.cover,
            avatar=args.avatar,
            json=args.json,
            folderstyle=args.folderstyle,
            mode=list(args.mode) if args.mode else ["post"],
            thread=args.thread,
            cookie=args.cookie or None,
            database=args.database,
            number={kind: getattr(args, f"{kind}number") for kind in categories},
            increase={kind: getattr(args, f"{kind}increase") for kind in categories},
        )

    def validate(self) -> bool:
        """Return True when ``link``, ``path`` and ``thread`` are usable.

        ``link`` must be a list of strings, ``path`` a ``str`` or ``Path``,
        and ``thread`` a positive integer (booleans are rejected).
        """
        links_ok = isinstance(self.link, list) and all(
            isinstance(url, str) for url in self.link
        )
        path_ok = isinstance(self.path, (str, Path))
        thread_ok = (
            isinstance(self.thread, int)
            and not isinstance(self.thread, bool)
            and self.thread > 0
        )
        return links_ok and path_ok and thread_ok
# Module-wide settings dictionary holding the built-in defaults.
# "number" holds per-category download limits and "increase" the per-category
# incremental switches; both share the same five category keys.
configModel = dict(
    link=[],
    path=os.getcwd(),
    music=True,
    cover=True,
    avatar=True,
    json=True,
    start_time="",
    end_time="",
    folderstyle=True,
    mode=["post"],
    number=dict.fromkeys(("post", "like", "allmix", "mix", "music"), 0),
    database=True,
    increase=dict.fromkeys(("post", "like", "allmix", "mix", "music"), False),
    thread=5,
    # The cookie defaults to the DOUYIN_COOKIE environment variable, if set.
    cookie=os.environ.get("DOUYIN_COOKIE", ""),
)
def argument():
    """Define and parse the command-line options of the downloader.

    Returns:
        argparse.Namespace: The parsed options. A ``--thread`` value that is
        zero or negative is replaced by 5 before the namespace is returned.
    """
    cli = argparse.ArgumentParser(description='抖音批量下载工具 使用帮助')
    # Keyword sets shared by the boolean switches and the counters.
    switch = {"type": utils.str2bool, "required": False}
    counter = {"type": int, "required": False, "default": 0}

    cli.add_argument("--cmd", "-C", default=False,
                     help="使用命令行(True)或者配置文件(False), 默认为False", **switch)
    cli.add_argument("--link", "-l", type=str, required=False, default=[], action="append",
                     help="作品(视频或图集)、直播、合集、音乐集合、个人主页的分享链接或者电脑浏览器网址, 可以设置多个链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/ 或者 https://www.douyin.com/开头的)")
    cli.add_argument("--path", "-p", type=str, required=False, default=os.getcwd(),
                     help="下载保存位置, 默认当前文件位置")

    # Boolean switches that default to True.
    for names, text in (
        (("--music", "-m"), "是否下载视频中的音乐(True/False), 默认为True"),
        (("--cover", "-c"), "是否下载视频的封面(True/False), 默认为True, 当下载视频时有效"),
        (("--avatar", "-a"), "是否下载作者的头像(True/False), 默认为True"),
        (("--json", "-j"), "是否保存获取到的数据(True/False), 默认为True"),
        (("--folderstyle", "-fs"), "文件保存风格, 默认为True"),
    ):
        cli.add_argument(*names, help=text, default=True, **switch)

    cli.add_argument("--mode", "-M", type=str, required=False, default=[], action="append",
                     help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like)或者用户所有合集(mix), 默认为post, 可以设置多种模式")

    # Per-category download limits (0 downloads everything).
    for option, text in (
        ("--postnumber", "主页下作品下载个数设置, 默认为0 全部下载"),
        ("--likenumber", "主页下喜欢下载个数设置, 默认为0 全部下载"),
        ("--allmixnumber", "主页下合集下载个数设置, 默认为0 全部下载"),
        ("--mixnumber", "单个合集下作品下载个数设置, 默认为0 全部下载"),
        ("--musicnumber", "音乐(原声)下作品下载个数设置, 默认为0 全部下载"),
    ):
        cli.add_argument(option, help=text, **counter)

    cli.add_argument("--database", "-d", default=True,
                     help="是否使用数据库, 默认为True 使用数据库; 如果不使用数据库, 增量更新不可用", **switch)

    # Per-category incremental-download switches.
    for option, text in (
        ("--postincrease", "是否开启主页作品增量下载(True/False), 默认为False"),
        ("--likeincrease", "是否开启主页喜欢增量下载(True/False), 默认为False"),
        ("--allmixincrease", "是否开启主页合集增量下载(True/False), 默认为False"),
        ("--mixincrease", "是否开启单个合集下作品增量下载(True/False), 默认为False"),
        ("--musicincrease", "是否开启音乐(原声)下作品增量下载(True/False), 默认为False"),
    ):
        cli.add_argument(option, help=text, default=False, **switch)

    cli.add_argument("--thread", "-t", type=int, required=False, default=5,
                     help="设置线程数, 默认5个线程")
    cli.add_argument("--cookie", type=str, required=False, default='',
                     help="设置cookie, 格式: \"name1=value1; name2=value2;\" 注意要加冒号")
    cli.add_argument("--config", "-F",
                     type=argparse.FileType('r', encoding='utf-8'),
                     help="配置文件路径")

    args = cli.parse_args()
    # Fall back to the default thread count for nonsensical values.
    if not args.thread > 0:
        args.thread = 5
    return args
def yamlConfig():
    """Merge ``config.yml`` (located next to the running script) into ``configModel``.

    Only keys that already exist in ``configModel`` are read. Keys with a null
    value are skipped so the built-in default survives, and the nested
    ``number`` / ``increase`` tables are updated key by key. Two extras are
    handled separately: a ``cookies`` mapping is flattened into the ``cookie``
    string, and ``end_time: now`` becomes today's date (``YYYY-MM-DD``).

    Loading is best-effort: a missing or unparsable file only logs a warning.
    """
    curPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    yamlPath = os.path.join(curPath, "config.yml")
    try:
        with open(yamlPath, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty file; treat that as "no overrides".
            configDict = yaml.safe_load(f) or {}
        if not isinstance(configDict, dict):
            douyin_logger.warning(
                f"配置文件解析出错: {'top-level YAML value must be a mapping'}"
            )
            return
        # Copy recognised keys; snapshot the items because values are reassigned.
        for key, default in list(configModel.items()):
            value = configDict.get(key)
            if value is None:
                continue
            if isinstance(default, dict):
                default.update(value or {})
            else:
                configModel[key] = value
        # A "cookies" mapping takes precedence over a plain "cookie" string.
        cookies = configDict.get("cookies")
        if cookies:
            configModel["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
        # "now" is a placeholder for the current local date.
        if configDict.get("end_time") == "now":
            configModel["end_time"] = time.strftime("%Y-%m-%d", time.localtime())
    except FileNotFoundError:
        douyin_logger.warning("未找到配置文件config.yml")
    except Exception as e:
        douyin_logger.warning(f"配置文件解析出错: {str(e)}")
def validate_config(config: dict) -> bool:
    """Check that the mandatory configuration entries are present and usable.

    Args:
        config: Settings dictionary to inspect.

    Returns:
        True when ``link`` is a list of strings, ``path`` is a string and
        ``thread`` is a positive integer; otherwise False, after logging the
        offending entry.
    """
    required_keys = {
        'link': list,
        'path': str,
        'thread': int
    }
    for key, typ in required_keys.items():
        if key not in config or not isinstance(config[key], typ):
            douyin_logger.error(f"无效配置项: {key}")
            return False
    # bool is a subclass of int, and a non-positive thread count cannot work.
    thread = config['thread']
    if isinstance(thread, bool) or thread <= 0:
        douyin_logger.error(f"无效配置项: {'thread'}")
        return False
    if not all(isinstance(url, str) for url in config['link']):
        douyin_logger.error("链接配置格式错误")
        return False
    return True
def main():
    """Run one download session from start to finish.

    Settings come from the command line when ``--cmd`` is true, otherwise from
    the YAML file. After validation, the cookie header is applied, the save
    directory is created, and every configured link is processed in order.
    The elapsed time is logged at the end.
    """
    start = time.time()
    # Load the configuration.
    args = argument()
    # NOTE(review): args.config is parsed by argument() but never consulted
    # here; yamlConfig() always reads config.yml next to the script.
    if args.cmd:
        update_config_from_args(args)
    else:
        yamlConfig()
    if not validate_config(configModel):
        return
    if not configModel["link"]:
        douyin_logger.error("未设置下载链接")
        return
    # Cookie handling.
    if configModel["cookie"]:
        douyin_headers["Cookie"] = configModel["cookie"]
    # Normalise and create the save directory.
    configModel["path"] = os.path.abspath(configModel["path"])
    os.makedirs(configModel["path"], exist_ok=True)
    douyin_logger.info(f"数据保存路径 {configModel['path']}")
    # Build the API client and the downloader.
    dy = Douyin(database=configModel["database"])
    dl = Download(
        thread=configModel["thread"],
        music=configModel["music"],
        cover=configModel["cover"],
        avatar=configModel["avatar"],
        resjson=configModel["json"],
        folderstyle=configModel["folderstyle"]
    )
    # Process each link in turn.
    for link in configModel["link"]:
        process_link(dy, dl, link)
    # Report the elapsed time; the seconds figure now carries its unit.
    minutes, seconds = divmod(int(time.time() - start), 60)
    douyin_logger.info(f'\n[下载完成]:总耗时: {minutes}分钟{seconds}秒\n')
def process_link(dy, dl, link):
    """Resolve one link and hand it to the handler for its type.

    Args:
        dy: API client; used to resolve the share link and extract its key.
        dl: Downloader passed through to the handler.
        link: Share link or browser URL to process.

    Any exception raised while resolving or handling the link is logged and
    swallowed so that the remaining links can still be processed.
    """
    douyin_logger.info("-" * 80)
    douyin_logger.info(f"[ 提示 ]:正在请求的链接: {link}")
    try:
        resolved = dy.getShareLink(link)
        key_type, key = dy.getKey(resolved)
        # Dispatch table: link type -> handler function.
        dispatch = {
            "user": handle_user_download,
            "mix": handle_mix_download,
            "music": handle_music_download,
            "aweme": handle_aweme_download,
            "live": handle_live_download
        }
        handler = dispatch.get(key_type)
        if not handler:
            douyin_logger.warning(f"[ 警告 ]:未知的链接类型: {key_type}")
        else:
            handler(dy, dl, key)
    except Exception as e:
        douyin_logger.error(f"处理链接时出错: {str(e)}")
def handle_user_download(dy, dl, key):
    """Download the content of a user's home page for every configured mode.

    Args:
        dy: API client used to fetch the user details.
        dl: Downloader passed through to the per-mode helpers.
        key: The user's ``sec_uid``.

    A ``user_<nickname>_<key>`` directory is created under the configured
    save path (the nickname is empty when the user details are unavailable).
    Modes other than ``post``, ``like`` and ``mix`` are reported and skipped.
    """
    douyin_logger.info("[ 提示 ]:正在请求用户主页下作品")
    data = dy.getUserDetailInfo(sec_uid=key)
    nickname = ""
    if data and data.get('user'):
        nickname = utils.replaceStr(data['user']['nickname'])
    userPath = os.path.join(configModel["path"], f"user_{nickname}_{key}")
    os.makedirs(userPath, exist_ok=True)
    for mode in configModel["mode"]:
        douyin_logger.info("-" * 80)
        douyin_logger.info(f"[ 提示 ]:正在请求用户主页模式: {mode}")
        if mode in ('post', 'like'):
            _handle_post_like_mode(dy, dl, key, mode, userPath)
        elif mode == 'mix':
            _handle_mix_mode(dy, dl, key, userPath)
        else:
            # Previously an unrecognised mode was dropped without any trace.
            douyin_logger.warning(f"[ 警告 ]:不支持的主页模式: {mode}")
def _handle_post_like_mode(dy, dl, key, mode, userPath):
    """Fetch and download a user's ``post`` or ``like`` list.

    Args:
        dy: API client used to fetch the list.
        dl: Downloader that saves the fetched items.
        key: The user's ``sec_uid``.
        mode: Category name; also selects the ``number`` / ``increase``
            settings and names the sub-directory.
        userPath: The user's directory; items go into ``<userPath>/<mode>``.

    Nothing is created or downloaded when the fetched list is empty.
    """
    limit = configModel["number"][mode]
    incremental = configModel["increase"][mode]
    since = configModel.get("start_time", "")
    until = configModel.get("end_time", "")
    items = dy.getUserInfo(key, mode, 35, limit, incremental,
                           start_time=since, end_time=until)
    if items:
        target_dir = os.path.join(userPath, mode)
        os.makedirs(target_dir, exist_ok=True)
        dl.userDownload(awemeList=items, savePath=target_dir)
def _handle_mix_mode(dy, dl, key, userPath):
    """Download every collection (mix) found on a user's home page.

    Args:
        dy: API client used to list the collections and their items.
        dl: Downloader that saves the fetched items.
        key: The user's ``sec_uid``.
        userPath: The user's directory; collections go under ``<userPath>/mix``.

    Returns early when no collections are found. Each collection is saved to
    a sub-directory named after its sanitised title.
    """
    collections = dy.getUserAllMixInfo(key, 35, configModel["number"]["allmix"])
    if not collections:
        return
    mix_root = os.path.join(userPath, "mix")
    os.makedirs(mix_root, exist_ok=True)
    for mix_id in collections:
        mix_name = collections[mix_id]
        douyin_logger.info(f'[ 提示 ]:正在下载合集 [{mix_name}] 中的作品')
        safe_name = utils.replaceStr(mix_name)
        items = dy.getMixInfo(
            mix_id, 35, 0, configModel["increase"]["allmix"], key,
            start_time=configModel.get("start_time", ""),
            end_time=configModel.get("end_time", ""),
        )
        if not items:
            continue
        dl.userDownload(awemeList=items, savePath=os.path.join(mix_root, safe_name))
        douyin_logger.info(f'[ 提示 ]:合集 [{mix_name}] 中的作品下载完成')
def handle_mix_download(dy, dl, key):
    """Download the items of a single collection (mix).

    Args:
        dy: API client used to fetch the collection items.
        dl: Downloader that saves the fetched items.
        key: Identifier of the collection.

    Items are saved to ``mix_<name>_<key>`` under the configured save path,
    where the name is read from the first fetched item. Failures are logged
    rather than raised.
    """
    douyin_logger.info("[ 提示 ]:正在请求单个合集下作品")
    try:
        items = dy.getMixInfo(
            key, 35,
            configModel["number"]["mix"],
            configModel["increase"]["mix"],
            "",
            start_time=configModel.get("start_time", ""),
            end_time=configModel.get("end_time", ""),
        )
        if items:
            title = utils.replaceStr(items[0]["mix_info"]["mix_name"])
            target_dir = os.path.join(configModel["path"], f"mix_{title}_{key}")
            os.makedirs(target_dir, exist_ok=True)
            dl.userDownload(awemeList=items, savePath=target_dir)
        else:
            douyin_logger.error("获取合集信息失败")
    except Exception as e:
        douyin_logger.error(f"处理合集时出错: {str(e)}")
def handle_music_download(dy, dl, key):
    """Download the items that use a given piece of music (original sound).

    Args:
        dy: API client used to fetch the item list.
        dl: Downloader that saves the fetched items.
        key: Identifier of the music.

    Items are saved to ``music_<title>_<key>`` under the configured save
    path; the title comes from the first fetched item. Does nothing when the
    fetched list is empty.
    """
    douyin_logger.info("[ 提示 ]:正在请求音乐(原声)下作品")
    limit = configModel["number"]["music"]
    incremental = configModel["increase"]["music"]
    items = dy.getMusicInfo(key, 35, limit, incremental)
    if not items:
        return
    title = utils.replaceStr(items[0]["music"]["title"])
    target_dir = os.path.join(configModel["path"], f"music_{title}_{key}")
    os.makedirs(target_dir, exist_ok=True)
    dl.userDownload(awemeList=items, savePath=target_dir)
def handle_aweme_download(dy, dl, key):
    """Download a single work (aweme).

    Args:
        dy: API client used to fetch the work's data.
        dl: Downloader that saves the work.
        key: Identifier of the work.

    The work is saved into the ``aweme`` directory under the configured save
    path. Missing data and unexpected failures are logged rather than raised.
    """
    douyin_logger.info("[ 提示 ]:正在请求单个作品")
    try:
        fetched = dy.getAwemeInfo(key)
        if not fetched:
            douyin_logger.error("获取作品信息失败")
            return
        # Unpack only after the result is known to be non-empty.
        aweme, _ = fetched
        if not aweme:
            douyin_logger.error("作品数据为空")
            return
        target_dir = os.path.join(configModel["path"], "aweme")
        os.makedirs(target_dir, exist_ok=True)
        dl.userDownload(awemeList=[aweme], savePath=target_dir)
    except Exception as e:
        douyin_logger.error(f"处理作品时出错: {str(e)}")
def handle_live_download(dy, dl, key):
    """Resolve a live stream and optionally save its metadata as JSON.

    Args:
        dy: API client used to fetch the live-stream information.
        dl: Downloader; not used by this handler.
        key: Identifier of the live room.

    When the ``json`` setting is enabled and information was returned, it is
    written to ``live/<key><nickname>.json`` under the configured save path.
    """
    douyin_logger.info("[ 提示 ]:正在进行直播解析")
    live_json = dy.getLiveInfo(key)
    if not (configModel["json"] and live_json):
        return
    livePath = os.path.join(configModel["path"], "live")
    os.makedirs(livePath, exist_ok=True)
    live_file_name = utils.replaceStr(f"{key}{live_json['nickname']}")
    json_path = os.path.join(livePath, f"{live_file_name}.json")
    # Log the real destination; the old message always claimed "result.json".
    douyin_logger.info(f"[ 提示 ]:正在保存获取到的信息到 {json_path}")
    with open(json_path, "w", encoding='utf-8') as f:
        json.dump(live_json, f, ensure_ascii=False, indent=2)
# The coroutine is only defined when aiohttp imported successfully.
if ASYNC_SUPPORT:
    async def download_file(url, path):
        """Fetch ``url`` with aiohttp and write the response body to ``path``.

        Args:
            url: Address to request with HTTP GET.
            path: Destination file, opened in binary write mode.

        Returns:
            True when the response status was 200 and the body was written,
            False for any other status (no file is written in that case).
        """
        async with aiohttp.ClientSession() as session, session.get(url) as response:
            if response.status != 200:
                return False
            with open(path, 'wb') as sink:
                sink.write(await response.read())
            return True
def update_config_from_args(args):
    """Copy parsed command-line options into the module-level ``configModel``.

    Args:
        args: Namespace returned by the argument parser.

    An empty ``mode`` falls back to ``["post"]``. The cookie is only replaced
    when ``--cookie`` was given a non-empty value, so the default taken from
    the ``DOUYIN_COOKIE`` environment variable is no longer wiped by the
    option's empty default.
    """
    # Options whose configModel key equals the attribute name.
    for option in ("link", "path", "music", "cover", "avatar", "json",
                   "folderstyle", "thread", "database"):
        configModel[option] = getattr(args, option)
    configModel["mode"] = args.mode if args.mode else ["post"]
    if args.cookie:
        configModel["cookie"] = args.cookie
    # Per-category tables come from the "<kind>number" / "<kind>increase" options.
    for kind in ("post", "like", "allmix", "mix", "music"):
        configModel["number"][kind] = getattr(args, f"{kind}number")
        configModel["increase"][kind] = getattr(args, f"{kind}increase")
# Run the downloader only when this file is executed as a script.
if "__main__" == __name__:
    main()