# reposync/decode.py

import re

def parse_git_output(output):
    """Extract the commit hash, author and date from git log/show style text.

    Args:
        output: Text expected to contain a ``commit <hex-hash>`` token and
            ``Author:`` / ``Date:`` lines. ``None`` and ``""`` are accepted.

    Returns:
        A ``(commit_hash, author, date)`` tuple. Each element is the first
        match found in ``output`` with surrounding whitespace removed, or
        ``None`` when that field is absent.
    """
    if not output:
        return None, None, None

    def first_group(pattern):
        # Search once per field instead of once for the test and once for
        # the value.
        found = re.search(pattern, output)
        if found is None:
            return None
        # '.' also matches '\r', so CRLF input would otherwise leave a
        # trailing carriage return on the captured value.
        return found.group(1).strip()

    commit_hash = first_group(r'commit\s+([a-f0-9]+)')
    author = first_group(r'Author:\s+(.+)')
    date = first_group(r'Date:\s+(.+)')

    return commit_hash, author, date

def extract_diff_content(git_diff_output):
    """Return the header text of every per-file section in a git diff.

    For each line that starts with ``diff --git`` the text after that marker
    is captured up to (not including) the first of: a line starting with
    ``index``, the next ``diff --git`` line, or the end of the input. Empty
    lines are dropped from the captured text.

    Args:
        git_diff_output: Diff text; ``None`` and ``""`` yield an empty list.

    Returns:
        A list of strings such as
        ``"a/path b/path\\nnew file mode 100644"``, one per file section, in
        input order.
    """
    if not git_diff_output:
        return []

    # MULTILINE anchors the marker to the start of a line, so hunk content
    # such as '+diff --git ...' (a diff of a patch file) is not mistaken for
    # a header. Stopping at the next '\ndiff --git' keeps sections without an
    # 'index' line (e.g. pure renames) from swallowing the following header.
    # '\Z' replaces '$' because '$' would match at every line end here.
    header_pattern = re.compile(
        r'^diff --git(.+?)(?=\nindex|\ndiff --git|\Z)',
        re.DOTALL | re.MULTILINE,
    )

    return [
        '\n'.join(line for line in header.strip().split('\n') if line)
        for header in header_pattern.findall(git_diff_output)
    ]

def extract_changes_since_last_diff(diff_output):
    """Collect, per file, the text that follows each ``+++ b/`` marker.

    Every returned string starts with the path that followed ``+++ b/`` and
    continues with the lines up to the next ``diff --git`` line or the end of
    the input. Empty lines and ``\\ No newline at end of file`` marker lines
    are removed.

    Args:
        diff_output: Diff text to scan.

    Returns:
        A list with one cleaned-up string per ``+++ b/`` occurrence, in input
        order.
    """
    no_newline_marker = '\\ No newline at end of file'
    section_re = re.compile(r'\+\+\+ b/(.+?)(?=\ndiff --git|$)', re.DOTALL)

    sections = []
    for raw_section in section_re.findall(diff_output):
        kept_rows = []
        for row in raw_section.strip().split('\n'):
            # Skip blank rows and git's missing-trailing-newline marker.
            if not row or row == no_newline_marker:
                continue
            kept_rows.append(row)
        sections.append('\n'.join(kept_rows))

    return sections

# Sample input for the demo below: a commit header followed by the diffs of
# one newly added file and one modified file.
# Raw string: the text contains '\ No newline at end of file', and '\ ' is an
# invalid escape sequence in a normal string literal (SyntaxWarning on
# Python 3.12+). The resulting value is unchanged.
git_diff_output = r"""
Last commit hash before push: commit d5f594d3bc9bf2fabf1f97cd375f1ca8be821d60
Author: xmy <1926207361@qq.com>
Date:   Wed Jun 26 08:02:53 2024 +0800
    closer
diff --git a/src/Merry_Christmas.txt b/src/Merry_Christmas.txt
new file mode 100644
index 0000000..8f3694f
--- /dev/null
+++ b/src/Merry_Christmas.txt
@@ -0,0 +1 @@
+坂本龙一
\ No newline at end of file
diff --git a/src/changsha.txt b/src/changsha.txt
index 7df336a..f007879 100644
--- a/src/changsha.txt
+++ b/src/changsha.txt
@@ -1 +1,3 @@
-抗洪抢险 党员优先
\ No newline at end of file
+抗洪抢险 党员优先
+
+新增调试
\ No newline at end of file
"""

def _path_from_diff_header(header_line):
    """Return the post-change path from an ``a/<old> b/<new>`` header line.

    Returns ``None`` when the line does not have that shape.
    """
    if not header_line.startswith('a/'):
        return None

    # Usual case: both sides name the same path, so the line is exactly
    # 'a/' + path + ' b/' + path. Splitting at the midpoint stays correct
    # when the path contains spaces, the letter 'b', or 'a/' inside a
    # directory name.
    half, remainder = divmod(len(header_line) - len('a/ b/'), 2)
    if half > 0 and remainder == 0:
        candidate = header_line[2:2 + half]
        if header_line[2 + half:] == ' b/' + candidate:
            return candidate

    # The two sides differ (e.g. a rename): take what follows the last ' b/'.
    # NOTE(review): this is a heuristic if the new path itself contains ' b/'.
    _, separator, new_path = header_line.rpartition(' b/')
    return new_path if separator and new_path else None


# Demo driver: parse the sample text and print, for every file section,
# whether the file was added or modified, followed by its change text.
commit_hash, author, date = parse_git_output(git_diff_output)
print("Commit Hash:", commit_hash)
print("Author:", author)
print("Date:", date)
print("\n")

extracted_contents = extract_diff_content(git_diff_output)

# Each entry starts with the path that followed '+++ b/'.
changes_since_last_diff = extract_changes_since_last_diff(git_diff_output)

for content in extracted_contents:
    header_lines = content.split('\n')
    # Only look at the lines after 'a/... b/...' so a path that happens to
    # contain the words 'new file' is not misclassified.
    is_new_file = any(
        line.startswith('new file mode') for line in header_lines[1:]
    )

    path = _path_from_diff_header(header_lines[0])
    files = [path] if path is not None else []
    print('ADD_FILES:' if is_new_file else 'MODIFIED_FILES:', files)

    for file_path in files:
        for change in changes_since_last_diff:
            # Compare against the path on the first line of the change
            # instead of a substring search, which would also hit other
            # files whose path or content merely contains this path.
            if change.split('\n', 1)[0].rstrip() == file_path:
                print("\n" + change)
                print("\n--- End of Extracted Content ---\n")