Delete unused files

huaian_zhou 2024-03-23 16:42:35 +08:00
parent 6d8ab9c802
commit 3794e4274e
3 changed files with 0 additions and 362 deletions

View File

@@ -1,317 +0,0 @@
import sys
sys.path.append("..")  # add the directory containing util to the import path
import time
import json
import logging
import argparse
from pathlib import Path
from pymongo import MongoClient
from util import logging_init
DB = "JiraRepos"
REPOS = [
"Apache",
"Hyperledger",
"IntelDAOS",
"JFrog",
"Jira",
"JiraEcosystem",
"MariaDB",
"Mindville",
"Mojang",
"MongoDB",
"Qt",
"RedHat",
"Sakai",
"SecondLife",
"Sonatype",
"Spring",
]
# custom field ID used for epic links in each repo (no entry for Mojang)
EPIC_FIELD = {
    "Apache": "customfield_12311120",
    "Hyperledger": "customfield_10006",
    "IntelDAOS": "customfield_10092",
    "JFrog": "customfield_10806",
    "Jira": "customfield_12931",
    "JiraEcosystem": "customfield_12180",
    "MariaDB": "customfield_10600",
    "Mindville": "customfield_10000",
    "MongoDB": "customfield_10857",
    "Qt": "customfield_10400",
    "RedHat": "customfield_12311140",
    "Sakai": "customfield_10772",
    "SecondLife": "customfield_10871",
    "Sonatype": "customfield_11500",
    "Spring": "customfield_10680",
}
ISSUE_DIR = Path("../../data/raw/issues")
ISSUE_DIR.mkdir(parents=True, exist_ok=True)
LINK_DIR = Path("../../data/raw/links")
LINK_DIR.mkdir(parents=True, exist_ok=True)

def extract_issues(db, repo: str):
    """Extract Jira issue data."""
    collection = db[repo]
    cursor = collection.find({})  # query all documents
    issues = []
    issue_ct, comment_ct = 0, 0  # total number of issues and comments
    for document in cursor:
        try:
            issue_key = document["key"]
            try:
                issuetype = document["fields"]["issuetype"]["name"]
            except Exception:
                issuetype = "None"
            try:
                status = document["fields"]["status"]["name"]  # status
            except Exception:
                status = "None"
            try:
                priority = document["fields"]["priority"]["name"]  # priority
            except Exception:
                priority = "None"
            project = document["fields"]["project"]["name"]
            try:
                resolution = document["fields"]["resolution"]["name"]  # resolved or not
            except Exception:
                resolution = "Open"
            try:
                component_arr = document["fields"]["components"]
                components = []
                for item in component_arr:
                    components.append(item["name"])
            except Exception:
                components = []
            try:
                created = document["fields"]["created"]
            except Exception:
                created = "None"
            try:
                updated = document["fields"]["updated"]
            except Exception:
                updated = "None"
            try:
                summary = document["fields"]["summary"]
            except Exception:
                summary = " "
            try:
                description = document["fields"]["description"]
            except Exception:
                description = " "
            try:
                comment_arr = document["fields"]["comments"]
                comments = []
                for item in comment_arr:
                    comments.append(item["body"])
                    comment_ct += 1
            except Exception:
                comments = []
            issue = {
                "issue_key": issue_key,
                "type": issuetype,
                "status": status,
                "priority": priority,
                "resolution": resolution,
                "project": project,
                "created": created,
                "updated": updated,
                "title": summary,
                "description": description,
                "comments": comments,
                "components": components,
            }
            issues.append(issue)
            issue_ct += 1
        except Exception:
            pass
    file_name = ISSUE_DIR / (repo + ".json")
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(issues, f, ensure_ascii=False, indent=2)
    logging.info(
        f"Extracted {issue_ct} issues, {comment_ct} comments from {repo} repo."
    )

def extract_links(db, repo: str):
    """Extract Jira issue link data."""
    collection = db[repo]
    cursor = collection.find({})  # query all documents
    links = []
    for document in cursor:
        try:
            issue_key = document["key"]
            issuelinks = document["fields"]["issuelinks"]
            # save generic link types
            for issue_link in issuelinks:
                type = issue_link["type"]["name"]
                try:
                    in_issue_key = issue_key
                    out_issue_key = issue_link["outwardIssue"]["key"]
                except Exception:
                    out_issue_key = issue_key
                    in_issue_key = issue_link["inwardIssue"]["key"]
                link_key = in_issue_key + "_" + out_issue_key
                link = {
                    "link_key": link_key,
                    "type": type,
                    "in_issue_key": in_issue_key,
                    "out_issue_key": out_issue_key,
                }
                links.append(link)
            # handle Subtask links separately
            subtasks = document["fields"]["subtasks"]
            for subtask in subtasks:
                type = "Subtask"
                in_issue_key = issue_key
                out_issue_key = subtask["key"]
                link_key = in_issue_key + "_" + out_issue_key
                link = {
                    "link_key": link_key,
                    "type": type,
                    "in_issue_key": in_issue_key,
                    "out_issue_key": out_issue_key,
                }
                links.append(link)
            # handle Epic links separately
            try:
                epic = document["fields"][EPIC_FIELD[repo]]
                type = "Epic-Relation"
                in_issue_key = issue_key
                out_issue_key = epic
                link_key = in_issue_key + "_" + out_issue_key
                link = {
                    "link_key": link_key,
                    "type": type,
                    "in_issue_key": in_issue_key,
                    "out_issue_key": out_issue_key,
                }
                links.append(link)
            except Exception:
                pass
            # handle the special link types found only in the RedHat repo
            if repo == "RedHat":
                try:
                    parent = document["fields"]["customfield_12313140"]
                    type = "Parent-Relation"
                    in_issue_key = issue_key
                    out_issue_key = parent
                    link_key = in_issue_key + "_" + out_issue_key
                    link = {
                        "link_key": link_key,
                        "type": type,
                        "in_issue_key": in_issue_key,
                        "out_issue_key": out_issue_key,
                    }
                    links.append(link)
                except Exception:
                    pass
                try:
                    feature = document["fields"]["customfield_12318341"]
                    type = "Feature-Relation"
                    in_issue_key = issue_key
                    out_issue_key = feature
                    link_key = in_issue_key + "_" + out_issue_key
                    link = {
                        "link_key": link_key,
                        "type": type,
                        "in_issue_key": in_issue_key,
                        "out_issue_key": out_issue_key,
                    }
                    links.append(link)
                except Exception:
                    pass
        except Exception:
            pass
    file_name = LINK_DIR / (repo + ".json")
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(links, f, ensure_ascii=False, indent=2)
    logging.info(f"Extracted {len(links)} links from {repo} repo.")
if __name__ == "__main__":
# 解析命令行参数
parser = argparse.ArgumentParser(description="Extract Jira issues data")
parser.add_argument("--host", default="localhost")
parser.add_argument("--port", type=int, default=27017)
parser.add_argument("--username", default=None)
parser.add_argument("--password", default=None)
args = parser.parse_args()
logging_init(log_filename="extract_jira_data", log_dir="../../log/preprocess")
start_time = time.perf_counter()
# 创建连接访问MongoDB
with MongoClient(
host=args.host,
port=args.port,
username=args.username,
password=args.password,
serverSelectionTimeoutMS=5000,
) as client:
db = client[DB]
for repo in REPOS:
extract_issues(db, repo)
extract_links(db, repo)
logging.info("=" * 20)
end_time = time.perf_counter()
logging.info(f"Time cost: {end_time - start_time:.3f}s")

View File

@@ -1 +0,0 @@
from .log_helper import logging_init

View File

@@ -1,44 +0,0 @@
import sys
import logging
from pathlib import Path
from datetime import datetime


def logging_init(
    log_filename="monitor", log_level=logging.INFO, log_dir="./log/", only_file=False
):
    """
    Initialize the logging system.

    :param log_filename: log file name
    :param log_level: log level
    :param log_dir: log directory
    :param only_file: whether to write only to the log file (no console output)
    """
    # build the log file path, suffixed with today's date
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    log_filepath = log_dir / (log_filename + "_" + str(datetime.now())[:10] + ".txt")
    # log message format
    format = "[%(asctime)s] - %(levelname)s: %(message)s"
    # write only to the log file
    if only_file:
        logging.basicConfig(
            filename=log_filepath,
            level=log_level,
            format=format,
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    # write to the log file and also to the terminal
    else:
        logging.basicConfig(
            level=log_level,
            format=format,
            datefmt="%Y-%m-%d %H:%M:%S",
            handlers=[
                logging.FileHandler(log_filepath),
                logging.StreamHandler(sys.stdout),
            ],
        )
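
As a usage sketch (not part of the deleted files), logging_init would typically be called once at program start, as the extraction script above does; the log_filename and message below are illustrative only.

import logging
from util import logging_init  # same import used by the extraction script above

logging_init(log_filename="example", log_dir="./log/")
logging.info("logging initialized")  # with only_file=False (the default), written to the dated log file and echoed to stdout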