删除无用文件
This commit is contained in:
parent
6d8ab9c802
commit
3794e4274e
|
@ -1,317 +0,0 @@
|
|||
import sys
|
||||
|
||||
sys.path.append("..") # 添加util所在目录
|
||||
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from pymongo import MongoClient
|
||||
from util import logging_init
|
||||
|
||||
|
||||
# Name of the MongoDB database that is opened by the script entry point.
DB = "JiraRepos"

# Collection names to process; each one is read via ``db[repo]``.
REPOS = [
    "Apache",
    "Hyperledger",
    "IntelDAOS",
    "JFrog",
    "Jira",
    "JiraEcosystem",
    "MariaDB",
    "Mindville",
    "Mojang",
    "MongoDB",
    "Qt",
    "RedHat",
    "Sakai",
    "SecondLife",
    "Sonatype",
    "Spring",
]

# Per-repository custom field whose value is used as the target of an
# "Epic-Relation" link.  Keys are looked up with the repo name, so they must
# match the spelling used in REPOS exactly ("Mindville", "RedHat").
# "Mojang" has no entry, so no epic links are produced for it.
EPIC_FIELD = {
    "Apache": "customfield_12311120",
    "Hyperledger": "customfield_10006",
    "IntelDAOS": "customfield_10092",
    "JFrog": "customfield_10806",
    "Jira": "customfield_12931",
    "JiraEcosystem": "customfield_12180",
    "MariaDB": "customfield_10600",
    "Mindville": "customfield_10000",
    "MongoDB": "customfield_10857",
    "Qt": "customfield_10400",
    "RedHat": "customfield_12311140",
    "Sakai": "customfield_10772",
    "SecondLife": "customfield_10871",
    "Sonatype": "customfield_11500",
    "Spring": "customfield_10680",
}
|
||||
|
||||
# Output directories for the extracted JSON files.  Both paths are relative
# to the current working directory and are created as soon as this module
# is loaded.
ISSUE_DIR = Path("../../data/raw/issues")
LINK_DIR = Path("../../data/raw/links")

ISSUE_DIR.mkdir(exist_ok=True, parents=True)
LINK_DIR.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
# Lookup failures that mean "this field is absent, null or oddly shaped".
_ISSUE_LOOKUP_ERRORS = (KeyError, TypeError, IndexError)


def _issue_field(fields, key, default):
    """Return ``fields[key]``, or *default* when the key cannot be looked up."""
    try:
        return fields[key]
    except _ISSUE_LOOKUP_ERRORS:
        return default


def _issue_field_name(fields, key, default):
    """Return ``fields[key]["name"]``, or *default* when missing or null."""
    try:
        return fields[key]["name"]
    except _ISSUE_LOOKUP_ERRORS:
        return default


def _issue_field_items(fields, key, attr):
    """Return ``item[attr]`` for every item of ``fields[key]``.

    An empty list is returned when the field is missing, null, or any item
    lacks *attr*, so a partially readable list is never reported.
    """
    try:
        return [item[attr] for item in fields[key]]
    except _ISSUE_LOOKUP_ERRORS:
        return []


def extract_issues(db, repo: str):
    """Extract the Jira issues of one repository into a JSON file.

    Every document of ``db[repo]`` is flattened into a dict with the keys
    ``issue_key``, ``type``, ``status``, ``priority``, ``resolution``,
    ``project``, ``created``, ``updated``, ``title``, ``description``,
    ``comments`` and ``components``.  The resulting list is written to
    ``ISSUE_DIR / "<repo>.json"``.

    Documents without a ``key`` or a project name are skipped.  Optional
    fields that cannot be read fall back to ``"None"`` (type, status,
    priority, created, updated), ``"Open"`` (resolution), ``" "`` (title,
    description) or ``[]`` (comments, components).

    :param db: object that yields a collection for ``db[repo]``; the
        collection must provide ``find``.
    :param repo: collection name, also used as the output file stem.
    """
    issues = []
    comment_ct = 0
    skipped_ct = 0

    for document in db[repo].find({}):  # empty filter: every document
        # The issue key and project name are mandatory; without them the
        # document is dropped.
        try:
            issue_key = document["key"]
            fields = document["fields"]
            project = fields["project"]["name"]
        except _ISSUE_LOOKUP_ERRORS:
            skipped_ct += 1
            continue

        comments = _issue_field_items(fields, "comments", "body")
        # Count only comments that are actually exported.
        comment_ct += len(comments)

        issues.append(
            {
                "issue_key": issue_key,
                "type": _issue_field_name(fields, "issuetype", "None"),
                "status": _issue_field_name(fields, "status", "None"),
                "priority": _issue_field_name(fields, "priority", "None"),
                "resolution": _issue_field_name(fields, "resolution", "Open"),
                "project": project,
                "created": _issue_field(fields, "created", "None"),
                "updated": _issue_field(fields, "updated", "None"),
                "title": _issue_field(fields, "summary", " "),
                "description": _issue_field(fields, "description", " "),
                "comments": comments,
                "components": _issue_field_items(fields, "components", "name"),
            }
        )

    file_name = ISSUE_DIR / (repo + ".json")
    # "surrogatepass" lets lone surrogate code points be written instead of
    # raising an encoding error.
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(issues, f, ensure_ascii=False, indent=2)

    if skipped_ct:
        logging.warning(
            f"Skipped {skipped_ct} documents without key or project in {repo} repo."
        )

    logging.info(
        f"Extracted {len(issues)} issues, {comment_ct} comments from {repo} repo."
    )
|
||||
|
||||
|
||||
# Lookup failures that mean "this field is absent, null or oddly shaped".
_LINK_LOOKUP_ERRORS = (KeyError, TypeError, IndexError)


def _build_link(link_type, in_issue_key, out_issue_key):
    """Return the record describing one directed link between two issues.

    Raises ``TypeError`` when the keys cannot be concatenated (for example
    when a custom field holds ``None``); callers use that to skip the link.
    """
    return {
        "link_key": in_issue_key + "_" + out_issue_key,
        "type": link_type,
        "in_issue_key": in_issue_key,
        "out_issue_key": out_issue_key,
    }


def extract_links(db, repo: str):
    """Extract the Jira issue links of one repository into a JSON file.

    For every document of ``db[repo]`` the following links are collected,
    in this order:

    * one link per entry of ``fields["issuelinks"]``, typed by the entry's
      ``type.name``; the current issue is the inward side when the entry
      has an ``outwardIssue`` and the outward side otherwise;
    * one ``"Subtask"`` link per entry of ``fields["subtasks"]``;
    * one ``"Epic-Relation"`` link when the repo has an ``EPIC_FIELD``
      entry and that field holds a string;
    * for the ``"RedHat"`` repo only, ``"Parent-Relation"`` and
      ``"Feature-Relation"`` links read from two further custom fields.

    The list is written to ``LINK_DIR / "<repo>.json"``.

    :param db: object that yields a collection for ``db[repo]``; the
        collection must provide ``find``.
    :param repo: collection name, also used as the output file stem.
    """
    # (link type, custom field) pairs whose value names the target issue.
    custom_relations = []
    epic_field = EPIC_FIELD.get(repo)
    if epic_field is not None:
        custom_relations.append(("Epic-Relation", epic_field))
    if repo == "RedHat":
        custom_relations.append(("Parent-Relation", "customfield_12313140"))
        custom_relations.append(("Feature-Relation", "customfield_12318341"))

    links = []
    skipped_ct = 0

    for document in db[repo].find({}):  # empty filter: every document
        try:
            issue_key = document["key"]
            fields = document["fields"]

            # Generic links.
            for issue_link in fields["issuelinks"]:
                link_type = issue_link["type"]["name"]
                try:
                    in_issue_key = issue_key
                    out_issue_key = issue_link["outwardIssue"]["key"]
                except _LINK_LOOKUP_ERRORS:
                    # No outward side: the current issue is the target.
                    out_issue_key = issue_key
                    in_issue_key = issue_link["inwardIssue"]["key"]
                links.append(_build_link(link_type, in_issue_key, out_issue_key))

            # Subtasks are stored separately from the generic links.
            for subtask in fields["subtasks"]:
                links.append(_build_link("Subtask", issue_key, subtask["key"]))
        except _LINK_LOOKUP_ERRORS:
            # Links appended before the failure are kept; the rest of the
            # document is abandoned.
            skipped_ct += 1
            continue

        for link_type, field in custom_relations:
            try:
                links.append(_build_link(link_type, issue_key, fields[field]))
            except _LINK_LOOKUP_ERRORS:
                # Expected for issues where the field is absent or null.
                continue

    file_name = LINK_DIR / (repo + ".json")
    # "surrogatepass" lets lone surrogate code points be written instead of
    # raising an encoding error.
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(links, f, ensure_ascii=False, indent=2)

    if skipped_ct:
        logging.warning(
            f"{skipped_ct} malformed documents in {repo} repo; "
            "their links may be incomplete."
        )

    logging.info(f"Extracted links from {repo} repo. done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line options describing how to reach the MongoDB server.
    cli = argparse.ArgumentParser(description="Extract Jira issues data")
    for flag, options in (
        ("--host", {"default": "localhost"}),
        ("--port", {"type": int, "default": 27017}),
        ("--username", {"default": None}),
        ("--password", {"default": None}),
    ):
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    logging_init(log_dir="../../log/preprocess", log_filename="extract_jira_data")

    began = time.perf_counter()

    # Open the connection and dump issues and links for every repository.
    with MongoClient(
        host=opts.host,
        port=opts.port,
        username=opts.username,
        password=opts.password,
        serverSelectionTimeoutMS=5000,
    ) as client:
        database = client[DB]

        for repo_name in REPOS:
            extract_issues(database, repo_name)
            extract_links(database, repo_name)
            logging.info("=" * 20)  # separator between repositories

    elapsed = time.perf_counter() - began
    logging.info(f"Time cost: {elapsed:.3f}s")
|
|
@ -1 +0,0 @@
|
|||
from .log_helper import logging_init
|
|
@ -1,44 +0,0 @@
|
|||
import sys
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def logging_init(
    log_filename="monitor", log_level=logging.INFO, log_dir="./log/", only_file=False
):
    """
    Initialise the root logger via ``logging.basicConfig``.

    Records are written to ``<log_dir>/<log_filename>_<YYYY-MM-DD>.txt``
    (UTF-8) and, unless *only_file* is true, also to standard output.
    ``log_dir`` is created when it does not exist.

    :param log_filename: stem of the log file name
    :param log_level: threshold passed to ``logging.basicConfig``
    :param log_dir: directory that receives the log file
    :param only_file: when true, log to the file only and not to the terminal
    """

    # Resolve the log file path, creating the directory on demand.
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    log_filepath = log_dir / f"{log_filename}_{date_stamp}.txt"

    # Explicit UTF-8 so non-ASCII messages do not depend on the platform's
    # default encoding.
    handlers = [logging.FileHandler(log_filepath, encoding="utf-8")]
    if not only_file:
        handlers.append(logging.StreamHandler(sys.stdout))

    logging.basicConfig(
        level=log_level,
        format="[%(asctime)s] - %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=handlers,
    )
|
Loading…
Reference in New Issue