删除无用文件
This commit is contained in:
parent
6d8ab9c802
commit
3794e4274e
|
@ -1,317 +0,0 @@
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append("..") # 添加util所在目录
|
|
||||||
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
|
||||||
from pymongo import MongoClient
|
|
||||||
from util import logging_init
|
|
||||||
|
|
||||||
|
|
||||||
# Name of the MongoDB database that is opened by the script entry point.
DB = "JiraRepos"

# Collection names inside DB; each one is processed as one Jira repository.
REPOS = [
    "Apache",
    "Hyperledger",
    "IntelDAOS",
    "JFrog",
    "Jira",
    "JiraEcosystem",
    "MariaDB",
    "Mindville",
    "Mojang",
    "MongoDB",
    "Qt",
    "RedHat",
    "Sakai",
    "SecondLife",
    "Sonatype",
    "Spring",
]

# Per-repository custom field that stores the epic an issue belongs to.
# Keys must be spelled exactly like the entries of REPOS, because the mapping
# is indexed with the repository name. "Mojang" has no entry, so no epic
# relation is extracted for it.
EPIC_FIELD = {
    "Apache": "customfield_12311120",
    "Hyperledger": "customfield_10006",
    "IntelDAOS": "customfield_10092",
    "JFrog": "customfield_10806",
    "Jira": "customfield_12931",
    "JiraEcosystem": "customfield_12180",
    "MariaDB": "customfield_10600",
    "Mindville": "customfield_10000",
    "MongoDB": "customfield_10857",
    "Qt": "customfield_10400",
    "RedHat": "customfield_12311140",
    "Sakai": "customfield_10772",
    "SecondLife": "customfield_10871",
    "Sonatype": "customfield_11500",
    "Spring": "customfield_10680",
}
|
|
||||||
|
|
||||||
# Output directories for the extracted issue and link JSON files.
# Both are created at import time (including missing parents) so the
# extraction functions can write into them directly.
ISSUE_DIR, LINK_DIR = Path("../../data/raw/issues"), Path("../../data/raw/links")
for _output_dir in (ISSUE_DIR, LINK_DIR):
    _output_dir.mkdir(parents=True, exist_ok=True)
del _output_dir
|
|
||||||
|
|
||||||
|
|
||||||
# Errors raised when a nested lookup hits a missing key or a value that is
# not a container (for example ``None``).
_ISSUE_LOOKUP_ERRORS = (KeyError, IndexError, TypeError)


def _issue_field(document, path, default):
    """Return ``document["fields"][path[0]][path[1]]...`` or *default* if any step fails."""
    value = document
    try:
        for key in ("fields", *path):
            value = value[key]
    except _ISSUE_LOOKUP_ERRORS:
        return default
    return value


def _issue_field_items(document, field, item_key):
    """Return ``item[item_key]`` for every entry of a list-valued field, or ``[]`` on any failure."""
    try:
        return [item[item_key] for item in document["fields"][field]]
    except _ISSUE_LOOKUP_ERRORS:
        return []


def extract_issues(db, repo: str):
    """Extract the issues of one Jira repository and dump them to JSON.

    Every document of the collection ``db[repo]`` is reduced to a flat record
    (key, type, status, priority, resolution, project, timestamps, title,
    description, comments, components) and the list of records is written to
    ``ISSUE_DIR / "<repo>.json"``.

    Optional fields fall back to a placeholder when they are missing or
    malformed. A document without a key or a project name is skipped, and the
    number of skipped documents is logged as a warning.

    :param db: database object; ``db[repo]`` must provide ``find``
    :param repo: collection name, also used as the output file stem
    """
    issues = []
    comment_ct = 0  # total number of comments kept in the output
    skipped_ct = 0  # documents dropped because a mandatory field is missing

    for document in db[repo].find({}):
        # The issue key and the project name are mandatory.
        try:
            issue_key = document["key"]
            project = document["fields"]["project"]["name"]
        except _ISSUE_LOOKUP_ERRORS:
            skipped_ct += 1
            continue

        # Count only the comments that are actually stored: if the comment
        # list is malformed it is discarded as a whole and contributes nothing.
        comments = _issue_field_items(document, "comments", "body")
        comment_ct += len(comments)

        issues.append(
            {
                "issue_key": issue_key,
                "type": _issue_field(document, ("issuetype", "name"), "None"),
                "status": _issue_field(document, ("status", "name"), "None"),
                "priority": _issue_field(document, ("priority", "name"), "None"),
                # An issue without a resolution is reported as "Open".
                "resolution": _issue_field(document, ("resolution", "name"), "Open"),
                "project": project,
                "created": _issue_field(document, ("created",), "None"),
                "updated": _issue_field(document, ("updated",), "None"),
                "title": _issue_field(document, ("summary",), " "),
                "description": _issue_field(document, ("description",), " "),
                "comments": comments,
                "components": _issue_field_items(document, "components", "name"),
            }
        )

    file_name = ISSUE_DIR / (repo + ".json")
    # "surrogatepass" lets lone surrogates in the text be written instead of
    # aborting the dump with an encoding error.
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(issues, f, ensure_ascii=False, indent=2)

    if skipped_ct:
        logging.warning(
            f"Skipped {skipped_ct} documents without key or project in {repo} repo."
        )
    logging.info(
        f"Extracted {len(issues)} issues, {comment_ct} comments from {repo} repo."
    )
|
|
||||||
|
|
||||||
|
|
||||||
# Errors raised when a nested lookup hits a missing key or a value of the
# wrong type (for example ``None`` where a dict or a string is expected).
_LINK_LOOKUP_ERRORS = (KeyError, IndexError, TypeError)

# Extra single-valued relation fields that are only read for the "RedHat" repo.
_REDHAT_RELATION_FIELDS = (
    ("Parent-Relation", "customfield_12313140"),
    ("Feature-Relation", "customfield_12318341"),
)


def _make_link(link_type, in_issue_key, out_issue_key):
    """Build one link record; raises TypeError if a key is not a string."""
    return {
        "link_key": in_issue_key + "_" + out_issue_key,
        "type": link_type,
        "in_issue_key": in_issue_key,
        "out_issue_key": out_issue_key,
    }


def _link_endpoints(issue_key, issue_link):
    """Return ``(in_issue_key, out_issue_key)`` for one ``issuelinks`` entry."""
    try:
        return issue_key, issue_link["outwardIssue"]["key"]
    except _LINK_LOOKUP_ERRORS:
        # No usable outward issue: the current issue is the outward side.
        return issue_link["inwardIssue"]["key"], issue_key


def _field_list(fields, name):
    """Return ``fields[name]`` if it is a list, otherwise an empty list."""
    try:
        value = fields[name]
    except _LINK_LOOKUP_ERRORS:
        return []
    return value if isinstance(value, (list, tuple)) else []


def _document_links(issue_key, fields, relation_fields):
    """Collect every link record that can be read from one issue document."""
    links = []

    # Generic links
    for issue_link in _field_list(fields, "issuelinks"):
        try:
            in_issue_key, out_issue_key = _link_endpoints(issue_key, issue_link)
            links.append(
                _make_link(issue_link["type"]["name"], in_issue_key, out_issue_key)
            )
        except _LINK_LOOKUP_ERRORS:
            continue  # skip only the malformed entry

    # Subtask links
    for subtask in _field_list(fields, "subtasks"):
        try:
            links.append(_make_link("Subtask", issue_key, subtask["key"]))
        except _LINK_LOOKUP_ERRORS:
            continue

    # Epic relation and repository-specific relations. A missing or null
    # field simply yields no link.
    for link_type, field_name in relation_fields:
        try:
            links.append(_make_link(link_type, issue_key, fields[field_name]))
        except _LINK_LOOKUP_ERRORS:
            continue

    return links


def extract_links(db, repo: str):
    """Extract the issue links of one Jira repository and dump them to JSON.

    For every document of ``db[repo]`` the following links are collected, in
    this order: the entries of ``issuelinks``, one "Subtask" link per entry of
    ``subtasks``, one "Epic-Relation" link from the repository's epic field
    and, for the "RedHat" repo only, "Parent-Relation" and "Feature-Relation"
    links. The result is written to ``LINK_DIR / "<repo>.json"``.

    A malformed entry is skipped on its own; it does not discard the other
    links of the same document. Documents without a key or without fields are
    skipped.

    :param db: database object; ``db[repo]`` must provide ``find``
    :param repo: collection name, also used as the output file stem
    """
    # Repository names are matched case-insensitively so that a capitalization
    # mismatch between the collection name and the EPIC_FIELD key does not
    # silently disable epic extraction.
    repo_folded = repo.lower()
    epic_field = next(
        (field for name, field in EPIC_FIELD.items() if name.lower() == repo_folded),
        None,
    )

    relation_fields = []
    if epic_field is not None:
        relation_fields.append(("Epic-Relation", epic_field))
    if repo == "RedHat":
        relation_fields.extend(_REDHAT_RELATION_FIELDS)

    links = []
    for document in db[repo].find({}):
        try:
            issue_key = document["key"]
            fields = document["fields"]
        except _LINK_LOOKUP_ERRORS:
            continue
        links.extend(_document_links(issue_key, fields, relation_fields))

    file_name = LINK_DIR / (repo + ".json")
    with open(file_name, "w", errors="surrogatepass", encoding="utf-8") as f:
        json.dump(links, f, ensure_ascii=False, indent=2)

    logging.info(f"Extracted links from {repo} repo. done!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command-line options describing the MongoDB connection
    cli = argparse.ArgumentParser(description="Extract Jira issues data")
    cli.add_argument("--host", default="localhost")
    cli.add_argument("--port", type=int, default=27017)
    for credential_flag in ("--username", "--password"):
        cli.add_argument(credential_flag, default=None)
    options = cli.parse_args()

    logging_init(log_dir="../../log/preprocess", log_filename="extract_jira_data")

    started = time.perf_counter()

    # Open the MongoDB connection; it is closed when the block exits
    connection_settings = {
        "host": options.host,
        "port": options.port,
        "username": options.username,
        "password": options.password,
        "serverSelectionTimeoutMS": 5000,
    }
    with MongoClient(**connection_settings) as mongo:
        jira_db = mongo[DB]

        # Issues first, then links, one repository at a time
        for repo_name in REPOS:
            extract_issues(jira_db, repo_name)
            extract_links(jira_db, repo_name)
            logging.info("=" * 20)

    elapsed = time.perf_counter() - started

    logging.info(f"Time cost: {elapsed:.3f}s")
|
|
|
@ -1 +0,0 @@
|
||||||
from .log_helper import logging_init
|
|
|
@ -1,44 +0,0 @@
|
||||||
import sys
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
def logging_init(
    log_filename="monitor", log_level=logging.INFO, log_dir="./log/", only_file=False
):
    """
    Initialise the root logger.

    Messages are appended to ``<log_dir>/<log_filename>_<YYYY-MM-DD>.txt``
    (UTF-8 encoded) and, unless ``only_file`` is set, also written to stdout.
    The log directory is created if it does not exist.

    The configuration is applied with ``logging.basicConfig``, which does
    nothing when the root logger already has handlers.

    :param log_filename: log file name without date suffix and extension
    :param log_level: logging level of the root logger
    :param log_dir: directory the log file is written to
    :param only_file: if true, log to the file only and not to the terminal
    """

    # Resolve the log file path
    log_dir = Path(log_dir)
    log_dir.mkdir(parents=True, exist_ok=True)
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    log_filepath = log_dir / f"{log_filename}_{date_stamp}.txt"

    # Always log to the file; use an explicit encoding so non-ASCII messages
    # are written identically on every platform.
    handlers = [logging.FileHandler(log_filepath, encoding="utf-8")]
    if not only_file:
        handlers.append(logging.StreamHandler(sys.stdout))

    logging.basicConfig(
        level=log_level,
        format="[%(asctime)s] - %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=handlers,
    )
|
|
Loading…
Reference in New Issue