code update
This commit is contained in:
parent
a1c5fbc83b
commit
8b473e65a1
|
@ -0,0 +1,7 @@
|
|||
* Serving Flask app 'CloneDetectionAPI'
|
||||
* Debug mode: off
|
||||
[31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5000
|
||||
* Running on http://10.29.128.27:5000
|
||||
[33mPress CTRL+C to quit[0m
|
|
@ -0,0 +1,304 @@
|
|||
# aim: The ColdStartPerception service for OSCH
|
||||
# author: zhangxunhui
|
||||
# date: 2022-04-23
|
||||
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
from typing import List
|
||||
import hashlib
|
||||
|
||||
from ChangedMethodExtractor import ChangedMethodExtractor
|
||||
from dulwich.objects import Blob, Commit, Tag, Tree
|
||||
from dulwich.repo import Repo
|
||||
from dulwich.walk import WalkEntry
|
||||
from ESUtils import ESUtils
|
||||
from models.RepoInfo import RepoInfo
|
||||
from MySQLUtils import MySQLUtils
|
||||
|
||||
from services.utils import read_config
|
||||
|
||||
|
||||
class HandleRepository(object):
    """Handle one repository: collect its commits and process the unhandled ones.

    ``type`` is either ``"gitea"`` (repo registered in the gitea MySQL
    database, path layout ``<owner>/<repo>.git``) or ``"local"`` (a repo on
    disk, identified by a hash of its path).
    """

    def __init__(self, repoInfo: RepoInfo, config: dict, es_utils: ESUtils, type: str):
        self.config = config
        self.repoInfo = repoInfo
        self.repo = Repo(self.repoInfo.repo_path)
        self.type = type
        if self.type == "gitea":
            # gitea stores repositories as <repositories_path>/<owner>/<repo>.git
            self.repoInfo.ownername = self.repo.path.split("/")[-2]
            self.repoInfo.reponame = self.repo.path.split("/")[-1].split(".")[0]
            self.mysql_utils = MySQLUtils(
                host=self.config["mysql"]["host"],
                port=self.config["mysql"]["port"],
                username=self.config["mysql"]["username"],
                password=self.config["mysql"]["password"],
                database=self.config["mysql"]["database"],
                autocommit=False,
                dictcursor=True,
            )
            repo_id = self.mysql_utils.get_repo_id(
                self.repoInfo.ownername, self.repoInfo.reponame
            )
            if repo_id is None:
                # BUG FIX: the original read self.repository_path, an attribute
                # that never exists, so this path raised AttributeError instead
                # of the intended message. The path lives on repoInfo.
                raise Exception(
                    "HandleRepository Error: cannot find the id of repository: {repository_path}".format(
                        repository_path=self.repoInfo.repo_path
                    )
                )
            else:
                self.repoInfo.repo_id = repo_id["id"]
        elif self.type == "local":
            # local repos have no database id: derive a stable one from the path
            self.repoInfo.repo_id = self.hash_path_to_id(self.repoInfo.repo_path)

        self.es_utils = es_utils
        # commits already processed for this repo (so reruns skip them)
        self.handled_commits = self.es_utils.get_handled_commits(
            repo_id=self.repoInfo.repo_id,
            index_name=self.config["elasticsearch"]["index_handled_commits"],
        )

    def hash_path_to_id(self, path):
        """Return a deterministic id for a local repo: sha256 hex digest of its path."""
        return hashlib.sha256(path.encode()).hexdigest()

    def run(self):
        """Collect all commits of the repo and handle the ones not yet processed.

        For a gitea fork, the commits shared with the origin repository are
        removed first so only fork-specific commits are processed.
        """
        print(
            "[Info]: Handling repository {repo_path}".format(
                repo_path=self.repo.path
            )
        )

        commits: List[Commit] = []
        object_store = self.repo.object_store
        for object_sha in list(iter(object_store)):
            obj = object_store[object_sha]
            if isinstance(obj, (Tag, Blob, Tree)):
                pass  # only commit objects are relevant here
            elif isinstance(obj, Commit):
                commits.append(obj)
            else:
                raise Exception("HandleRepository.run Error: unknown type!")

        if self.type == 'gitea':
            """Whether this repository is forked or original"""
            info = self.mysql_utils.get_repo_info(repo_id=self.repoInfo.repo_id)
            is_fork = info is not None and info["is_fork"] == 1
            if is_fork:
                # eliminate the forked commits
                origin_info = self.mysql_utils.get_repo_info(repo_id=info["fork_id"])
                if origin_info is not None:
                    origin_repo_path = os.path.join(
                        self.config["gitea"]["repositories_path"],
                        origin_info["owner_name"],
                        origin_info["name"] + ".git",
                    )
                    origin_store = Repo(origin_repo_path).object_store
                    origin_commits = [
                        origin_store[sha]
                        for sha in list(iter(origin_store))
                        if isinstance(origin_store[sha], Commit)
                    ]
                    commits = list(set(commits) - set(origin_commits))
                # else: origin repo not found in the gitea mysql database
        # else: local repos have no fork relationship to resolve

        """Handle each commit that has not been handled before."""
        for commit in commits:
            if commit.id.decode() not in self.handled_commits:
                HandleCommit(
                    repo=self.repo,
                    repoInfo=self.repoInfo,
                    commit=commit,
                    config=self.config,
                    es_utils=self.es_utils,
                ).run()
|
||||
|
||||
|
||||
class HandleRepoThread(threading.Thread):
    """Worker thread: drain RepoInfo items from a shared queue and handle each."""

    def __init__(
        self,
        name: str,
        q: queue.Queue,
        config: dict,
        type: str,
    ):
        threading.Thread.__init__(self)
        self.name = name
        self.q = q
        self.config = config
        self.es_utils = ESUtils(config=self.config)
        self.type = type

    def run(self):
        print("[Info]: Start thread: " + self.name)
        while True:
            # BUG FIX: the original did `while not q.empty(): q.get()`. With
            # several workers, another thread can take the last item between
            # the empty() check and the blocking get(), leaving this thread
            # blocked forever. A non-blocking get with queue.Empty as the
            # exit signal is race-free.
            try:
                repoInfo = self.q.get(block=False)
            except queue.Empty:
                break
            try:
                # handle one repository (local or gitea)
                handler = HandleRepository(
                    repoInfo=repoInfo,
                    config=self.config,
                    es_utils=self.es_utils,
                    type=self.type,
                )
                handler.run()
            finally:
                # always mark the item done, even if handling raised, so a
                # q.join() in the parent cannot deadlock
                self.q.task_done()
        # typo fix: original logged "Exist thread"
        print("[Info]: Exit thread: " + self.name)
|
||||
|
||||
|
||||
class HandleCommit(object):
    """Extract the changed methods of one commit and index them in Elasticsearch."""

    def __init__(
        self,
        repo: Repo,
        repoInfo: RepoInfo,
        commit: Commit,
        config: dict,
        es_utils: ESUtils,
    ):
        self.repo = repo
        self.repoInfo = repoInfo
        self.commit = commit
        self.config = config
        self.es_utils = es_utils

    def run(self):
        """Process this commit: diff it, extract changed methods, write to ES."""
        sha = self.commit.id.decode()
        print(
            "[Info]: Handling commit {commit_sha}".format(commit_sha=sha)
        )

        # Build a WalkEntry for this commit to obtain its TreeChange objects.
        entry = WalkEntry(
            self.repo.get_walker(include=[self.commit.id]), self.commit
        )
        tree_changes = entry.changes()
        # A merge commit yields one change list per parent; flatten them.
        if len(self.commit.parents) > 1:
            tree_changes = [c for per_parent in tree_changes for c in per_parent]

        extractor = ChangedMethodExtractor(
            repo=self.repo,
            repoInfo=self.repoInfo,
            commit=self.commit,
            t_changes=tree_changes,
            config=self.config,
        )
        changed = extractor.parse()

        bulk = self.es_utils.extract_es_infos(changed_methods=changed)
        self.es_utils.insert_es_bulk(bulk)

        # Record that this commit has now been handled.
        self.es_utils.insert_es_item(
            item={"repo_id": self.repoInfo.repo_id, "commit_sha": sha},
            index_name=self.config["elasticsearch"]["index_handled_commits"],
        )
|
||||
|
||||
|
||||
def handle_repositories(repositories_path: str, type: str, config: dict):
    """Handle all the repositories in the directory.

    Fills a work queue with one RepoInfo per repository found under
    ``repositories_path`` (layout depends on ``type``), then drains it with
    THREADNUM worker threads.
    """
    es_utils = ESUtils(config=config)
    es_utils.create_n_gram_index()
    es_utils.create_handled_commit_index()

    """Handle repositories by multiple threads."""
    workQueue = queue.Queue()

    if type == "local":
        # flat layout: every sub-directory is one repository
        for entry in os.scandir(repositories_path):
            if entry.is_dir():
                workQueue.put(RepoInfo(repo_path=entry.path))
    elif type == "gitea":
        # gitea layout: <repositories_path>/<ownername>/<repo>.git
        for owner in os.scandir(repositories_path):
            if not owner.is_dir():
                continue
            for repo_dir in os.scandir(owner.path):
                if repo_dir.is_dir():
                    workQueue.put(RepoInfo(repo_path=repo_dir.path))
    else:
        print("[Error]: illegal type")
        sys.exit(1)

    worker_total = config["coldstart_service"]["THREADNUM"]
    workers = []
    for idx in range(worker_total):
        worker = HandleRepoThread(
            name="Thread-" + str(idx + 1),
            q=workQueue,
            config=config,
            type=type,
        )
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
def main():
    """Entry point: load the config, then cold-start over local or gitea repos.

    The "local" repositories_path is preferred; if the config has no "local"
    section the service falls back to "gitea"; if neither exists it exits.
    """
    config_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "config-cpp.yml"
    )
    config = read_config(config_path)
    if config is None:
        print(
            "[Error]: configuration file {config_path} not found".format(
                config_path=config_path
            )
        )
        sys.exit(1)

    # cold start checks local data first, then falls back to gitea
    # BUG FIX: only the config lookup stays inside the try. The original also
    # wrapped the handle_repositories() call, so any KeyError raised while
    # actually handling local repos was misread as "no local section" and the
    # whole run was silently retried in gitea mode.
    try:
        local_repositories_path = config["local"]["repositories_path"]
    except KeyError:
        try:
            gitea_repositories_path = config["gitea"]["repositories_path"]
        except KeyError:
            # typo fix: original message said "configration"
            print("[Error]: local and gitea repositories_path configuration not found")
            sys.exit(1)
        handle_repositories(
            repositories_path=gitea_repositories_path, type="gitea", config=config
        )
    else:
        handle_repositories(
            repositories_path=local_repositories_path, type="local", config=config
        )
|
||||
|
||||
|
||||
# Script entry point: run the cold-start perception pass, then report completion.
if __name__ == "__main__":
    main()
    print("Finish ColdStartPerception service")
|
|
@ -0,0 +1,304 @@
|
|||
# aim: The ColdStartPerception service for OSCH
|
||||
# author: zhangxunhui
|
||||
# date: 2022-04-23
|
||||
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
from typing import List
|
||||
import hashlib
|
||||
|
||||
from ChangedMethodExtractor import ChangedMethodExtractor
|
||||
from dulwich.objects import Blob, Commit, Tag, Tree
|
||||
from dulwich.repo import Repo
|
||||
from dulwich.walk import WalkEntry
|
||||
from ESUtils import ESUtils
|
||||
from models.RepoInfo import RepoInfo
|
||||
from MySQLUtils import MySQLUtils
|
||||
|
||||
from services.utils import read_config
|
||||
|
||||
|
||||
class HandleRepository(object):
    """Handle one repository: collect its commits and process the unhandled ones.

    ``type`` is either ``"gitea"`` (repo registered in the gitea MySQL
    database, path layout ``<owner>/<repo>.git``) or ``"local"`` (a repo on
    disk, identified by a hash of its path).
    """

    def __init__(self, repoInfo: RepoInfo, config: dict, es_utils: ESUtils, type: str):
        self.config = config
        self.repoInfo = repoInfo
        self.repo = Repo(self.repoInfo.repo_path)
        self.type = type
        if self.type == "gitea":
            # gitea stores repositories as <repositories_path>/<owner>/<repo>.git
            self.repoInfo.ownername = self.repo.path.split("/")[-2]
            self.repoInfo.reponame = self.repo.path.split("/")[-1].split(".")[0]
            self.mysql_utils = MySQLUtils(
                host=self.config["mysql"]["host"],
                port=self.config["mysql"]["port"],
                username=self.config["mysql"]["username"],
                password=self.config["mysql"]["password"],
                database=self.config["mysql"]["database"],
                autocommit=False,
                dictcursor=True,
            )
            repo_id = self.mysql_utils.get_repo_id(
                self.repoInfo.ownername, self.repoInfo.reponame
            )
            if repo_id is None:
                # BUG FIX: the original read self.repository_path, an attribute
                # that never exists, so this path raised AttributeError instead
                # of the intended message. The path lives on repoInfo.
                raise Exception(
                    "HandleRepository Error: cannot find the id of repository: {repository_path}".format(
                        repository_path=self.repoInfo.repo_path
                    )
                )
            else:
                self.repoInfo.repo_id = repo_id["id"]
        elif self.type == "local":
            # local repos have no database id: derive a stable one from the path
            self.repoInfo.repo_id = self.hash_path_to_id(self.repoInfo.repo_path)

        self.es_utils = es_utils
        # commits already processed for this repo (so reruns skip them)
        self.handled_commits = self.es_utils.get_handled_commits(
            repo_id=self.repoInfo.repo_id,
            index_name=self.config["elasticsearch"]["index_handled_commits"],
        )

    def hash_path_to_id(self, path):
        """Return a deterministic id for a local repo: sha256 hex digest of its path."""
        return hashlib.sha256(path.encode()).hexdigest()

    def run(self):
        """Collect all commits of the repo and handle the ones not yet processed.

        For a gitea fork, the commits shared with the origin repository are
        removed first so only fork-specific commits are processed.
        """
        print(
            "[Info]: Handling repository {repo_path}".format(
                repo_path=self.repo.path
            )
        )

        commits: List[Commit] = []
        object_store = self.repo.object_store
        for object_sha in list(iter(object_store)):
            obj = object_store[object_sha]
            if isinstance(obj, (Tag, Blob, Tree)):
                pass  # only commit objects are relevant here
            elif isinstance(obj, Commit):
                commits.append(obj)
            else:
                raise Exception("HandleRepository.run Error: unknown type!")

        if self.type == 'gitea':
            """Whether this repository is forked or original"""
            info = self.mysql_utils.get_repo_info(repo_id=self.repoInfo.repo_id)
            is_fork = info is not None and info["is_fork"] == 1
            if is_fork:
                # eliminate the forked commits
                origin_info = self.mysql_utils.get_repo_info(repo_id=info["fork_id"])
                if origin_info is not None:
                    origin_repo_path = os.path.join(
                        self.config["gitea"]["repositories_path"],
                        origin_info["owner_name"],
                        origin_info["name"] + ".git",
                    )
                    origin_store = Repo(origin_repo_path).object_store
                    origin_commits = [
                        origin_store[sha]
                        for sha in list(iter(origin_store))
                        if isinstance(origin_store[sha], Commit)
                    ]
                    commits = list(set(commits) - set(origin_commits))
                # else: origin repo not found in the gitea mysql database
        # else: local repos have no fork relationship to resolve

        """Handle each commit that has not been handled before."""
        for commit in commits:
            if commit.id.decode() not in self.handled_commits:
                HandleCommit(
                    repo=self.repo,
                    repoInfo=self.repoInfo,
                    commit=commit,
                    config=self.config,
                    es_utils=self.es_utils,
                ).run()
|
||||
|
||||
|
||||
class HandleRepoThread(threading.Thread):
    """Worker thread: drain RepoInfo items from a shared queue and handle each."""

    def __init__(
        self,
        name: str,
        q: queue.Queue,
        config: dict,
        type: str,
    ):
        threading.Thread.__init__(self)
        self.name = name
        self.q = q
        self.config = config
        self.es_utils = ESUtils(config=self.config)
        self.type = type

    def run(self):
        print("[Info]: Start thread: " + self.name)
        while True:
            # BUG FIX: the original did `while not q.empty(): q.get()`. With
            # several workers, another thread can take the last item between
            # the empty() check and the blocking get(), leaving this thread
            # blocked forever. A non-blocking get with queue.Empty as the
            # exit signal is race-free.
            try:
                repoInfo = self.q.get(block=False)
            except queue.Empty:
                break
            try:
                # handle one repository (local or gitea)
                handler = HandleRepository(
                    repoInfo=repoInfo,
                    config=self.config,
                    es_utils=self.es_utils,
                    type=self.type,
                )
                handler.run()
            finally:
                # always mark the item done, even if handling raised, so a
                # q.join() in the parent cannot deadlock
                self.q.task_done()
        # typo fix: original logged "Exist thread"
        print("[Info]: Exit thread: " + self.name)
|
||||
|
||||
|
||||
class HandleCommit(object):
|
||||
def __init__(
|
||||
self,
|
||||
repo: Repo,
|
||||
repoInfo: RepoInfo,
|
||||
commit: Commit,
|
||||
config: dict,
|
||||
es_utils: ESUtils,
|
||||
):
|
||||
self.repo = repo
|
||||
self.repoInfo = repoInfo
|
||||
self.commit = commit
|
||||
self.config = config
|
||||
self.es_utils = es_utils
|
||||
|
||||
def run(self):
|
||||
commit_sha = self.commit.id.decode()
|
||||
print(
|
||||
"[Info]: Handling commit {commit_sha}".format(
|
||||
commit_sha=commit_sha
|
||||
)
|
||||
)
|
||||
|
||||
"""Generate all the changes for this commit."""
|
||||
walk_entry = WalkEntry(
|
||||
self.repo.get_walker(include=[self.commit.id]), self.commit
|
||||
)
|
||||
t_changes = walk_entry.changes() # get all the TreeChange objects
|
||||
if len(self.commit.parents) > 1:
|
||||
t_changes = [item for t_cs in t_changes for item in t_cs]
|
||||
|
||||
changed_methods = ChangedMethodExtractor(
|
||||
repo=self.repo,
|
||||
repoInfo=self.repoInfo,
|
||||
commit=self.commit,
|
||||
t_changes=t_changes,
|
||||
config=self.config,
|
||||
).parse()
|
||||
|
||||
es_data_bulk = self.es_utils.extract_es_infos(
|
||||
changed_methods=changed_methods
|
||||
)
|
||||
self.es_utils.insert_es_bulk(es_data_bulk)
|
||||
|
||||
"""Finish handling this commit, insert into the handled_commit index in es."""
|
||||
es_data = {"repo_id": self.repoInfo.repo_id, "commit_sha": commit_sha}
|
||||
self.es_utils.insert_es_item(
|
||||
item=es_data,
|
||||
index_name=self.config["elasticsearch"]["index_handled_commits"],
|
||||
)
|
||||
|
||||
|
||||
def handle_repositories(repositories_path: str, type: str, config: dict):
|
||||
"""Handle all the repositories in the directory."""
|
||||
|
||||
es_utils = ESUtils(config=config)
|
||||
es_utils.create_n_gram_index()
|
||||
es_utils.create_handled_commit_index()
|
||||
|
||||
"""Handle repositories by multiple threads."""
|
||||
workQueue = queue.Queue()
|
||||
|
||||
if type == "local":
|
||||
repo_git_paths = [f.path for f in os.scandir(repositories_path) if f.is_dir()]
|
||||
for repo_git_path in repo_git_paths:
|
||||
workQueue.put(RepoInfo(repo_path=repo_git_path))
|
||||
|
||||
elif type == "gitea":
|
||||
# iterate all the ownernames
|
||||
ownername_paths = [
|
||||
f.path for f in os.scandir(repositories_path) if f.is_dir()
|
||||
]
|
||||
for ownername_path in ownername_paths:
|
||||
# iterate all the repositories
|
||||
repo_git_paths = [
|
||||
f.path for f in os.scandir(ownername_path) if f.is_dir()
|
||||
]
|
||||
for repo_git_path in repo_git_paths:
|
||||
# if "test1.git" not in repo_git_path:
|
||||
# continue # only for test
|
||||
|
||||
workQueue.put(RepoInfo(repo_path=repo_git_path))
|
||||
|
||||
else:
|
||||
print("[Error]: illegal type")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
THREADNUM = config["coldstart_service"]["THREADNUM"]
|
||||
threads = []
|
||||
for i in range(THREADNUM):
|
||||
t = HandleRepoThread(
|
||||
name="Thread-" + str(i + 1),
|
||||
q=workQueue,
|
||||
config=config,
|
||||
type=type,
|
||||
)
|
||||
t.start()
|
||||
threads.append(t)
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
|
||||
def main():
    """Entry point: load the config, then cold-start over local or gitea repos.

    The "local" repositories_path is preferred; if the config has no "local"
    section the service falls back to "gitea"; if neither exists it exits.
    """
    config_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "config-java.yml"
    )
    config = read_config(config_path)
    if config is None:
        print(
            "[Error]: configuration file {config_path} not found".format(
                config_path=config_path
            )
        )
        sys.exit(1)

    # cold start checks local data first, then falls back to gitea
    # BUG FIX: only the config lookup stays inside the try. The original also
    # wrapped the handle_repositories() call, so any KeyError raised while
    # actually handling local repos was misread as "no local section" and the
    # whole run was silently retried in gitea mode.
    try:
        local_repositories_path = config["local"]["repositories_path"]
    except KeyError:
        try:
            gitea_repositories_path = config["gitea"]["repositories_path"]
        except KeyError:
            # typo fix: original message said "configration"
            print("[Error]: local and gitea repositories_path configuration not found")
            sys.exit(1)
        handle_repositories(
            repositories_path=gitea_repositories_path, type="gitea", config=config
        )
    else:
        handle_repositories(
            repositories_path=local_repositories_path, type="local", config=config
        )
|
||||
|
||||
|
||||
# Script entry point: run the cold-start perception pass, then report completion.
if __name__ == "__main__":
    main()
    print("Finish ColdStartPerception service")
|
|
@ -0,0 +1,304 @@
|
|||
# aim: The ColdStartPerception service for OSCH
|
||||
# author: zhangxunhui
|
||||
# date: 2022-04-23
|
||||
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
from typing import List
|
||||
import hashlib
|
||||
|
||||
from ChangedMethodExtractor import ChangedMethodExtractor
|
||||
from dulwich.objects import Blob, Commit, Tag, Tree
|
||||
from dulwich.repo import Repo
|
||||
from dulwich.walk import WalkEntry
|
||||
from ESUtils import ESUtils
|
||||
from models.RepoInfo import RepoInfo
|
||||
from MySQLUtils import MySQLUtils
|
||||
|
||||
from services.utils import read_config
|
||||
|
||||
|
||||
class HandleRepository(object):
    """Handle one repository: collect its commits and process the unhandled ones.

    ``type`` is either ``"gitea"`` (repo registered in the gitea MySQL
    database, path layout ``<owner>/<repo>.git``) or ``"local"`` (a repo on
    disk, identified by a hash of its path).
    """

    def __init__(self, repoInfo: RepoInfo, config: dict, es_utils: ESUtils, type: str):
        self.config = config
        self.repoInfo = repoInfo
        self.repo = Repo(self.repoInfo.repo_path)
        self.type = type
        if self.type == "gitea":
            # gitea stores repositories as <repositories_path>/<owner>/<repo>.git
            self.repoInfo.ownername = self.repo.path.split("/")[-2]
            self.repoInfo.reponame = self.repo.path.split("/")[-1].split(".")[0]
            self.mysql_utils = MySQLUtils(
                host=self.config["mysql"]["host"],
                port=self.config["mysql"]["port"],
                username=self.config["mysql"]["username"],
                password=self.config["mysql"]["password"],
                database=self.config["mysql"]["database"],
                autocommit=False,
                dictcursor=True,
            )
            repo_id = self.mysql_utils.get_repo_id(
                self.repoInfo.ownername, self.repoInfo.reponame
            )
            if repo_id is None:
                # BUG FIX: the original read self.repository_path, an attribute
                # that never exists, so this path raised AttributeError instead
                # of the intended message. The path lives on repoInfo.
                raise Exception(
                    "HandleRepository Error: cannot find the id of repository: {repository_path}".format(
                        repository_path=self.repoInfo.repo_path
                    )
                )
            else:
                self.repoInfo.repo_id = repo_id["id"]
        elif self.type == "local":
            # local repos have no database id: derive a stable one from the path
            self.repoInfo.repo_id = self.hash_path_to_id(self.repoInfo.repo_path)

        self.es_utils = es_utils
        # commits already processed for this repo (so reruns skip them)
        self.handled_commits = self.es_utils.get_handled_commits(
            repo_id=self.repoInfo.repo_id,
            index_name=self.config["elasticsearch"]["index_handled_commits"],
        )

    def hash_path_to_id(self, path):
        """Return a deterministic id for a local repo: sha256 hex digest of its path."""
        return hashlib.sha256(path.encode()).hexdigest()

    def run(self):
        """Collect all commits of the repo and handle the ones not yet processed.

        For a gitea fork, the commits shared with the origin repository are
        removed first so only fork-specific commits are processed.
        """
        print(
            "[Info]: Handling repository {repo_path}".format(
                repo_path=self.repo.path
            )
        )

        commits: List[Commit] = []
        object_store = self.repo.object_store
        for object_sha in list(iter(object_store)):
            obj = object_store[object_sha]
            if isinstance(obj, (Tag, Blob, Tree)):
                pass  # only commit objects are relevant here
            elif isinstance(obj, Commit):
                commits.append(obj)
            else:
                raise Exception("HandleRepository.run Error: unknown type!")

        if self.type == 'gitea':
            """Whether this repository is forked or original"""
            info = self.mysql_utils.get_repo_info(repo_id=self.repoInfo.repo_id)
            is_fork = info is not None and info["is_fork"] == 1
            if is_fork:
                # eliminate the forked commits
                origin_info = self.mysql_utils.get_repo_info(repo_id=info["fork_id"])
                if origin_info is not None:
                    origin_repo_path = os.path.join(
                        self.config["gitea"]["repositories_path"],
                        origin_info["owner_name"],
                        origin_info["name"] + ".git",
                    )
                    origin_store = Repo(origin_repo_path).object_store
                    origin_commits = [
                        origin_store[sha]
                        for sha in list(iter(origin_store))
                        if isinstance(origin_store[sha], Commit)
                    ]
                    commits = list(set(commits) - set(origin_commits))
                # else: origin repo not found in the gitea mysql database
        # else: local repos have no fork relationship to resolve

        """Handle each commit that has not been handled before."""
        for commit in commits:
            if commit.id.decode() not in self.handled_commits:
                HandleCommit(
                    repo=self.repo,
                    repoInfo=self.repoInfo,
                    commit=commit,
                    config=self.config,
                    es_utils=self.es_utils,
                ).run()
|
||||
|
||||
|
||||
class HandleRepoThread(threading.Thread):
    """Worker thread: drain RepoInfo items from a shared queue and handle each."""

    def __init__(
        self,
        name: str,
        q: queue.Queue,
        config: dict,
        type: str,
    ):
        threading.Thread.__init__(self)
        self.name = name
        self.q = q
        self.config = config
        self.es_utils = ESUtils(config=self.config)
        self.type = type

    def run(self):
        print("[Info]: Start thread: " + self.name)
        while True:
            # BUG FIX: the original did `while not q.empty(): q.get()`. With
            # several workers, another thread can take the last item between
            # the empty() check and the blocking get(), leaving this thread
            # blocked forever. A non-blocking get with queue.Empty as the
            # exit signal is race-free.
            try:
                repoInfo = self.q.get(block=False)
            except queue.Empty:
                break
            try:
                # handle one repository (local or gitea)
                handler = HandleRepository(
                    repoInfo=repoInfo,
                    config=self.config,
                    es_utils=self.es_utils,
                    type=self.type,
                )
                handler.run()
            finally:
                # always mark the item done, even if handling raised, so a
                # q.join() in the parent cannot deadlock
                self.q.task_done()
        # typo fix: original logged "Exist thread"
        print("[Info]: Exit thread: " + self.name)
|
||||
|
||||
|
||||
class HandleCommit(object):
|
||||
def __init__(
|
||||
self,
|
||||
repo: Repo,
|
||||
repoInfo: RepoInfo,
|
||||
commit: Commit,
|
||||
config: dict,
|
||||
es_utils: ESUtils,
|
||||
):
|
||||
self.repo = repo
|
||||
self.repoInfo = repoInfo
|
||||
self.commit = commit
|
||||
self.config = config
|
||||
self.es_utils = es_utils
|
||||
|
||||
def run(self):
|
||||
commit_sha = self.commit.id.decode()
|
||||
print(
|
||||
"[Info]: Handling commit {commit_sha}".format(
|
||||
commit_sha=commit_sha
|
||||
)
|
||||
)
|
||||
|
||||
"""Generate all the changes for this commit."""
|
||||
walk_entry = WalkEntry(
|
||||
self.repo.get_walker(include=[self.commit.id]), self.commit
|
||||
)
|
||||
t_changes = walk_entry.changes() # get all the TreeChange objects
|
||||
if len(self.commit.parents) > 1:
|
||||
t_changes = [item for t_cs in t_changes for item in t_cs]
|
||||
|
||||
changed_methods = ChangedMethodExtractor(
|
||||
repo=self.repo,
|
||||
repoInfo=self.repoInfo,
|
||||
commit=self.commit,
|
||||
t_changes=t_changes,
|
||||
config=self.config,
|
||||
).parse()
|
||||
|
||||
es_data_bulk = self.es_utils.extract_es_infos(
|
||||
changed_methods=changed_methods
|
||||
)
|
||||
self.es_utils.insert_es_bulk(es_data_bulk)
|
||||
|
||||
"""Finish handling this commit, insert into the handled_commit index in es."""
|
||||
es_data = {"repo_id": self.repoInfo.repo_id, "commit_sha": commit_sha}
|
||||
self.es_utils.insert_es_item(
|
||||
item=es_data,
|
||||
index_name=self.config["elasticsearch"]["index_handled_commits"],
|
||||
)
|
||||
|
||||
|
||||
def handle_repositories(repositories_path: str, type: str, config: dict):
|
||||
"""Handle all the repositories in the directory."""
|
||||
|
||||
es_utils = ESUtils(config=config)
|
||||
es_utils.create_n_gram_index()
|
||||
es_utils.create_handled_commit_index()
|
||||
|
||||
"""Handle repositories by multiple threads."""
|
||||
workQueue = queue.Queue()
|
||||
|
||||
if type == "local":
|
||||
repo_git_paths = [f.path for f in os.scandir(repositories_path) if f.is_dir()]
|
||||
for repo_git_path in repo_git_paths:
|
||||
workQueue.put(RepoInfo(repo_path=repo_git_path))
|
||||
|
||||
elif type == "gitea":
|
||||
# iterate all the ownernames
|
||||
ownername_paths = [
|
||||
f.path for f in os.scandir(repositories_path) if f.is_dir()
|
||||
]
|
||||
for ownername_path in ownername_paths:
|
||||
# iterate all the repositories
|
||||
repo_git_paths = [
|
||||
f.path for f in os.scandir(ownername_path) if f.is_dir()
|
||||
]
|
||||
for repo_git_path in repo_git_paths:
|
||||
# if "test1.git" not in repo_git_path:
|
||||
# continue # only for test
|
||||
|
||||
workQueue.put(RepoInfo(repo_path=repo_git_path))
|
||||
|
||||
else:
|
||||
print("[Error]: illegal type")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
THREADNUM = config["coldstart_service"]["THREADNUM"]
|
||||
threads = []
|
||||
for i in range(THREADNUM):
|
||||
t = HandleRepoThread(
|
||||
name="Thread-" + str(i + 1),
|
||||
q=workQueue,
|
||||
config=config,
|
||||
type=type,
|
||||
)
|
||||
t.start()
|
||||
threads.append(t)
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
|
||||
def main():
    """Entry point: load the config, then cold-start over local or gitea repos.

    The "local" repositories_path is preferred; if the config has no "local"
    section the service falls back to "gitea"; if neither exists it exits.
    """
    config_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "config-python.yml"
    )
    config = read_config(config_path)
    if config is None:
        print(
            "[Error]: configuration file {config_path} not found".format(
                config_path=config_path
            )
        )
        sys.exit(1)

    # cold start checks local data first, then falls back to gitea
    # BUG FIX: only the config lookup stays inside the try. The original also
    # wrapped the handle_repositories() call, so any KeyError raised while
    # actually handling local repos was misread as "no local section" and the
    # whole run was silently retried in gitea mode.
    try:
        local_repositories_path = config["local"]["repositories_path"]
    except KeyError:
        try:
            gitea_repositories_path = config["gitea"]["repositories_path"]
        except KeyError:
            # typo fix: original message said "configration"
            print("[Error]: local and gitea repositories_path configuration not found")
            sys.exit(1)
        handle_repositories(
            repositories_path=gitea_repositories_path, type="gitea", config=config
        )
    else:
        handle_repositories(
            repositories_path=local_repositories_path, type="local", config=config
        )
|
||||
|
||||
|
||||
# Script entry point: run the cold-start perception pass, then report completion.
if __name__ == "__main__":
    main()
    print("Finish ColdStartPerception service")
|
|
@ -0,0 +1,114 @@
|
|||
Traceback (most recent call last):
|
||||
File "/home/pdlzxh/OSCH/services/ColdStartPerception.py", line 289, in main
|
||||
local_repositories_path = config["local"]["repositories_path"]
|
||||
~~~~~~^^^^^^^^^
|
||||
KeyError: 'local'
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connection.py", line 174, in _new_conn
|
||||
conn = connection.create_connection(
|
||||
(self._dns_host, self.port), self.timeout, **extra_kw
|
||||
)
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/util/connection.py", line 95, in create_connection
|
||||
raise err
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/util/connection.py", line 85, in create_connection
|
||||
sock.connect(sa)
|
||||
~~~~~~~~~~~~^^^^
|
||||
ConnectionRefusedError: [Errno 111] Connection refused
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/connection/http_urllib3.py", line 251, in perform_request
|
||||
response = self.pool.urlopen(
|
||||
method, url, body, retries=Retry(False), headers=request_headers, **kw
|
||||
)
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connectionpool.py", line 799, in urlopen
|
||||
retries = retries.increment(
|
||||
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
|
||||
)
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/util/retry.py", line 525, in increment
|
||||
raise six.reraise(type(error), error, _stacktrace)
|
||||
~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/packages/six.py", line 770, in reraise
|
||||
raise value
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connectionpool.py", line 715, in urlopen
|
||||
httplib_response = self._make_request(
|
||||
conn,
|
||||
...<5 lines>...
|
||||
chunked=chunked,
|
||||
)
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connectionpool.py", line 416, in _make_request
|
||||
conn.request(method, url, **httplib_request_kw)
|
||||
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connection.py", line 244, in request
|
||||
super(HTTPConnection, self).request(method, url, body=body, headers=headers)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/http/client.py", line 1336, in request
|
||||
self._send_request(method, url, body, headers, encode_chunked)
|
||||
~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/http/client.py", line 1382, in _send_request
|
||||
self.endheaders(body, encode_chunked=encode_chunked)
|
||||
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/http/client.py", line 1331, in endheaders
|
||||
self._send_output(message_body, encode_chunked=encode_chunked)
|
||||
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/http/client.py", line 1091, in _send_output
|
||||
self.send(msg)
|
||||
~~~~~~~~~^^^^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/http/client.py", line 1035, in send
|
||||
self.connect()
|
||||
~~~~~~~~~~~~^^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connection.py", line 205, in connect
|
||||
conn = self._new_conn()
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/urllib3/connection.py", line 186, in _new_conn
|
||||
raise NewConnectionError(
|
||||
self, "Failed to establish a new connection: %s" % e
|
||||
)
|
||||
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7f05d79582b0>: Failed to establish a new connection: [Errno 111] Connection refused
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/home/pdlzxh/OSCH/services/ColdStartPerception.py", line 303, in <module>
|
||||
main()
|
||||
~~~~^^
|
||||
File "/home/pdlzxh/OSCH/services/ColdStartPerception.py", line 296, in main
|
||||
handle_repositories(repositories_path=gitea_repositories_path, type=type, config=config)
|
||||
~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/pdlzxh/OSCH/services/ColdStartPerception.py", line 226, in handle_repositories
|
||||
es_utils = ESUtils(config=config)
|
||||
File "/home/pdlzxh/OSCH/services/ESUtils.py", line 19, in __init__
|
||||
self.client = self.connect()
|
||||
~~~~~~~~~~~~^^
|
||||
File "/home/pdlzxh/OSCH/services/ESUtils.py", line 25, in connect
|
||||
response = client.cluster.health()
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/client/utils.py", line 168, in _wrapped
|
||||
return func(*args, params=params, headers=headers, **kwargs)
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/client/cluster.py", line 66, in health
|
||||
return self.transport.perform_request(
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
||||
"GET",
|
||||
^^^^^^
|
||||
...<2 lines>...
|
||||
headers=headers,
|
||||
^^^^^^^^^^^^^^^^
|
||||
)
|
||||
^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/transport.py", line 413, in perform_request
|
||||
raise e
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/transport.py", line 381, in perform_request
|
||||
status, headers_response, data = connection.perform_request(
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
||||
method,
|
||||
^^^^^^^
|
||||
...<5 lines>...
|
||||
timeout=timeout,
|
||||
^^^^^^^^^^^^^^^^
|
||||
)
|
||||
^
|
||||
File "/home/pdlzxh/anaconda3/envs/OSCH/lib/python3.13/site-packages/elasticsearch/connection/http_urllib3.py", line 266, in perform_request
|
||||
raise ConnectionError("N/A", str(e), e)
|
||||
elasticsearch.exceptions.ConnectionError: ConnectionError(<urllib3.connection.HTTPConnection object at 0x7f05d79582b0>: Failed to establish a new connection: [Errno 111] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x7f05d79582b0>: Failed to establish a new connection: [Errno 111] Connection refused)
|
|
@ -125,13 +125,16 @@ class HandleRepository(object):
|
|||
|
||||
"""Handle each commit."""
|
||||
for commit in commits:
|
||||
HandleCommit(
|
||||
repo=self.repo,
|
||||
repoInfo=self.repoInfo,
|
||||
commit=commit,
|
||||
config=self.config,
|
||||
es_utils=self.es_utils,
|
||||
).run()
|
||||
if commit.id.decode() not in self.handled_commits:
|
||||
HandleCommit(
|
||||
repo=self.repo,
|
||||
repoInfo=self.repoInfo,
|
||||
commit=commit,
|
||||
config=self.config,
|
||||
es_utils=self.es_utils,
|
||||
).run()
|
||||
else:
|
||||
continue
|
||||
|
||||
|
||||
class HandleRepoThread(threading.Thread):
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
* Serving Flask app 'IncrementalPerceptionAPI'
|
||||
* Debug mode: off
|
||||
[31m[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.[0m
|
||||
* Running on all addresses (0.0.0.0)
|
||||
* Running on http://127.0.0.1:5001
|
||||
* Running on http://10.29.128.27:5001
|
||||
[33mPress CTRL+C to quit[0m
|
|
@ -0,0 +1,52 @@
|
|||
# gitea:
|
||||
# repositories_path: "Your project root path on your local machine/dependencies/gitea/git/repositories"
|
||||
|
||||
local:
|
||||
repositories_path: "/home/common_data/cpp_repo"
|
||||
# repositories_path: "/home/pdlzxh/OSCH/test_repo"
|
||||
|
||||
elasticsearch:
|
||||
urls:
|
||||
- "http://localhost:9200"
|
||||
username: "elastic"
|
||||
password: "ATs_4A7nPIHiK7=pz3T_"
|
||||
index_ngram: "handled_n_grams"
|
||||
index_handled_commits: "handled_commits"
|
||||
|
||||
mysql:
|
||||
host: "127.0.0.1"
|
||||
port: 3307
|
||||
username: "gitea"
|
||||
password: "gitea"
|
||||
database: "gitea"
|
||||
|
||||
rabbitmq:
|
||||
host: "127.0.0.1"
|
||||
port: 5672
|
||||
|
||||
service:
|
||||
lang_suffix:
|
||||
- "java"
|
||||
- "py"
|
||||
- "cpp"
|
||||
mit: 50
|
||||
mil: 6
|
||||
ngram: 5
|
||||
filter_threshold: 10
|
||||
verify_threshold: 70
|
||||
|
||||
coldstart_service:
|
||||
THREADNUM: 6
|
||||
|
||||
incremental_service:
|
||||
THREADNUM: 2
|
||||
|
||||
nil:
|
||||
basepath: "/home/pdlzxh/OSCH/services/parser/nil"
|
||||
java: "Func-extractor-java.jar"
|
||||
py: "Func-extractor-py.jar"
|
||||
cpp: "Func-extractor-cpp.jar"
|
||||
|
||||
client_service:
|
||||
gitea_url: "Your address for gitea service"
|
||||
token: "Your personal account token who installed the webhook client service"
|
|
@ -0,0 +1,52 @@
|
|||
# gitea:
|
||||
# repositories_path: "Your project root path on your local machine/dependencies/gitea/git/repositories"
|
||||
|
||||
local:
|
||||
repositories_path: "/home/common_data/java_repo"
|
||||
# repositories_path: "/home/pdlzxh/OSCH/test_repo"
|
||||
|
||||
elasticsearch:
|
||||
urls:
|
||||
- "http://localhost:9200"
|
||||
username: "elastic"
|
||||
password: "ATs_4A7nPIHiK7=pz3T_"
|
||||
index_ngram: "handled_n_grams"
|
||||
index_handled_commits: "handled_commits"
|
||||
|
||||
mysql:
|
||||
host: "127.0.0.1"
|
||||
port: 3307
|
||||
username: "gitea"
|
||||
password: "gitea"
|
||||
database: "gitea"
|
||||
|
||||
rabbitmq:
|
||||
host: "127.0.0.1"
|
||||
port: 5672
|
||||
|
||||
service:
|
||||
lang_suffix:
|
||||
- "java"
|
||||
- "py"
|
||||
- "cpp"
|
||||
mit: 50
|
||||
mil: 6
|
||||
ngram: 5
|
||||
filter_threshold: 10
|
||||
verify_threshold: 70
|
||||
|
||||
coldstart_service:
|
||||
THREADNUM: 12
|
||||
|
||||
incremental_service:
|
||||
THREADNUM: 2
|
||||
|
||||
nil:
|
||||
basepath: "/home/pdlzxh/OSCH/services/parser/nil"
|
||||
java: "Func-extractor-java.jar"
|
||||
py: "Func-extractor-py.jar"
|
||||
cpp: "Func-extractor-cpp.jar"
|
||||
|
||||
client_service:
|
||||
gitea_url: "Your address for gitea service"
|
||||
token: "Your personal account token who installed the webhook client service"
|
|
@ -0,0 +1,52 @@
|
|||
# gitea:
|
||||
# repositories_path: "Your project root path on your local machine/dependencies/gitea/git/repositories"
|
||||
|
||||
local:
|
||||
repositories_path: "/home/common_data/python_repo"
|
||||
# repositories_path: "/home/pdlzxh/OSCH/test_repo"
|
||||
|
||||
elasticsearch:
|
||||
urls:
|
||||
- "http://localhost:9200"
|
||||
username: "elastic"
|
||||
password: "ATs_4A7nPIHiK7=pz3T_"
|
||||
index_ngram: "handled_n_grams"
|
||||
index_handled_commits: "handled_commits"
|
||||
|
||||
mysql:
|
||||
host: "127.0.0.1"
|
||||
port: 3307
|
||||
username: "gitea"
|
||||
password: "gitea"
|
||||
database: "gitea"
|
||||
|
||||
rabbitmq:
|
||||
host: "127.0.0.1"
|
||||
port: 5672
|
||||
|
||||
service:
|
||||
lang_suffix:
|
||||
- "java"
|
||||
- "py"
|
||||
- "cpp"
|
||||
mit: 50
|
||||
mil: 6
|
||||
ngram: 5
|
||||
filter_threshold: 10
|
||||
verify_threshold: 70
|
||||
|
||||
coldstart_service:
|
||||
THREADNUM: 6
|
||||
|
||||
incremental_service:
|
||||
THREADNUM: 2
|
||||
|
||||
nil:
|
||||
basepath: "/home/pdlzxh/OSCH/services/parser/nil"
|
||||
java: "Func-extractor-java.jar"
|
||||
py: "Func-extractor-py.jar"
|
||||
cpp: "Func-extractor-cpp.jar"
|
||||
|
||||
client_service:
|
||||
gitea_url: "Your address for gitea service"
|
||||
token: "Your personal account token who installed the webhook client service"
|
|
@ -1,14 +1,11 @@
|
|||
# gitea:
|
||||
# repositories_path: "Your project root path on your local machine/dependencies/gitea/git/repositories"
|
||||
|
||||
local:
|
||||
repositories_path: "/home/common_data/java_repo"
|
||||
gitea:
|
||||
  repositories_path: "/home/pdlzxh/OSCH/dependencies/gitea/git/repositories"
|
||||
|
||||
elasticsearch:
|
||||
urls:
|
||||
- "http://localhost:9200"
|
||||
username: "elastic"
|
||||
password: "ATs_4A7nPIHiK7=pz3T_"
|
||||
- "http://localhost:19200"
|
||||
username: ""
|
||||
password: ""
|
||||
index_ngram: "handled_n_grams"
|
||||
index_handled_commits: "handled_commits"
|
||||
|
||||
|
@ -47,5 +44,5 @@ nil:
|
|||
cpp: "Func-extractor-cpp.jar"
|
||||
|
||||
client_service:
|
||||
gitea_url: "Your address for gitea service"
|
||||
token: "Your personal account token who installed the webhook client service"
|
||||
gitea_url: "http://10.0.10.27:3000"
|
||||
token: "746865cd309495902cbb6052b788b2043ad03be0"
|
||||
|
|
|
@ -2,6 +2,7 @@ import os
|
|||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
from dulwich.objects import Commit
|
||||
from models.MethodInfo import MethodInfo
|
||||
|
@ -43,7 +44,7 @@ class FuncExtractor:
|
|||
self.config["nil"][lang],
|
||||
),
|
||||
"-rp",
|
||||
self.repoInfo.repo_path + '/.git',
|
||||
self.repoInfo.repo_path,
|
||||
"-os",
|
||||
self.object_sha,
|
||||
"-mit",
|
||||
|
@ -71,7 +72,8 @@ class FuncExtractor:
|
|||
tokens=tokens,
|
||||
)
|
||||
except Exception:
|
||||
print("-rp "+self.repoInfo.repo_path+" -os "+self.object_sha+" Finished!")
|
||||
now = now = datetime.now()
|
||||
print(now.strftime("%Y-%m-%d %H:%M:%S") + "-rp "+self.repoInfo.repo_path+" -os "+self.object_sha+" Finished!")
|
||||
return
|
||||
|
||||
def formMethodInfo(self, start_line: int, end_line: int, tokens: list):
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
restart_service()
|
||||
{
|
||||
service_name=$1
|
||||
python_path="/home/zxh/anaconda3/envs/OSCH/bin/python" # This should be changed to your local python path
|
||||
python_path="/home/pdlzxh/anaconda3/envs/OSCH/bin/python" # This should be changed to your local python path
|
||||
echo "-----------------------------"
|
||||
echo "Restarting service: $service_name..."
|
||||
script_name="$service_name.py"
|
||||
|
|
Loading…
Reference in New Issue