duppr_analysis/experiment_code/minor_revision/unintentional_part.py

36 lines
942 B
Python

# coding:utf-8
import dbop
# Initialise dbop's pool at import time, before work() issues any query.
# NOTE(review): 3 is presumably the pool size (number of DB connections) --
# confirm against dbop.init_pool.
dbop.init_pool(3)
def _collect_pr_ids(path):
    """Return the distinct values in the first two tab-separated columns of *path*.

    Each line is expected to hold at least two tab-separated fields (the two
    sides of a pull-request pair -- presumably PR numbers; verify against the
    label files).  Lines with fewer than two fields, e.g. a trailing blank
    line, are skipped instead of raising IndexError.
    """
    pr_ids = set()
    with open(path, "r") as fp:
        for line in fp:
            # Drop the line terminator first: otherwise, in a two-column
            # file, the second field would be stored as "123\n" and be
            # counted as a different id from "123" in the first column.
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                continue
            pr_ids.update(fields[:2])
    return pr_ids


def _ratio(part, whole):
    """Return part / whole as a float, or NaN when *whole* is zero."""
    if not whole:
        return float("nan")
    return part * 1.0 / whole


def work(label_dir="../../../../../../project/python/DupPR/label_data"):
    """Print how many initially selected PRs turned out to be duplicates.

    For every project id in the ``project`` table, the files
    ``<label_dir>/<id>_ini.txt`` and ``<label_dir>/<id>_yes.txt`` are read and
    the distinct ids in their first two columns are counted.  The counts are
    summed per project (an id occurring in two projects is counted twice).

    Two lines are printed:
        ``total dups dups/total``  -- dups = rows in ``rq2_metrics`` with
                                      ``is_dup = 1``
        ``total yes  yes/total``   -- yes  = distinct ids in the ``_yes`` files

    :param label_dir: directory holding the ``*_ini.txt`` / ``*_yes.txt``
        label files; the default is the location the script originally
        hard-coded, relative to the current working directory.
    """
    # First fetch the projects (26 of them, per the original author's note).
    prjs = [item[0] for item in dbop.select_all("select id from project")]

    # How many PRs were initially selected, and how many were labelled "yes".
    total = 0
    yes = 0
    for prj in prjs:
        total += len(_collect_pr_ids("%s/%d_ini.txt" % (label_dir, prj)))
        yes += len(_collect_pr_ids("%s/%d_yes.txt" % (label_dir, prj)))

    # How many duplicates are recorded in the database.
    dups = dbop.select_one("select count(*) from rq2_metrics where is_dup = 1", None)[0]

    # Report the ratios.  A single pre-formatted argument keeps the output
    # identical to the old ``print a, b, c`` statement while remaining valid
    # syntax on both Python 2 and Python 3.
    print("%s %s %s" % (total, dups, _ratio(dups, total)))
    print("%s %s %s" % (total, yes, _ratio(yes, total)))
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    work()