36 lines
942 B
Python
36 lines
942 B
Python
# coding:utf-8
|
|
import dbop
|
|
# Initialise dbop's pool at import time, before any query below is issued.
# NOTE(review): 3 is presumably the pool size — confirm against dbop.init_pool.
dbop.init_pool(3)
|
|
# Directory holding the per-project label files "<project id>_ini.txt" and
# "<project id>_yes.txt" (relative to the current working directory).
_LABEL_DIR = "../../../../../../project/python/DupPR/label_data"


def _label_path(prj, kind):
    """Return the path of the label file of the given kind ("ini"/"yes") for project *prj*."""
    return "%s/%d_%s.txt" % (_LABEL_DIR, prj, kind)


def _distinct_prs(path):
    """Return the set of distinct values found in the first two tab-separated columns of *path*."""
    prs = set()
    with open(path, "r") as fp:
        for line in fp:
            # Drop the line terminator first: otherwise, on a two-column
            # line, the second id keeps its "\n" and the same PR would be
            # counted twice ("12" vs "12\n").
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                # Blank (e.g. trailing) or single-column line: no pair here.
                continue
            prs.update(fields[:2])
    return prs


def _ratio(part, whole):
    """Return part / whole as a float, or 0.0 when *whole* is zero."""
    if not whole:
        return 0.0
    return part * 1.0 / whole


def work():
    """Print summary counts and ratios for the labelled PR data.

    For every project id returned by ``select id from project`` the function
    reads ``<id>_ini.txt`` and ``<id>_yes.txt`` from the label directory and
    counts the distinct values in the first two tab-separated columns of each
    file. The per-project counts are summed into ``total`` (ini files) and
    ``yes`` (yes files).

    Two lines are printed:

    * ``total``, the number of ``rq2_metrics`` rows with ``is_dup = 1``, and
      their ratio;
    * ``total``, ``yes``, and their ratio.

    Raises whatever ``open`` raises if a label file is missing, and whatever
    ``dbop`` raises on a query failure.
    """
    # First find the projects (the original note says there are 26 of them).
    prjs = [item[0] for item in dbop.select_all("select id from project")]

    # How many were selected initially, and how many of those are in the
    # "yes" files.
    total = 0
    yes = 0
    for prj in prjs:
        total += len(_distinct_prs(_label_path(prj, "ini")))
        yes += len(_distinct_prs(_label_path(prj, "yes")))

    # How many rows are flagged as duplicates.
    dups = dbop.select_one("select count(*) from rq2_metrics where is_dup = 1", None)[0]

    # Compute the ratios. A single %-formatted argument prints the same text
    # as the Python 2 statement ``print a, b, c`` and also parses on Python 3.
    print("%s %s %s" % (total, dups, _ratio(dups, total)))
    print("%s %s %s" % (total, yes, _ratio(yes, total)))
|
|
|
|
|
|
# Script entry point: run the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    work()