forked from felixonmars/dnsmasq-china-list
Refactor find_redundant to fix #201
This commit is contained in:
parent
f7e27452b6
commit
a29cf774e9
|
@ -1,47 +1,64 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
''' Find redundant items in accelerated-domains.china.conf.

    An item is redundant when it duplicates an earlier item or is a
    subdomain of one, e.g. 'bar.foo.com' is redundant once 'foo.com' is
    listed.
'''
|
||||
|
||||
LEAF = 1
|
||||
|
||||
def main():
|
||||
with open('accelerated-domains.china.conf', 'r') as f:
|
||||
lines = f.readlines()
|
||||
def load(conf_file):
    ''' Parse a conf file into a list of domain label lists.

        The domain of each entry is the text between the first and second
        '/' of the line (presumably dnsmasq `server=/<domain>/<ip>` entries;
        verify against the conf file). Domains are lower-cased because
        domain names are case-insensitive, then split on '.' into labels.

        Blank lines, lines starting with '#', and lines that contain no '/'
        (and therefore carry no domain field) are skipped.

        Args:
            conf_file: path of the conf file to read.

        Returns: [ ['abc', 'com'],
                   ['bar', 'foo', 'com'],
                   ... ]
            sorted by number of labels, fewest first. The sort is stable,
            so entries with the same number of labels keep file order.
    '''
    results = []
    with open(conf_file, 'r') as f:
        # Iterate the file object directly; no need to hold every line
        # in memory via readlines().
        for line in f:
            line = line.strip()
            if line == '' or line.startswith('#'):
                continue

            fields = line.split('/')
            if len(fields) < 2:
                # No slash-delimited domain on this line; indexing
                # fields[1] would raise IndexError, so skip it instead.
                continue

            # A domain name is case-insensitive and
            # consists of several labels, separated by a full stop
            domain_name = fields[1].lower()
            results.append(domain_name.split('.'))

    # Sort results by number of domain labels
    results.sort(key=len)
    return results
|
||||
|
||||
|
||||
def find(labelses):
    ''' Find redundant items by a tree of top-level domain label to sub-level.

        `tree` is like { 'com': { 'foo': { 'bar': LEAF },
                                  'abc': LEAF },
                         'org': ... }

        A domain is redundant when walking its labels from the top-level
        label downwards reaches a LEAF, i.e. the domain itself or one of its
        parent domains has already been registered. Each redundant domain is
        printed as "Redundant found: <domain> at <label>", where <label> is
        the label at which the existing LEAF was hit.

        Args:
            labelses: iterable of label lists, e.g. ['bar', 'foo', 'com'] for
                      'bar.foo.com'. The lists are not modified. They do not
                      have to be pre-sorted.

        Returns:
            List of the redundant domain names, in the order they were
            reported.
    '''
    LEAF = 1
    tree = {}
    redundant = []

    # Process domains with fewer labels first so that a parent domain is
    # always registered before any of its subdomains. sorted() is stable,
    # so input that is already ordered by length is processed unchanged.
    for labels in sorted(labelses, key=len):
        domain = '.'.join(labels)
        last_index = len(labels) - 1

        # Init root node as current node
        node = tree

        # Walk from the top-level label down without consuming the
        # caller's list (no labels.pop()).
        for index, label in enumerate(reversed(labels)):
            child = node.get(label)

            # If child node is a LEAF node, the current domain is either
            # an already-listed domain or a subdomain of one.
            if child == LEAF:
                print(f"Redundant found: {domain} at {label}")
                redundant.append(domain)
                break

            if child is None:
                # Create a leaf node for the last label,
                # a branch node otherwise
                child = LEAF if index == last_index else {}
                node[label] = child

            # Iterate to child node
            node = child

    return redundant
|
||||
|
||||
if __name__ == '__main__':
    # Parse the domain list and report every redundant entry on stdout.
    # The old main() entry point was removed by the refactor, so only the
    # load/find pipeline is called here.
    find(load('accelerated-domains.china.conf'))
|
||||
|
|
Loading…
Reference in New Issue