841 lines
34 KiB
Python
841 lines
34 KiB
Python
'''
|
|
# @ Author: Jie Yang
|
|
# @ Create Time: 2018
|
|
# @ Last Modified by: Jie Yang Contact: jieynlp@gmail.com
|
|
# @ Last Modified time: 2022-04-20 12:38:31
|
|
'''
|
|
# -*- coding: utf-8 -*-
|
|
import os.path
|
|
import platform
|
|
from tkinter import filedialog
|
|
from tkinter import font
|
|
from tkinter import messagebox
|
|
from collections import deque
|
|
from tkinter import *
|
|
from tkinter.ttk import Frame, Button, Radiobutton, Label, Combobox
|
|
from tkinter.simpledialog import Dialog
|
|
from tkinter.scrolledtext import ScrolledText
|
|
from dataclasses import dataclass
|
|
from typing import List, Optional, Tuple
|
|
import json
|
|
|
|
from utils.recommend import *
|
|
|
|
|
|
class Editor(ScrolledText):
    """Scrolled text widget that shows annotated text and highlights entities.

    Two regular expressions are supplied by the owner: one locating confirmed
    entities and one locating recommended entities. A match is expected to
    look like ``<2-char opener>text#Label<2-char closer>``; the highlight code
    relies on those fixed marker widths and on the last ``#`` in the match.
    """

    def __init__(self, parent, entity_pattern, recommend_pattern):
        """Create the widget.

        :param parent: parent tkinter widget
        :param entity_pattern: Tcl regexp matching a confirmed entity
        :param recommend_pattern: Tcl regexp matching a recommended entity
        """
        super().__init__(parent, selectbackground='light salmon')
        self.entity_pattern = entity_pattern
        self.recommend_pattern = recommend_pattern
        fnt = font.Font(family='Times', size=20, weight="bold", underline=0)
        self.config(insertbackground='red', insertwidth=4, font=fnt)

        def _ignore(_):
            # Returning 'break' stops Tk from running the default binding.
            return 'break'

        # Disable the default copy behaviour when right click.
        # For MacOS, right click is button 2, other systems are button3
        self.bind('<Button-2>', _ignore)
        self.bind('<Button-3>', _ignore)
        self.set_colors(None)

    def set_colors(self, colors: Optional[List[Tuple[str, str]]]):
        """
        Set colors for different entity type
        :param colors: list of (entity, color), or None to disable colorful annotation
        """
        self.colors = colors
        # Drop every previously configured entity/recommend tag so stale
        # colours do not survive a scheme change.
        for tag in self.tag_names():
            if tag.startswith(('entity', 'recommend')):
                self.tag_delete(tag)
        # TODO color edge to discriminate recommend
        self.tag_configure("edge", background="light grey", foreground='DimGrey', font=('Times', 12))
        if colors is None:
            self.tag_configure("recommend", background='light green')
            self.tag_configure("entity", background="SkyBlue1")
        else:
            for label, color in colors:
                self.tag_configure('entity_' + label, background=color)
                self.tag_configure('recommend_' + label, background=color)

    def _highlight_entity(self, start: str, count: int, tag_name: str):
        """Tag one annotation of ``count`` characters beginning at ``start``.

        The annotated words receive ``tag_name`` (suffixed with ``_<label>``
        when a colour scheme is active); the surrounding markup, including
        the label, receives the ``edge`` tag.
        """
        end = f'{start}+{count}c'
        sharp_pos = self.get(start, end).rfind('#')
        word_start = f"{start}+2c"          # skip the 2-char opening marker
        word_end = f"{start}+{sharp_pos}c"  # words end at the last '#'
        if self.colors:
            # Label sits between the last '#' and the 2-char closing marker.
            label_start = f'{start}+{sharp_pos + 1}c'
            label_end = f'{start}+{count - 2}c'
            label = self.get(label_start, label_end)
            tag_name = f'{tag_name}_{label}'
        self.tag_add(tag_name, word_start, word_end)
        self.tag_add("edge", start, word_start)
        self.tag_add("edge", word_end, end)

    def show_annotation_tag(self, show: bool):
        """Show or hide (elide) the markup characters tagged ``edge``."""
        self.tag_configure('edge', elide=not show)

    def highlight_recommend(self, start: str, count: int):
        """Highlight a recommended entity of ``count`` chars at ``start``."""
        self._highlight_entity(start, count, 'recommend')

    def highlight_entity(self, start: str, count: int):
        """Highlight a confirmed entity of ``count`` chars at ``start``."""
        self._highlight_entity(start, count, 'entity')

    def get_text(self) -> str:
        """get text from 0 to end"""
        return self.get("1.0", "end-1c")

    def _highlight_entities(self, pattern, highlight_func):
        """Call ``highlight_func(pos, length)`` for every match of ``pattern``."""
        count_var = StringVar()
        from_index = '1.0'
        while True:
            pos = self.search(pattern, from_index, END, count=count_var, regexp=True)
            if pos == "":
                break
            match_len = count_var.get()
            # Resume searching right after this match.
            from_index = f"{pos}+{match_len}c"
            highlight_func(pos, int(match_len))

    def update_view(self):
        """Re-apply highlighting for confirmed and recommended entities."""
        self._highlight_entities(self.entity_pattern, self.highlight_entity)
        self._highlight_entities(self.recommend_pattern, self.highlight_recommend)

    def current_entity(self) -> Tuple[Optional[str], Tuple[Optional[str], Optional[str]]]:
        """Find the annotation on the cursor's line that strictly contains the cursor.

        :return: ``('gold', (start, end))`` for a confirmed entity,
                 ``('recommend', (start, end))`` for a recommended one, or
                 ``(None, (None, None))`` when the cursor is not inside any.
                 ``start``/``end`` are ``row.col`` text indices.
        """

        def find_pattern_span_in_line(pattern):
            row, col = self.index(INSERT).split('.')
            cursor_col = int(col)
            count_var = StringVar()
            from_index = f'{row}.0'
            while True:
                pos = self.search(pattern, from_index, f'{row}.end', count=count_var, regexp=True)
                if pos == '':
                    return None
                row, col = pos.split('.')
                first_col = int(col)
                last_col = first_col + int(count_var.get())
                match_end = f'{row}.{last_col}'  # here don't use offset form
                if first_col < cursor_col < last_col:
                    return pos, match_end
                from_index = match_end

        span = find_pattern_span_in_line(self.entity_pattern)
        if span is not None:
            return 'gold', span
        span = find_pattern_span_in_line(self.recommend_pattern)
        if span is not None:
            return 'recommend', span
        return None, (None, None)

    def get_selection(self) -> Optional[str]:
        """Return the text selected in THIS widget, or None without a selection.

        The widget's own ``sel`` tag is read instead of ``selection_get()``,
        which returns the PRIMARY selection of whichever widget owns it;
        callers go on to use this widget's ``sel.first``/``sel.last`` indices.
        """
        try:
            return self.get(SEL_FIRST, SEL_LAST)
        except TclError:
            # Raised when the 'sel' tag has no range in this widget.
            return None
|
|
|
|
|
|
@dataclass
class KeyDef:
    """One shortcut-key definition: a key bound to an entity label."""

    # Keyboard character that triggers the annotation.
    key: str
    # Entity label written into the annotation markup.
    name: str
    # Free-form description; empty by default.
    desc: str = ''
    # Highlight colour name; None until one is assigned.
    color: Optional[str] = None
|
|
|
|
|
|
class KeyMapFrame(Frame):
    """Panel listing every shortcut key next to an editable label entry."""

    def __init__(self, parent, keymap: List[KeyDef]):
        """Build the panel for ``keymap`` (displayed sorted by key)."""
        super().__init__(parent, relief='groove')
        self.keymap = sorted(keymap, key=lambda x: x.key)
        self.rows = len(keymap)
        self.textFontStyle = 'Times'
        self.title_label = None
        self.key_labels = []
        self.name_entries = []
        self.create_widgets()

    def create_widgets(self):
        """Create the title (once) and one key label + name entry per key."""
        # The title is created a single time; update_keymap() calls this
        # method again and must not stack a new title on top of the old one.
        if self.title_label is None:
            self.title_label = Label(self, text="Shortcuts map", foreground="blue",
                                     font=(self.textFontStyle, 14, "bold"))
            self.title_label.grid(row=0, column=0, columnspan=2, sticky=W, padx=6, pady=8)
        for row, item in enumerate(self.keymap, 1):
            key_lbl = Label(self, text=item.key.upper() + ": ", font=(self.textFontStyle, 14, "bold"))
            key_lbl.grid(row=row, column=0, sticky=NW, padx=4, pady=4)
            self.key_labels.append(key_lbl)

            name_entry = Entry(self, font=(self.textFontStyle, 14), bg=item.color)
            name_entry.insert(0, item.name)
            name_entry.grid(row=row, column=1, columnspan=1, rowspan=1, sticky=NW, padx=4, pady=4)
            self.name_entries.append(name_entry)

    def update_keymap(self, keymap):
        """Replace the displayed keymap, rebuilding the per-key widgets."""
        self.keymap = sorted(keymap, key=lambda x: x.key)
        self.rows = len(keymap)  # keep the row count in step with the new map
        for widget in self.key_labels + self.name_entries:
            widget.destroy()
        self.key_labels = []
        self.name_entries = []
        self.create_widgets()

    def read_keymap(self) -> List[KeyDef]:
        """read current keymap in GUI, might be changed by user

        Keys whose name entry was blanked out are dropped from the result.
        """
        new_map = []
        for cmd, entry in zip(self.keymap, self.name_entries):
            new_name = entry.get()
            if new_name.strip() != '':
                new_map.append(KeyDef(cmd.key, new_name, cmd.desc, cmd.color))
            else:
                print(f'{cmd.key} key deleted')
        return new_map
|
|
|
|
|
|
class QueryExport(Dialog):
    """Modal dialog asking for export options (tag scheme, segmentation, ...)."""

    def __init__(self, parent, filename, sample):
        """Show the dialog; returns once it is closed.

        :param parent: parent widget
        :param filename: name shown in the dialog title
        :param sample: excerpt of the text, used to pre-select "Segmented"
        """
        self.confirmed = False
        self.sample = sample
        super().__init__(parent, 'Exporting ' + filename)  # here dialog shows

    def body(self, master):
        """override"""
        box = Frame(master, relief='groove')
        self.scheme_var = StringVar(master, "BMES")
        Radiobutton(box, text="BMES", variable=self.scheme_var, value="BMES").pack(side=LEFT, padx=5, pady=5)
        Radiobutton(box, text="BIO", variable=self.scheme_var, value="BIO").pack(side=LEFT, padx=5, pady=5)
        box.pack()
        self.segmented_var = BooleanVar(master, self._guess_segmented())
        Checkbutton(master, text="Segmented", variable=self.segmented_var).pack()
        self.only_NP_var = BooleanVar(master, False)
        Checkbutton(master, text="Only NP label", variable=self.only_NP_var).pack()
        self.export_recommended_var = BooleanVar(master, True)
        Checkbutton(master, text="Export Recommended", variable=self.export_recommended_var).pack()

    def apply(self):
        """override, called after press ok, not called on cancel"""
        self.confirmed = True

    def segmented(self) -> bool:
        """Return the state of the "Segmented" checkbox."""
        return self.segmented_var.get()

    def only_NP(self) -> bool:
        """Return the state of the "Only NP label" checkbox."""
        return self.only_NP_var.get()

    def keep_recommended(self) -> bool:
        """Return the state of the "Export Recommended" checkbox."""
        return self.export_recommended_var.get()

    def tag_scheme(self) -> str:
        """Return the selected tag scheme, "BMES" or "BIO"."""
        return self.scheme_var.get()

    def _guess_segmented(self):
        """False for non-segmented Chinese, True for English or Segmented Chinese.
        Make naive guess, user should check whether the guess is right
        """
        total = len(self.sample)
        if total == 0:
            # An empty document gives nothing to measure; avoid dividing by zero.
            return False
        ascii_percent = sum(1 for c in self.sample if c.isascii()) / total
        space_percent = self.sample.count(' ') / total
        return ascii_percent > 0.8 or space_percent > 0.2
|
|
|
|
|
|
def all_colors():
    """Return the 40 highlight colour names used for entity labels.

    Each of eight "Light*" base colours is combined with the suffixes
    '', '1', '2', '3', '4'. The result is ordered by the reversed spelling
    of each name, so names sharing a suffix end up next to each other.
    """
    base_names = ('LightBlue', 'LightCyan', 'LightGoldenrod', 'LightPink',
                  'LightSalmon', 'LightSkyBlue', 'LightSteelBlue', 'LightYellow')
    suffixes = ('', '1', '2', '3', '4')
    names = [base + suffix for base in base_names for suffix in suffixes]
    names.sort(key=lambda name: name[::-1])
    return names
|
|
|
|
|
|
class Application(Frame):
    """Main window of the annotator: text editor, shortcut map and export.

    Annotations are stored inline in the text as ``[@words#Label*]``
    (confirmed) and ``[$words#Label*]`` (recommended). Every edit is written
    to an ``.ann`` file and reloaded into the editor.
    """

    def __init__(self, parent):
        super().__init__(parent)
        self.Version = "YEDDA-V1.0 Annotator"
        self.configFile = "configs/default.config"
        self.OS = platform.system().lower()
        self.fileName = ""
        self.file_encoding = 'utf-8'
        self.debug = False
        self.history = deque(maxlen=20)

        # default GUI display parameter
        self.readConfig()
        self.textColumn = 5

        self.entity_regex = r'\[\@.*?\#.*?\*\](?!\#)'
        self.recommendRe = r'\[\$.*?\#.*?\*\](?!\#)'
        # Matches either marker; the character class holds only '@' and '$'.
        self.goldAndrecomRe = r'\[[\@\$].*?\#.*?\*\](?!\#)'
        self.textFontStyle = "Times"
        self.initUI()

    def KeyDef2Dic(self, keymap=None):
        """Convert key definitions into a ``{key: name}`` dict.

        :param keymap: iterable of KeyDef; defaults to ``self.pressCommand``
        """
        if keymap is None:
            keymap = self.pressCommand
        return {item.key: item.name for item in keymap}

    def readConfig(self):
        """Load ``self.configFile`` (JSON ``{key: label}``) into ``self.pressCommand``."""
        with open(self.configFile, 'r') as fp:
            config_dict = json.load(fp)
        self.pressCommand = [KeyDef(key, label) for key, label in config_dict.items()]
        for keydef, color in zip(self.pressCommand, all_colors()):
            keydef.color = color

        # default GUI display parameter
        self.textRow = max(len(self.pressCommand), 20)

    def initUI(self):
        """Create and lay out all widgets and install the key bindings."""
        self.master.title(self.Version)
        self.pack(fill=BOTH, expand=True)

        for i in range(0, self.textColumn):
            self.columnconfigure(i, weight=2)
        self.columnconfigure(self.textColumn + 2, weight=1)
        self.columnconfigure(self.textColumn + 4, weight=1)
        for i in range(0, 16):
            self.rowconfigure(i, weight=1)

        self.filename_lbl = Label(self, text="File: no file is opened")
        self.filename_lbl.grid(sticky=W, pady=4, padx=5)
        self.text = Editor(self, self.entity_regex, self.recommendRe)
        self.text.grid(row=1, column=0, columnspan=self.textColumn, rowspan=self.textRow, padx=12, sticky=NSEW)

        button_column = self.textColumn + 1
        btn = Button(self, text="Open", command=self.onOpen)
        btn.grid(row=1, column=button_column)
        btn = Button(self, text="ReMap", command=self.renewPressCommand)
        btn.grid(row=2, column=button_column, pady=4)
        btn = Button(self, text="NewMap", command=self.savenewPressCommand)
        btn.grid(row=3, column=button_column, pady=4)
        btn = Button(self, text="Export", command=self.generateSequenceFile)
        btn.grid(row=4, column=button_column, pady=4)

        self.use_recommend = BooleanVar(self, True)
        check = Checkbutton(self, text='Recommend', command=self.toggle_use_recommend, variable=self.use_recommend)
        check.grid(row=5, column=button_column, sticky=W, pady=4)

        # Kept alive by the lambda's closure.
        show_tags_var = BooleanVar(self, True)
        check = Checkbutton(self, text='Show Tags', variable=show_tags_var,
                            command=lambda: self.text.show_annotation_tag(show_tags_var.get()))
        check.grid(row=6, column=button_column, sticky=W)

        self.use_colorful_var = BooleanVar(self, False)
        check = Checkbutton(self, text='Colorful', variable=self.use_colorful_var, command=self.toggle_use_colorful)
        check.grid(row=7, column=button_column, sticky=W)

        self.cursor_index_label = Label(self, text="Ln 1, Col 0")
        self.cursor_index_label.grid(row=self.textRow + 1, sticky=NSEW, pady=4, padx=4)
        # Stored on self: a tkinter Variable that is garbage-collected unsets
        # its Tcl variable and deletes its trace callbacks, which would
        # silently disable the live preview once this method returns.
        self.cmd_var = StringVar()
        self.cmd_var.trace_add('write', lambda _, _1, _2: self.preview_cmd_range())
        self.entry = Entry(self, validate='focus', vcmd=self.preview_cmd_range, textvariable=self.cmd_var)
        self.entry.grid(row=self.textRow + 1, column=1, columnspan=self.textColumn - 2, sticky=NSEW, pady=4, padx=8)
        self.entry.bind('<FocusOut>', self.clear_preview_mark)
        self.entry.bind('<Return>', self.execute_command)

        btn = Button(self, text="Enter", command=lambda: self.execute_command(None))
        btn.grid(row=self.textRow + 1, column=self.textColumn - 1)

        all_keys = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        for press_key in all_keys:
            self.text.bind(press_key, self.alphanum_key_pressed, add='')
            # Bound once for every OS (a second bind of the same sequence
            # would merely replace the first).
            self.text.bind("<Control-Key-" + press_key + ">", self.keepCurrent)

        self.text.bind('<Control-Key-z>', self.backToHistory)

        self.text.bind('<Double-Button-1>', self.doubleLeftClick)
        self.text.bind('<ButtonRelease-1>', self.show_cursor_pos)
        self.text.bind('<KeyRelease>', self.show_cursor_pos)

        self.keymap_frame = KeyMapFrame(self, self.pressCommand)
        self.keymap_frame.grid(row=1, column=self.textColumn + 2, rowspan=self.keymap_frame.rows,
                               columnspan=2, padx=6, pady=6, sticky=NW)

        Label(self, text="KeyMap Templates:").grid(row=8, column=button_column)
        self.configListBox = Combobox(self, values=getConfigList(), state='readonly')
        self.configListBox.grid(row=8, column=self.textColumn + 2, columnspan=2)
        # select current config file
        self.configListBox.set(os.path.basename(self.configFile))
        self.configListBox.bind('<<ComboboxSelected>>', self.on_select_configfile)

    def show_cursor_pos(self, _):
        """Event handler: display the insertion cursor's line and column."""
        self.setCursorLabel(self.text.index(INSERT))

    # TODO: select entity by double left click
    def doubleLeftClick(self, _):
        """Placeholder handler for double click; currently only logs in debug mode."""
        if self.debug:
            print("Action Track: doubleLeftClick")

    def toggle_use_recommend(self):
        """When recommendation is switched off, strip recommended markup from the text."""
        if not self.use_recommend.get():
            content = self.text.get_text()
            content = removeRecommendContent(content, self.recommendRe)
            self.writeFile(self.fileName, content, '1.0')

    def toggle_use_colorful(self):
        """Switch between per-label colours and the two default colours."""
        if self.use_colorful_var.get():
            self.text.set_colors([(d.name, d.color) for d in self.pressCommand])
        else:
            self.text.set_colors(None)
        self.text.update_view()

    def onOpen(self):
        """Ask for a file and load it into the editor."""
        filename = filedialog.askopenfilename(
            filetypes=[('all files', '.*'), ('text files', '.txt'), ('ann files', '.ann')])
        # A cancelled dialog yields an empty value ('' or an empty tuple).
        if not filename:
            return
        # Undo snapshots belong to the previous file; replaying them would
        # overwrite the newly opened file with the old file's content.
        self.history.clear()
        # autoLoadNewFile reads the file (which also records self.fileName),
        # fills the editor, updates the label and refreshes highlighting.
        self.autoLoadNewFile(filename, "1.0")

    def readFile(self, filename):
        """Read ``filename`` and remember its name and encoding.

        The platform default encoding is tried first, then UTF-8.

        :return: the file content as text
        """
        try:
            with open(filename) as f:
                text = f.read()
                encoding = f.encoding
        except UnicodeDecodeError:
            with open(filename, encoding='utf-8') as f:
                text = f.read()
                encoding = f.encoding
        # Record whichever encoding actually worked so writeFile matches it.
        self.file_encoding = encoding
        self.fileName = filename
        return text

    def setFont(self, value):
        """Build a bold font of size ``value`` in the application font family.

        NOTE(review): the font is attached to a new Text widget that is never
        placed in the layout, so the visible editor is not affected — confirm
        whether the editor's font was meant to change.
        """
        fnt = font.Font(family=self.textFontStyle, size=value, weight="bold", underline=0)
        Text(self, font=fnt)

    def setCursorLabel(self, cursor_index):
        """Show ``cursor_index`` (``row.col``) in the status label."""
        row, col = cursor_index.split('.')
        self.cursor_index_label.config(text=f"Ln {row}, Col {col}")

    def clear_preview_mark(self, _):
        """Event handler: remove the command-range preview highlight."""
        self.text.tag_delete('cmd-preview')

    def preview_cmd_range(self):
        """Highlight the characters the command in the entry box would cover.

        A leading integer in the entry is the character count (negative
        counts extend to the left of the cursor); without one, 1 is used.

        :return: always True, so it can serve as an Entry validation command
        """
        preview_tag = 'cmd-preview'
        cmd = self.entry.get().strip()
        self.text.tag_delete(preview_tag)
        self.text.tag_configure(preview_tag, background='light salmon')
        match = re.match(r'^(-?[0-9]+).*', cmd)
        count = int(match.group(1)) if match else 1
        if count > 0:
            self.text.tag_add(preview_tag, INSERT, f'{INSERT}+{count}c')
        else:
            self.text.tag_add(preview_tag, f'{INSERT}-{abs(count)}c', INSERT)
        return True

    def execute_command(self, _):
        """Run the command typed in the entry box and clear the box.

        :return: the raw entry content
        """
        self.pushToHistory()
        content = self.entry.get()
        self.clearCommand()
        self.execute_entry_command(content.strip())
        return content

    def alphanum_key_pressed(self, event):
        """Handle a letter/digit typed in the editor as a shortcut command."""
        press_key = event.char
        self.pushToHistory()
        self.clearCommand()
        self.execute_cursor_command(press_key.lower())
        # Prevent the typed character from being inserted into the text.
        return 'break'

    def backToHistory(self, _):
        """Undo: restore the most recent snapshot of text and cursor."""
        if self.debug:
            print("Action Track: backToHistory")
        if len(self.history) > 0:
            content, cursor = self.history.pop()
            self.writeFile(self.fileName, content, cursor)
        else:
            print("History is empty!")

    def keepCurrent(self, _):
        """Control-key handler; only acts (and logs) when debugging is on."""
        if self.debug:
            print("Action Track: keepCurrent")
            print("keep current, insert:", INSERT)
            print("before:", self.text.index(INSERT))
            self.text.insert(INSERT, 'p')
            print("after:", self.text.index(INSERT))

    def clearCommand(self):
        """Empty the command entry box."""
        if self.debug:
            print("Action Track: clearCommand")
        self.entry.delete(0, 'end')

    @staticmethod
    def _parse_entity(markup):
        """Split ``[@words#Label*]`` / ``[$words#Label*]`` into (words, label).

        The markers are removed by position, so words that themselves begin
        with '$', '@' or '[' are preserved. The label follows the last '#'.
        """
        inner = markup
        if inner[:2] in ('[@', '[$'):
            inner = inner[2:]
        if inner.endswith('*]'):
            inner = inner[:-2]
        words, label = inner.rsplit('#', 1)
        return words, label

    def execute_cursor_command(self, command):
        """Apply shortcut ``command`` to the selection or the entity under the cursor.

        * selection outside any entity: wrap it as a new entity;
        * cursor inside an entity, or a whole entity selected:
          'q' removes the markup, 'y' confirms a recommendation, any other
          bound key changes the label.
        """
        print("Command:" + command)
        found, (start, end) = self.text.current_entity()
        selected = self.text.get_selection()
        if not found and not selected:
            print(f'{command} outside entity, no selection, do nothing')
            return
        # selected whole entity, cursor just outside it
        selected_whole = selected is not None and \
            (re.match(self.entity_regex, selected) or re.match(self.recommendRe, selected))

        # cursor outside existing entity & has selection
        if not found and selected is not None and not selected_whole:
            if self.get_cmd_by_key(command) is None:
                print(f'{command} key not bound, outside entity, do nothing')
                return
            cursor_index = self.text.index(SEL_LAST)
            entity_content, cursor_index = self.replaceString(selected, selected, command, cursor_index)
            above_half = self.text.get('1.0', SEL_FIRST) + entity_content
            below_half = self.text.get(SEL_LAST, "end-1c")
            content = self.addRecommendContent(above_half, below_half, self.use_recommend.get())
            self.writeFile(self.fileName, content, cursor_index)
            return

        # Cursor inside existing entity, no matter has or not has selection.
        # Or Cursor outside existing entity (just on the edge), with the whole entity selected
        if selected_whole:
            start, end = self.text.index(SEL_FIRST), self.text.index(SEL_LAST)
        covered_string = self.text.get(start, end)
        old_entity, old_label = self._parse_entity(covered_string)

        if command == "q":
            print('q: remove entity label')
            # The markup adds 5 marker characters plus the label.
            new_cursor = f'{end}-{5 + len(old_label)}c'
            entity_content = old_entity
        elif command == 'y':
            print("y: confirm recommend label")
            entity_content = f'[@{old_entity}#{old_label}*]'
            new_cursor = end
        elif len(old_entity) > 0 and self.get_cmd_by_key(command) is not None:
            print(f'{command}: change entity type')
            cmd = self.get_cmd_by_key(command)
            entity_content = f'[@{old_entity}#{cmd.name}*]'
            delta = len(cmd.name) - len(old_label)
            new_cursor = end + (f'+{delta}c' if delta >= 0 else f'{delta}c')
        else:
            print(f'{command}: key not bound, do nothing')
            return
        above_half = self.text.get('1.0', start) + entity_content
        below_half = self.text.get(end, 'end-1c')
        content = self.addRecommendContent(above_half, below_half, self.use_recommend.get())
        self.writeFile(self.fileName, content, new_cursor)

    @staticmethod
    def _split_commands(string):
        """Parse ``"3a2b"`` into ``[(3, 'a'), (2, 'b')]``.

        A command character without a preceding count is skipped; trailing
        digits without a command character are ignored.
        """
        commands = []
        num = ''
        for c in string:
            if c.isdigit():
                num += c
                continue
            if num:
                commands.append((int(num), c))
            num = ''
        return commands

    def execute_entry_command(self, command):
        """Execute a command typed in the entry box.

        * empty: move the cursor to the start of the next line;
        * ``N`` / ``-N``: move the cursor N characters right / left;
        * ``<count><key>...``: annotate the next ``count`` characters with
          the label bound to ``key``, for each pair in turn.
        """
        print(f"EntryCommand: {command}")
        if command == '':  # move to next line
            row, _ = self.text.index(INSERT).split('.')
            self.text.mark_set(INSERT, f'{int(row) + 1}.0')
            self.show_cursor_pos(None)
        elif command.isdigit():
            self.text.mark_set(INSERT, f'{INSERT}+{command}c')
            self.show_cursor_pos(None)
            self.preview_cmd_range()
        elif len(command) >= 2 and command[0] == '-' and command[1:].isdigit():
            self.text.mark_set(INSERT, f'{INSERT}{command}c')
            self.show_cursor_pos(None)
            self.preview_cmd_range()
        else:
            for select_num, cmd in self._split_commands(command):
                if select_num <= 0:
                    # Nothing to annotate; user input is not validated with assert.
                    continue
                sel_start = self.text.index(INSERT)
                sel_end = self.text.index(f'{INSERT}+{select_num}c')
                selected = self.text.get(sel_start, sel_end)
                if self.get_cmd_by_key(cmd) is not None:
                    above_half = self.text.get('1.0', sel_start)
                    below_half = self.text.get(sel_start, "end-1c")
                    below_half, new_cursor = self.replaceString(below_half, selected, cmd, sel_end)
                    content = self.addRecommendContent(above_half, below_half, self.use_recommend.get())
                    self.writeFile(self.fileName, content, new_cursor)

    def replaceString(self, content, string, replaceType, cursor_index):
        """Wrap the first occurrence of ``string`` in ``content`` as an entity.

        :param replaceType: shortcut key whose label is used
        :param cursor_index: ``row.col`` index just after ``string``
        :return: ``(new_content, new_cursor_index)``; both unchanged when the
                 key is not bound
        """
        keydef = self.get_cmd_by_key(replaceType)
        if keydef is None:
            print("Invalid command!")
            print("cursor index: ", self.text.index(INSERT))
            return content, cursor_index
        new_string = "[@" + string + "#" + keydef.name + "*]"
        row, col = cursor_index.split('.')
        # The markup adds 5 marker characters plus the label.
        newcursor_index = f"{row}.{int(col) + len(keydef.name) + 5}"
        content = content.replace(string, new_string, 1)
        return content, newcursor_index

    def writeFile(self, fileName, content, newcursor_index):
        """Write ``content`` to the ``.ann`` file and reload it into the editor.

        ``.ann`` is appended to ``fileName`` unless the name already contains it.
        """
        print("writeFile")
        if len(fileName) == 0:
            print("Don't write to empty file!")
            return
        new_name = fileName if ".ann" in fileName else fileName + '.ann'
        with open(new_name, 'w', encoding=self.file_encoding) as ann_file:
            ann_file.write(content)
        self.autoLoadNewFile(new_name, newcursor_index)

    def addRecommendContent(self, train_data, decode_data, recommendMode):
        """Join the two halves of the text, adding recommendations if enabled.

        :param train_data: text up to and including the latest annotation
        :param decode_data: remaining text
        :param recommendMode: when true the halves are combined by
               ``maximum_matching``; otherwise they are concatenated
        """
        if not recommendMode:
            return train_data + decode_data
        if self.debug:
            print("Action Track: addRecommendContent, start Recommend entity")
        return maximum_matching(train_data, decode_data)

    def autoLoadNewFile(self, fileName, newcursor_index):
        """Load ``fileName`` into the editor and place the cursor at ``newcursor_index``."""
        if self.debug:
            print("Action Track: autoLoadNewFile")
        if len(fileName) > 0:
            self.text.delete("1.0", END)
            text = self.readFile(fileName)
            self.text.insert("end-1c", text)
            self.filename_lbl.config(text="File: " + fileName)
            self.text.mark_set(INSERT, newcursor_index)
            self.text.see(newcursor_index)
            self.show_cursor_pos(None)
            self.text.update_view()

    def pushToHistory(self):
        """Snapshot the current text and cursor for undo (bounded history)."""
        self.history.append((self.text.get_text(), self.text.index(INSERT)))

    def _save_keymap(self, path):
        """Write the current shortcut map to ``path`` as JSON."""
        # json.dump emits str, so the file must be opened in text mode.
        with open(path, 'w') as fp:
            json.dump(self.KeyDef2Dic(self.pressCommand), fp)

    # update shortcut map, directly in current configfile
    def renewPressCommand(self):
        """Save the edited shortcut map into the current config file."""
        if self.debug:
            print("Action Track: renewPressCommand")
        self.pressCommand = self.keymap_frame.read_keymap()
        self._save_keymap(self.configFile)
        self.keymap_frame.update_keymap(self.pressCommand)
        messagebox.showinfo("Remap Notification",
                            "Shortcut map has been updated!\n\n" +
                            "Configure file has been saved in File:" + self.configFile)

    # save as new shortcut map
    def savenewPressCommand(self):
        """Save the edited shortcut map under a new, user-chosen config file."""
        if self.debug:
            print("Action Track: savenewPressCommand")
        # prompt to ask configFile name
        chosen = filedialog.asksaveasfilename(
            initialdir="./configs/",
            title="Save New Config",
            filetypes=(("YEDDA configs", "*.config"), ("all files", "*.*")))
        if not chosen:
            # Dialog cancelled: keep the current config and keymap untouched.
            return
        self.pressCommand = self.keymap_frame.read_keymap()
        # change to relative path following self.init()
        self.configFile = os.path.relpath(chosen)
        # make sure ending with ".config"
        if not self.configFile.endswith(".config"):
            self.configFile += ".config"
        self._save_keymap(self.configFile)
        self.keymap_frame.update_keymap(self.pressCommand)
        messagebox.showinfo("Save New Map Notification",
                            "Shortcut map has been saved and updated!\n\n"
                            + "Configure file has been saved in File:" + self.configFile)

    def on_select_configfile(self, event=None):
        """Load the config chosen in the template combobox.

        :param event: the ``<<ComboboxSelected>>`` event; when omitted the
               combobox's current value is used
        """
        selected = event.widget.get() if event is not None else self.configListBox.get()
        if event and self.debug:
            print("Change shortcut map to: ", selected)
        self.configFile = os.path.join("configs", selected)
        self.configListBox.set(os.path.basename(self.configFile))
        self.readConfig()
        self.keymap_frame.update_keymap(self.pressCommand)

    def generateSequenceFile(self):
        """Export the current file as one ``token tag`` pair per line.

        Options are requested through a QueryExport dialog. Output goes to
        the file name up to ``.ann`` plus the lower-cased tag scheme as
        extension; sentences are separated by an empty line.

        :return: -1 when the file name contains neither ``.ann`` nor
                 ``.txt``; otherwise None
        """
        if (".ann" not in self.fileName) and (".txt" not in self.fileName):
            out_error = "Export only works on filename ended in .ann or .txt!\nPlease rename file."
            print(out_error)
            messagebox.showerror("Export error!", out_error)
            return -1
        dlg = QueryExport(self, self.fileName, self.text.get_text()[:100])
        if not dlg.confirmed:
            print("Operation canceled")
            return
        with open(self.fileName, 'r', encoding=self.file_encoding) as source:
            fileLines = source.readlines()
        lineNum = len(fileLines)
        new_filename = self.fileName.split('.ann')[0] + '.' + dlg.tag_scheme().lower()
        with open(new_filename, 'w', encoding=self.file_encoding) as seqFile:
            for line in fileLines:
                if len(line) <= 2:
                    seqFile.write('\n')
                    continue
                if not dlg.keep_recommended():
                    line = removeRecommendContent(line, self.recommendRe)
                    pattern = self.entity_regex
                else:
                    pattern = self.goldAndrecomRe
                wordTagPairs = getWordTagPairs(line, dlg.segmented(), dlg.tag_scheme(), dlg.only_NP(), pattern)
                for wordTag in wordTagPairs:
                    seqFile.write(wordTag)
                # use null line to separate sentences
                seqFile.write('\n')
        print("Exported file into sequence style in file: ", new_filename)
        print("Line number:", lineNum)
        showMessage = "Exported file successfully!\n\n"
        showMessage += "Tag scheme: " + dlg.tag_scheme() + "\n\n"
        showMessage += "Keep Recom: " + str(dlg.keep_recommended()) + "\n\n"
        showMessage += "Text Segmented: " + str(dlg.segmented()) + "\n\n"
        showMessage += "Line Number: " + str(lineNum) + "\n\n"
        showMessage += "Saved to File: " + new_filename
        messagebox.showinfo("Export Message", showMessage)

    def get_cmd_by_key(self, key):
        """Return the KeyDef bound to ``key``, or None."""
        return next((item for item in self.pressCommand if item.key == key), None)

    def get_cmd_by_name(self, name):
        """Return the KeyDef whose label is ``name``, or None."""
        return next((item for item in self.pressCommand if item.name == name), None)
|
|
|
|
|
|
def getConfigList():
    """Return the sorted names of the ``*.config`` files in ``./configs``.

    Hidden files (names starting with a dot) are left out.
    """
    return sorted(
        name
        for name in os.listdir("./configs")
        if name.endswith(".config") and not name.startswith(".")
    )
|
|
|
|
|
|
def getWordTagPairs(tagedSentence, segmented=True, tagScheme="BMES", onlyNP=False, entityRe=r'\[\@.*?\#.*?\*\]'):
    """Convert one annotated sentence into ``token tag`` output lines.

    The sentence is cut into consecutive chunks — annotated spans matched by
    ``entityRe`` and the plain text between them — which are then handed to
    ``turnFullListToOutputPair``.

    :param tagedSentence: sentence with inline annotations; surrounding
           newlines are stripped
    :param segmented: passed through; whether tokens are whitespace separated
    :param tagScheme: passed through; "BMES" or another scheme name
    :param onlyNP: passed through; replace every label by "NP"
    :param entityRe: regular expression matching one annotated span
    :return: whatever ``turnFullListToOutputPair`` returns for the chunks
    """
    sentence = tagedSentence.strip('\n')
    total = len(sentence)
    chunks = []  # items: (chunk_of_words, start, end, is_tagged)
    consumed = 0  # end offset of the text already turned into chunks
    for found in re.finditer(entityRe, sentence):
        begin, finish = found.span()
        if begin > consumed:
            # Plain text between the previous chunk and this annotation.
            chunks.append((sentence[consumed:begin], consumed, begin, False))
        chunks.append((found.group(), begin, finish, True))
        consumed = finish
    if consumed < total:
        # Plain text after the last annotation (or the whole sentence).
        chunks.append((sentence[consumed:total], consumed, total, False))
    return turnFullListToOutputPair(chunks, segmented, tagScheme, onlyNP)
|
|
|
|
|
|
def turnFullListToOutputPair(fullList, segmented=True, tagScheme="BMES", onlyNP=False):
    """Turn sentence chunks into ``"token tag\\n"`` strings.

    :param fullList: iterable of ``(chunk_text, start, end, is_tagged)``;
           tagged chunks are annotation markup such as ``[@words#Label*]``
    :param segmented: when true tokens are whitespace-separated words,
           otherwise every character is a token
    :param tagScheme: scheme passed to ``outputWithTagScheme``
    :param onlyNP: when true every label is replaced by "NP"
    :return: list of output lines; untagged tokens get the tag ``O``
    """
    pair_list = []
    for chunk_words, _start, _end, is_tagged in fullList:
        if is_tagged:
            # Remove the markers by position rather than with str.strip(),
            # which would also eat '$', '@' or '[' belonging to the words.
            inner = chunk_words
            if inner[:2] in ('[@', '[$'):
                inner = inner[2:]
            if inner.endswith('*]'):
                inner = inner[:-2]
            # The label follows the last '#'.
            plain_words, label = inner.rsplit('#', 1)
            if segmented:
                plain_words = plain_words.split()
            if onlyNP:
                label = "NP"
            pair_list.extend(outputWithTagScheme(plain_words, label, tagScheme))
        else:
            # Unsegmented text is iterated character by character.
            words = chunk_words.split() if segmented else chunk_words
            for word_or_char in words:
                if word_or_char == ' ':
                    continue
                pair_list.append(word_or_char + ' ' + 'O\n')
    return pair_list
|
|
|
|
|
|
def outputWithTagScheme(input_list, label, tagScheme="BMES"):
    """Tag every token of one entity according to the tag scheme.

    With "BMES" a single token gets ``S-``; otherwise the first token gets
    ``B-``, the last ``E-`` and the ones in between ``M-``. With any other
    scheme the first token gets ``B-`` and the rest ``I-``.

    :param input_list: sequence of tokens (a list of words or a string of
           characters)
    :param label: entity label appended to the position prefix
    :return: list of ``"token PREFIX-label\\n"`` strings
    """
    final_index = len(input_list) - 1

    def position_prefix(position):
        if tagScheme != "BMES":
            return 'B-' if position == 0 else 'I-'
        if final_index == 0:
            return 'S-'
        if position == 0:
            return 'B-'
        if position == final_index:
            return 'E-'
        return 'M-'

    return [
        token + ' ' + position_prefix(position) + label + '\n'
        for position, token in enumerate(input_list)
    ]
|
|
|
|
|
|
def removeRecommendContent(content, recommendRe=r'\[\$.*?\#.*?\*\](?!\#)'):
    """Replace every recommended annotation by its plain words.

    ``[$words#Label*]`` becomes ``words``; all other text is kept unchanged.

    :param content: text possibly containing recommended annotations
    :param recommendRe: regular expression matching one such annotation
    :return: the text with the recommendation markup removed
    """
    pieces = []
    last_match_end = 0
    for match in re.finditer(recommendRe, content):
        start, end = match.span()
        inner = match.group()
        # Remove the markers by position: str.strip('[$]') would also drop a
        # '$' or '[' that is part of the words themselves.
        if inner.startswith('[$'):
            inner = inner[2:]
        if inner.endswith('*]'):
            inner = inner[:-2]
        # The label follows the LAST '#', so words containing '#' stay whole.
        words = inner.rsplit('#', 1)[0]
        pieces.append(content[last_match_end:start])
        pieces.append(words)
        last_match_end = end
    pieces.append(content[last_match_end:])
    return "".join(pieces)
|
|
|
|
|
|
def main():
    """Create the root window centred on the screen and run the GUI loop."""
    print("YEDDA launched!")
    print("OS:", platform.system())
    root = Tk()
    width = 1300
    height = 700
    # Centre the window; clamp to 0 when the screen is smaller than the window.
    left = max((root.winfo_screenwidth() - width) // 2, 0)
    top = max((root.winfo_screenheight() - height) // 2, 0)
    root.geometry(f'{width}x{height}+{left}+{top}')
    application = Application(root)
    application.setFont(17)
    root.mainloop()


if __name__ == '__main__':
    main()
|