From 0aff2b5ce47b4d6a7964e84673495bcb0d21e553 Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Mon, 4 Mar 2024 18:30:56 +0100
Subject: [PATCH] Add more replacements for context for QA

---

Review notes (free text between the three-dash line and the diffstat; it is
not part of the commit message and not part of the diff):

  What the patch does
    * Touches the helper replace_symbol_for_grammar(sample) in task.py; the
      hunk header places it under class QuestionAnswering(Task).
    * Before: only the literal en dash was replaced by "-" in
      sample["context"] and in every entry of sample["answers"]["text"].
    * After: both "\u2013" (en dash) and "\u2014" (em dash) are replaced by
      "-" in the same two fields. The helper still mutates `sample` in place
      and returns it.
    * Mechanism: the mapping keys are passed through re.escape, joined with
      "|" into one compiled alternation, and each match is looked up in the
      mapping again via re.escape(m.group(0)). The second re.escape is needed
      only because the keys were escaped on the line before.

  NOTE(review): the added lines call re.escape / re.compile, but this patch
    adds no import. task.py must already import `re` - confirm.
  NOTE(review): the mapping and the compiled pattern are rebuilt on every
    call of the helper. If it runs once per sample, consider hoisting them,
    or (all keys are single characters) using
    str.translate(str.maketrans({...})), which should give the same result
    without `re` and without a lambda bound to a name.
  NOTE(review): leading whitespace of the hunk lines below was reconstructed
    from a whitespace-collapsed copy of this patch. Verify it against
    task.py before applying (or apply with --ignore-whitespace).

 task.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/task.py b/task.py
index 529a62f..08948ba 100644
--- a/task.py
+++ b/task.py
@@ -258,9 +258,16 @@ class QuestionAnswering(Task):
         evaluation_usage = ModelUsage()
 
         def replace_symbol_for_grammar(sample: Mapping):
-            sample["context"] = sample["context"].replace("–", "-")
+            symbol_replacement_mapping = {
+                "\u2013": "-",
+                "\u2014": "-",
+            }
+            symbol_replacement_mapping = dict((re.escape(k), v) for k, v in symbol_replacement_mapping.items())
+            symbol_replacement_pattern = re.compile("|".join(symbol_replacement_mapping.keys()))
+            replace_fn = lambda text: symbol_replacement_pattern.sub(lambda m: symbol_replacement_mapping[re.escape(m.group(0))], text)
+            sample["context"] = replace_fn(sample["context"])
             sample["answers"]["text"] = [
-                text.replace("–", "-") for text in sample["answers"]["text"]
+                replace_fn(text) for text in sample["answers"]["text"]
             ]
             return sample
 
-- 
GitLab