From 0aff2b5ce47b4d6a7964e84673495bcb0d21e553 Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Mon, 4 Mar 2024 18:30:56 +0100
Subject: [PATCH] Add more replacements for context for QA

---
 task.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/task.py b/task.py
index 529a62f..08948ba 100644
--- a/task.py
+++ b/task.py
@@ -258,9 +258,16 @@ class QuestionAnswering(Task):
         evaluation_usage = ModelUsage()
 
         def replace_symbol_for_grammar(sample: Mapping):
-            sample["context"] = sample["context"].replace("–", "-")
+            """Replace en and em dashes in the context and answer texts with ASCII hyphens."""
+            # Every key is a single code point, so one str.translate pass
+            # handles all replacements; no regex (and no `re` import) needed.
+            dash_table = str.maketrans({
+                "\u2013": "-",  # en dash
+                "\u2014": "-",  # em dash
+            })
+            sample["context"] = sample["context"].translate(dash_table)
             sample["answers"]["text"] = [
-                text.replace("–", "-") for text in sample["answers"]["text"]
+                text.translate(dash_table) for text in sample["answers"]["text"]
             ]
             return sample
 
-- 
GitLab