# Spaces: Running
import string

import pandas as pd
from evaluate import load
# Obtain the SARI metric object through the `evaluate` loader.
sari = load("sari")

# Read the results table; the relative path is resolved against the
# current working directory.
RESULTS_PATH = "MT0_xxl_results/result_pt_80p"
df = pd.read_csv(RESULTS_PATH)
# Translation table that deletes every ASCII punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def process_sentence(sentence):
    """Normalize a sentence before it is passed to the metric.

    Keeps only the first line of ``sentence``, lower-cases it, removes every
    character in ``string.punctuation``, and drops a single trailing
    Devanagari danda ("।") along with any whitespace around the result.

    Args:
        sentence: The raw text. Any value that is not a ``str`` (for example
            ``None`` or a float) is treated as missing.

    Returns:
        The normalized string, or ``""`` when ``sentence`` is not a ``str``.
    """
    if not isinstance(sentence, str):
        return ""

    # Only the first line is kept; anything after a newline is discarded.
    first_line = sentence.split("\n")[0]
    cleaned = first_line.strip().lower().translate(_PUNCTUATION_TABLE).strip()

    if cleaned.endswith("।"):
        # Strip again so a space that preceded the danda does not linger.
        cleaned = cleaned[:-1].rstrip()
    return cleaned
# Normalize the source sentences and the model predictions.
original = list(map(process_sentence, df['original']))
predicted = list(map(process_sentence, df['pred_label']))

# Columns of the results table that hold the reference sentences;
# adjust this list if the table uses different column names.
reference_columns = ["label1", "label2", "label3", "label4"]

# One list of normalized references per row, in row order.
references = [
    [process_sentence(row[col]) for col in reference_columns]
    for _, row in df.iterrows()
]

# Compute the SARI score and report it.
results = {
    'sari': sari.compute(
        sources=original,
        predictions=predicted,
        references=references,
    ),
}
print(results)