Commit 3cb276f1 authored by Juliette Faille
Browse files

initial commit

parent 93f37332
__author__='jfaille'
import os
import json
import copy
def last_line_of_file(path_file):
    """Return the final line and the third-from-last line of a text file.

    Every line is stripped of surrounding whitespace before the two lines
    are picked.  The caller in this file treats the first element as the
    score row and the second as the header row of a results table.

    Args:
        path_file: Path of the text file to read.

    Returns:
        A ``(last_line, third_from_last_line)`` tuple of stripped strings,
        or ``None`` when the file holds fewer than three lines (an error
        naming the file is printed in that case).
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default:
    # the evaluated sentences logged in these files contain non-ASCII
    # characters, which would fail to decode on a non-UTF-8 locale.
    with open(path_file, encoding='utf-8') as file:
        content = [line.strip() for line in file]
    if len(content) > 2:
        return content[-1], content[-3]
    print('Error: ', path_file)
    return None
def parse_results(line_results):
    """Split a line on the space character and drop the empty pieces.

    Only ``' '`` acts as a separator, so runs of spaces collapse while
    tabs or other whitespace stay inside their token.

    Args:
        line_results: The text line to tokenise.

    Returns:
        The list of non-empty tokens, in their original order.
    """
    return [token for token in line_results.split(' ') if token]
def _metric_names(line_headers, expected_count):
    """Return the column titles of a header row, one per score if possible."""
    # Titles such as "BLEU NLTK" or "BERT-SCORE P" contain a single space, so
    # tokenising the header like the score row yields more names than scores
    # and files the scores under the wrong keys.  Splitting on runs of two or
    # more spaces keeps such titles whole.
    # NOTE(review): assumes the raw result files pad columns with at least
    # two spaces -- confirm against an actual results file.
    wide_split = [cell.strip() for cell in line_headers.split('  ') if cell.strip()]
    if len(wide_split) == expected_count:
        return wide_split
    # Otherwise keep the original single-space tokenisation.
    return parse_results(line_headers)


def add_results_to_dict(path):
    """Collect the metric scores of every result file found under *path*.

    *path* is expected to contain one sub-directory per participant, each
    holding one result file per sample.  For every file the score row and
    header row returned by ``last_line_of_file`` are paired column by column.

    Args:
        path: Root directory of the per-participant result folders.

    Returns:
        A list of dicts, one per parsed file, with ``'submission_id'`` (the
        participant directory name), ``'sample_id'`` (the file name) and one
        entry per metric column.  Scores are kept as the strings read from
        the file.

    Raises:
        IndexError: If a file yields fewer column names than scores.
    """
    list_dict_results = []
    for name_participant in os.listdir(path):
        participant_dir = path + '/' + name_participant
        print('path', participant_dir)
        for line_result in os.listdir(participant_dir):
            result_file = participant_dir + '/' + line_result
            print(result_file)
            # Read the file once; the result is reused below.
            last_lines = last_line_of_file(result_file)
            if last_lines is None:
                # NOTE(review): the source's indentation was lost, so it is
                # unclear whether unparsable files were still appended with
                # only their two id fields; they are skipped here -- confirm.
                continue
            line_scores, line_headers = last_lines
            list_scores = parse_results(line_scores)
            list_name_metrics = _metric_names(line_headers, len(list_scores))
            dict_results = {
                'submission_id': name_participant,
                'sample_id': line_result,
            }
            for position, score in enumerate(list_scores):
                dict_results[list_name_metrics[position]] = score
            list_dict_results.append(dict_results)
    return list_dict_results
def open_json(path_json):
    """Load and return the JSON document stored in a file.

    Args:
        path_json: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the
        file's content).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # The context manager closes the handle even when reading or parsing
    # raises; JSON text is decoded explicitly as UTF-8 rather than with the
    # locale default.
    with open(path_json, "r", encoding="utf-8") as dict_file:
        return json.load(dict_file)
def merge_results_auto_human(list_dict_auto, list_dict_human):
    """Attach human-evaluation scores to the automatic-metric records.

    A human record matches an automatic record when both share the same
    ``'sample_id'`` and the human ``'submission_id'`` equals the automatic
    one after renaming through the module-level ``translation`` table.

    Args:
        list_dict_auto: Dicts with at least ``'sample_id'`` and
            ``'submission_id'`` plus the automatic metric values.
        list_dict_human: Dicts with ``'sample_id'``, ``'submission_id'`` and
            the five human criteria copied below.

    Returns:
        A list of deep copies of the automatic records that ended up with a
        ``"TextStructure"`` entry, each extended with the human criteria.
        For every other automatic record its submission id is printed and
        the record is left out.
    """
    human_fields = (
        "Correctness",
        "DataCoverage",
        "Fluency",
        "Relevance",
        "TextStructure",
    )
    # Index the human records once so each automatic record is a single
    # lookup instead of a scan over the whole human list.  On duplicate keys
    # the later record wins, as it did with the sequential scan.
    # Assumes both id values are hashable (e.g. strings or numbers).
    human_by_key = {}
    for dict_human in list_dict_human:
        key = (dict_human['sample_id'], dict_human['submission_id'])
        human_by_key[key] = dict_human

    list_dict_all = []
    for dict_auto in list_dict_auto:
        dict_all = copy.deepcopy(dict_auto)
        submission_id = dict_auto['submission_id']
        # Names absent from the table are looked up under their own name
        # instead of aborting the whole merge with a KeyError.
        human_name = translation.get(submission_id, submission_id)
        dict_human = human_by_key.get((dict_auto['sample_id'], human_name))
        if dict_human is not None:
            for field in human_fields:
                dict_all[field] = dict_human[field]
        if "TextStructure" in dict_all:
            list_dict_all.append(dict_all)
        else:
            # No human evaluation found for this record: report and drop it.
            print(dict_auto['submission_id'])
    return list_dict_all
# Maps a submission name as it appears in the automatic-results records to
# the name compared against the human-evaluation records in
# merge_results_auto_human.

# Submissions whose two names differ.
_RENAMED_SUBMISSIONS = {
    "FBConvAI_34": "FBConvAI",
    "DANGNT": "DANGNT-SGU",
    "OSU_Neural_NLG_30": "OSU_Neural_NLG",
    "Huawei_Noahs_Ark_Lab_2_17": "Huawei_Noahs_Ark_Lab",
    "baseline_FORGe": "Baseline-FORGE2017",
}

# Submissions that carry the same name on both sides.
_UNCHANGED_SUBMISSIONS = (
    "TGen",
    "NUIG-DSI",
    "ORANGE-NLG",
    "Amazon_AI_(Shanghai)",
    "CycleGT",
    "RALI",
    "NILC",
    "bt5",
    "UPC-POE",
    "cuni-ufal",
)

translation = dict(_RENAMED_SUBMISSIONS)
translation.update((name, name) for name in _UNCHANGED_SUBMISSIONS)
if __name__ == '__main__':
    # Script entry point: parse the automatic-metric result files, load the
    # human-evaluation JSON, merge the two and dump the merged records.
    # All locations are absolute paths on the author's machine.

    # --- inputs ---
    input_path_raw_results = '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_pred_2020/results'
    input_path_human = '/home/jfaille/Documents/webnlg_pipeline_analysis/webnlg_analysis/data_correlation/english_humeval_data_all_teams.json'
    # Alternative human-evaluation input kept from the original script:
    # input_path_human = '/home/jfaille/Documents/webnlg_pipeline_analysis/webnlg_analysis/data_correlation/2020_old/final_scores_human_eval_and_auto.json'

    # --- outputs ---
    # Only used by the disabled dump of the automatic results noted below.
    output_path_list_dict_results = '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_pred_2020/dict_results.json'
    output_path_allresults = '/home/jfaille/Documents/webnlg_pipeline_analysis/webnlg_analysis/data_correlation/final_scores_human_eval_and_auto.json'

    # Automatic metrics first, then the human scores.
    list_dict_results = add_results_to_dict(input_path_raw_results)
    list_dict_results_human_eval = open_json(input_path_human)
    print('list_dict_results_human_eval', len(list_dict_results_human_eval))
    print('auto', len(list_dict_results))

    list_dict_all_results = merge_results_auto_human(
        list_dict_results,
        list_dict_results_human_eval,
    )
    print('human+auto', len(list_dict_all_results))

    # Disabled in the original script: dumping list_dict_results to
    # output_path_list_dict_results with json.dump.
    with open(output_path_allresults, 'w+') as fout:
        json.dump(list_dict_all_results, fout)
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
["Agremiação Sportiva Arapiraquense is a club that plays in Brazil's Campeonato Brasileiro Série C league. Also known as Alvinegro, the club represents 17,000 members.", 'Agremiacao Sportiva Arapiraquense, known as Alvinegro play in the Campeonato Brasileiro Série C league from Brazil has 17000 members.', 'The 17000 member group Agremiação Sportiva Arapiraquense, nicknamed Alvinegro, play in the Campeonato Brasileiro Série C league from Brazil.'] ['The nickname of Agremiação Sportiva Arapiraquense is "Alvinegro". They have 17000 members and play in the Campeonato Brasileiro Série C league in Brazil.', 'The nickname of Agremiação Sportiva Arapiraquense is "Alvinegro". They have 17000 members and play in the Campeonato Brasileiro Série C league in Brazil.', 'The nickname of Agremiação Sportiva Arapiraquense is "Alvinegro". They have 17000 members and play in the Campeonato Brasileiro Série C league in Brazil.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
40.15 0.39 0.41 0.72 0.77 0.95 0.96 0.96 0.34
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
["Nie Haisheng is from Hubei in the People's Republic of China.", "Nie Haisheng was born in Hubei province and is a citizen of the people's Republic of China.", "Nie Haisheng was born in Hubei in the People's Republic of China."] ["Nie Haisheng was born in Hubei and is a national of the People's Republic of China.", "Nie Haisheng was born in Hubei and is a national of the People's Republic of China.", "Nie Haisheng was born in Hubei and is a national of the People's Republic of China."]
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
62.79 0.65 0.55 0.86 0.58 0.98 0.98 0.98 0.51
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The 11th Mississippi Infantry Monument, established in the year 2000, is located in Adams County Pennsylvania which is east of Franklin County Pennsylvania.', 'To the east of Franklin County, Pennsylvania is Adams County, Pennsylvania where the 11th Mississippi Infantry Monument was established in 2000.', "Pennsylvania's Franklin County is found to the west of Adams County, the location of the 11th Mississippi Infantry Monument, erected in 2000."] ['The 11th Mississippi Infantry Monument, established in 2000, is located in Adams County, Pennsylvania, which has Franklin County to its west.', 'The 11th Mississippi Infantry Monument, established in 2000, is located in Adams County, Pennsylvania, which has Franklin County to its west.', 'The 11th Mississippi Infantry Monument, established in 2000, is located in Adams County, Pennsylvania, which has Franklin County to its west.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
61.81 0.62 0.44 0.72 0.67 0.96 0.97 0.96 0.37
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/00
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
['eval.py', '-R', '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/', '-H', '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_pred_2020/tmp.txt', '-nr', '1', '-m', 'bleu,meteor,chrf++,ter,bert,bleurt']
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
['eval.py', '-R', '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/0', '-H', '/home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_pred_2020/tmp.txt', '-nr', '1', '-m', 'bleu,meteor,chrf++,ter,bert,bleurt']
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/0
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Anatole de Grunwald died in London.'] ['Anatole de Grunwald died in London.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
100 1 1 1 0.5 1 1 1 0.99
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The Acharya Institute of Technology which is affiliated to Visvesvaraya Technological University has 700 postgraduate students.', 'The Acharya Institute of Technology is affiliated with Visvesvaraya Technological University and has about 700 post-graduate students.', 'The Acharya Institute of Technology, with around 700 postgraduate students, is affiliated with the Visvesvaraya Technological University.'] ['The Acharya Institute of Technology is affiliated to the Visvesvaraya Technological University and has 700 postgraduate students.', 'The Acharya Institute of Technology is affiliated to the Visvesvaraya Technological University and has 700 postgraduate students.', 'The Acharya Institute of Technology is affiliated to the Visvesvaraya Technological University and has 700 postgraduate students.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
82.3 0.82 0.53 0.87 0.59 1 0.99 0.99 0.69
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Trane, which was founded on January 1, 1913 in La Crosse, Wisconsin (a city 204 meters above sea level) and has a revenue of $10,264,000,000.', 'Trane was founded on 1-1-1913 in LaCrosse, Wisconsin, and has revenue of $10,264,000,000. LaCrosse is 204 meters above sea level.', 'Trane, with a revenue of $10,264,000,000.00, was founded on January 1, 1913 in La Crosse, Wisconsin. La Crosse sits 204 meters above sea level.'] ['Trane, a company with a revenue of $10,264,000,000, was founded in La Crosse, Wisconsin on January 1, 1913. La Crosse is located at 204.0 above sea level.', 'Trane, a company with a revenue of $10,264,000,000, was founded in La Crosse, Wisconsin on January 1, 1913. La Crosse is located at 204.0 above sea level.', 'Trane, a company with a revenue of $10,264,000,000, was founded in La Crosse, Wisconsin on January 1, 1913. La Crosse is located at 204.0 above sea level.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
50.18 0.5 0.46 0.76 0.69 0.96 0.97 0.97 0.6
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Lady Anne Monson lived in the Kingdom of England and held British nationality but resided in India for most of her life. Pranab Mukherjee is the leader of India, which has a total area of 32875900000000.', 'Lady Anne Monson is a British national who currently resides in India, a country of 3,287,590 square kilometers led by Pranab Mukherjee.', 'Lady Anne Monson was born in England but had her residence in India which has total area of 3287590000000.0. Pranab Mukherjee was a leader of India.'] ['Lady Anne Monson is a national of the Kingdom of England and resides in India, a country with a total area of 3287590000000.0 and led by Pranab Mukherjee.', 'Lady Anne Monson is a national of the Kingdom of England and resides in India, a country with a total area of 3287590000000.0 and led by Pranab Mukherjee.', 'Lady Anne Monson is a national of the Kingdom of England and resides in India, a country with a total area of 3287590000000.0 and led by Pranab Mukherjee.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
62.41 0.62 0.38 0.59 0.79 0.93 0.93 0.92 0.24
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Super Capers made $30,955 and starred infamous actor, Adam West. It was written and directed by Ray Griggs.', 'The film Super Capers, which grossed $30,955, is both directed and written by Ray Griggs, and it stars Adam West.', 'Super Capers is written and directed by Ray Griggs. It stars Adam West and accumulated a box office of $30,955.'] ['Super Capers, starring Adam West, was written by Ray Griggs and directed by Ray Griggs. It has a gross of $30955.0.', 'Super Capers, starring Adam West, was written by Ray Griggs and directed by Ray Griggs. It has a gross of $30955.0.', 'Super Capers, starring Adam West, was written by Ray Griggs and directed by Ray Griggs. It has a gross of $30955.0.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
38.62 0.39 0.34 0.56 0.8 0.93 0.93 0.93 0.55
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Darinka Dentcheva, a citizen of the United States of America, attended Humboldt University and is known for her work with stochastic programming.', 'Darinka Dentcheva, a stochastic programmer and graduate of Humboldt University, is a citizen of the United States of America.', "Darinka Dentcheva, United States of America ('United States') citizen and Humboldt University graduate, is known for her work with stochastic programming."] ['The long name of the United States is the United States of America. Darinka Dentcheva was born in the United States and studied at Humboldt University. She is known for Stochastic programming.', 'The long name of the United States is the United States of America. Darinka Dentcheva was born in the United States and studied at Humboldt University. She is known for Stochastic programming.', 'The long name of the United States is the United States of America. Darinka Dentcheva was born in the United States and studied at Humboldt University. She is known for Stochastic programming.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
24.08 0.24 0.36 0.62 0.96 0.91 0.93 0.92 -0.27
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The MotorSport Vision operated Bedford Aerodrome has a runway length of 1095.0.', 'Bedford Aerodrome operated by MotorSport Vision has the runway length of 1095.', 'Bedford Aerodrome is run by MotorSport Vision and has a runway length of 1095.'] ['Bedford Aerodrome, operated by MotorSport Vision, has a runway length of 1095.0.', 'Bedford Aerodrome, operated by MotorSport Vision, has a runway length of 1095.0.', 'Bedford Aerodrome, operated by MotorSport Vision, has a runway length of 1095.0.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
61.05 0.61 0.5 0.82 0.63 0.97 0.98 0.97 0.82
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
["Aleksandr Prudnikov plays for FC Terek Grozny (based at Grozny) and managed by Rashid Rakhimov.Aleksandr Prudnikov played for FC Spartak Moscow's youth team an his current club is FC Amkar Perm.", 'Aleksandr Prudnikov is in the FC Terek Grozny club (based in Grozny) and managed by Rashid Rakhimov. He is a member of the youth side of FC Spartak Moscow and also plays for FC Amkar Perm.', 'Aleksandr Prudnikov youth football club was FC Spartak Moscow and he also played for FC Terek Grozny located in Grozny and managed by Rashid Rakhimov. Aleksandr Prudnikov currently plays for FC Amkar Perm.'] ['FC Spartak Moscow youth player, Aleksandr Prudnikov, currently plays for FC Amkar Perm. He has also played for FC Terek Grozny, the ground of which, is located in Grozny and which is managed by Rashid Rakhimov.', 'FC Spartak Moscow youth player, Aleksandr Prudnikov, currently plays for FC Amkar Perm. He has also played for FC Terek Grozny, the ground of which, is located in Grozny and which is managed by Rashid Rakhimov.', 'FC Spartak Moscow youth player, Aleksandr Prudnikov, currently plays for FC Amkar Perm. He has also played for FC Terek Grozny, the ground of which, is located in Grozny and which is managed by Rashid Rakhimov.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
44.87 0.45 0.43 0.7 0.77 0.93 0.94 0.94 0.13
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The title of the leader of Turkey is "President".', 'The title for the leader of Turkey is President.', 'One of the leaders of Turkey is the president.'] ["Turkey's leader has the title of President.", "Turkey's leader has the title of President.", "Turkey's leader has the title of President."]
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
0 0.14 0.37 0.5 0.8 0.93 0.92 0.92 0.42
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The founding date of the German Empire was the first of January 1871.', 'The founding date of the German Empire is 1871-01-01.', 'The German Empire was founded on January 1, 1871.'] ['The German Empire was founded on 1871-01-01.', 'The German Empire was founded on 1871-01-01.', 'The German Empire was founded on 1871-01-01.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
83.45 0.6 0.4 0.68 0.68 0.96 0.97 0.97 0.43
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['Al Asad Airbase has a runway length of 3,992.88 and is run by the United States Air Force.', 'The operating organisation for Al Asad airbase, which has a runway length of 3992.88, is the United States Air Force.', 'Al Asad Airbase is operated by the United States Air Force and has a runway length of 3,992.88.'] ['The United States Air Force is the operating organisation for Al Asad airbase which has a runway length of 3992.88.', 'The United States Air Force is the operating organisation for Al Asad airbase which has a runway length of 3992.88.', 'The United States Air Force is the operating organisation for Al Asad airbase which has a runway length of 3992.88.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
66.28 0.66 0.53 0.88 0.63 0.97 0.97 0.97 0.4
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['India was founded on January 26, 1950.', 'India was founded on the 26th of January in 1950.', 'India was founded on January 26th, 1950.'] ['The founding date of India is 1950-01-26.', 'The founding date of India is 1950-01-26.', 'The founding date of India is 1950-01-26.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
0 0.06 0.27 0.23 0.91 0.89 0.92 0.9 0.34
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
["John Mills had a main role in the 94 minute movie It's Great to Be Young which took 282838 pounds at the box office. Louis Levy composed the music for the movie which had Gilbert Taylor as the cinematographer.", "The 1956 film “ It's Great to Be Young” cost £282,838 to produce and is 94 minutes in length. Louis Levy composed music for the film and Gilbert Taylor did the cinematography. The movie stars John Mills.", "It's Great to Be Young, the 94-minute, 1956 film, received £282,838. The lead was played by John Mills, and the music was composed by Louis Levy. Gilbert Taylor was responsible for the cinematography."] ["The musical score of It's Great to Be Young (1956 film) was composed by Louis Levy and starred John Mills. The film was directed by Gilbert Taylor and has a runtime of 94 minutes and a gross of 282838.0.", "The musical score of It's Great to Be Young (1956 film) was composed by Louis Levy and starred John Mills. The film was directed by Gilbert Taylor and has a runtime of 94 minutes and a gross of 282838.0.", "The musical score of It's Great to Be Young (1956 film) was composed by Louis Levy and starred John Mills. The film was directed by Gilbert Taylor and has a runtime of 94 minutes and a gross of 282838.0."]
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
24.67 0.27 0.31 0.48 0.84 0.9 0.91 0.9 0.14
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The campus of the University of Burgundy is located in city of Dijon in France.', 'The University of Burgundy is located in France with its campus in the city of Dijon.', 'The University of Burgundy campus is located in Dijon in France.'] ['The University of Burgundy is located in Dijon, France.', 'The University of Burgundy is located in Dijon, France.', 'The University of Burgundy is located in Dijon, France.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
67.75 0.68 0.45 0.76 0.58 0.99 0.97 0.98 0.58
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The Fellowship of the Ring, which was released on July 29, 1954, was written by J.R.R. Tolkien.', 'The Fellowship of the Ring, written by J.R.R. Tolkien, was published on July 29, 1954.', 'The Fellowship of the Ring, written by J.R.R. Tolkien, was released on July 29, 1954.'] ['The Fellowship of the Ring, written by J.R.R. Tolkien, was released on July 29, 1954.', 'The Fellowship of the Ring, written by J.R.R. Tolkien, was released on July 29, 1954.', 'The Fellowship of the Ring, written by J.R.R. Tolkien, was released on July 29, 1954.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
100 1 1 1 0.5 1 1 1 0.96
refs_path : /home/jfaille/Documents/webnlg_pipeline_analysis/GenerationEval/data_ref_2020/
STARTING TO PARSE INPUTS...
FINISHING TO PARSE INPUTS...
STARTING TO COMPUTE BLEU...
FINISHING TO COMPUTE BLEU...
STARTING TO COMPUTE METEOR...
FINISHING TO COMPUTE METEOR...
STARTING TO COMPUTE CHRF++...
FINISHING TO COMPUTE CHRF++...
STARTING TO COMPUTE TER...
FINISHING TO COMPUTE TER...
STARTING TO COMPUTE BERT SCORE...
The problem
['The Train song Mermaid was written by Amund Bjorklund.', 'The Train song Mermaid was written by Amund Bjørklund.', 'The Train song, Mermaid, was written by Amund Bjørklund.'] ['Mermaid (Train song) was written by Amund Bjorklund.', 'Mermaid (Train song) was written by Amund Bjorklund.', 'Mermaid (Train song) was written by Amund Bjorklund.']
BLEU BLEU NLTK METEOR chrF++ TER BERT-SCORE P BERT-SCORE R BERT-SCORE F1 BLEURT
------ ----------- -------- -------- ----- -------------- -------------- --------------- --------
53.48 0.53 0.49 0.8 0.65 0.95 0.96 0.96 0.66
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment