from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize  # requires the NLTK "punkt" tokenizer data
from utils.tokenizer import tokenize
import re


def is_korean(text):
    """Return True if the text contains at least one Hangul syllable."""
    for char in text:
        if '가' <= char <= '힣':
            return True
    return False


def simple_score(text1, text2, lang=None):
    """Sentence-level BLEU between a reference (text1) and a candidate (text2).

    Korean text is tokenized with the project tokenizer; other text is
    lower-cased and tokenized with NLTK. Pass lang='ko' to skip auto-detection.
    """
    text1 = re.sub("\n", " ", text1)
    text2 = re.sub("\n", " ", text2)
    if lang == 'ko' or (lang is None and is_korean(text1)):
        reference = tokenize(text1)
        candidate = tokenize(text2)
    else:
        reference = word_tokenize(text1.lower())
        candidate = word_tokenize(text2.lower())
    # base = sentence_bleu([reference], reference)
    score = sentence_bleu([reference], candidate, smoothing_function=SmoothingFunction().method2)
    return score
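
# Quick sanity check (illustrative only; assumes the NLTK "punkt" data is installed):
# identical sentences should score ~1.0 and unrelated sentences close to 0.0, e.g.
#   simple_score("the cat sat on the mat", "the cat sat on the mat")  # ~1.0
#   simple_score("the cat sat on the mat", "dogs bark loudly")        # near 0.0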


if __name__ == "__main__":
    # Interactive loop: score a candidate against a reference sentence.
    lang = input('lang(en,ko)> ')
    while True:
        ref = input("ref: ")
        cand = input("cand: ")
        print('score', simple_score(ref, cand, lang))