{ "version": "1.0", "truncation": { "direction": "Left", "max_length": 512, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "^", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "&", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": " ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "?", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "^", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "&", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "&": { "id": "&", "ids": [ 1 ], "tokens": [ "&" ] }, "^": { "id": "^", "ids": [ 0 ], "tokens": [ "^" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "^": 0, "&": 1, " ": 2, "?": 3, "[MASK]": 4, "c": 5, "C": 6, "(": 7, ")": 8, "1": 9, "2": 10, "3": 11, "=": 12, "F": 13, "N": 14, "O": 15, "o": 16 }, "unk_token": "?" } }