| |
| from transformers import FillMaskPipeline ,DistilBertTokenizer,TFAutoModelForMaskedLM,AutoTokenizer |
| from transformers import BertTokenizer |
|
|
| |
# --- Locally stored masked language model ---------------------------------
# Vocabulary file and checkpoint directory, both relative to the working
# directory the script is started from.
tokenizer_path_1 = "./vocab.txt"
model_path = "./bert_lm_10"

# NOTE(review): recent transformers releases reportedly deprecate passing a
# single vocab file to ``from_pretrained`` -- confirm against the pinned
# version before upgrading.
tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)
model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)

# Fill-mask pipeline that pairs the local checkpoint with its own vocabulary.
unmasker = FillMaskPipeline(
    tokenizer=tokenizer_1,
    model=model_1,
)

# Sample masked sentence; nothing in the visible part of this file reads it.
txt = "a polynomial [MASK] from 3-SAT."
| |
|
|
| |
| |
| |
| |
| |
| |
# --- Baseline model -------------------------------------------------------
# Checkpoint identifier used for both the baseline tokenizer and model.
default_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)

# Second fill-mask pipeline, queried alongside ``unmasker`` for comparison.
unmasker_bert = FillMaskPipeline(
    tokenizer=tokenizer,
    model=model,
)
| |
| |
def _scores_by_token(predictions):
    """Map each predicted token string (spaces removed) to its score.

    ``predictions`` is an iterable of dicts carrying ``"token_str"`` and
    ``"score"`` keys. If two tokens collapse to the same label once spaces
    are removed, the later entry wins.
    """
    return {
        prediction["token_str"].replace(" ", ""): prediction["score"]
        for prediction in predictions
    }


def unmask_words(txt_with_mask, k_suggestions=5):
    """Return the top mask-filling suggestions of both pipelines.

    Args:
        txt_with_mask: Text to complete; handed unchanged to ``unmasker`` and
            then to ``unmasker_bert``.
        k_suggestions: Number of candidates requested from each pipeline.
            Coerced to ``int`` because it is used as a count, and a UI
            slider may deliver the value as a float.

    Returns:
        A ``(labels, labels_bert)`` tuple of dicts mapping a suggested token
        to its score: first for ``unmasker``, then for ``unmasker_bert``.
    """
    top_k = int(k_suggestions)

    labels = _scores_by_token(unmasker(txt_with_mask, top_k=top_k))
    labels_bert = _scores_by_token(unmasker_bert(txt_with_mask, top_k=top_k))

    return labels, labels_bert
| |
| |
|
|
| |
| |
|
|
| import gradio as gr |
# Markdown text handed to ``gr.Interface`` as its ``description``.
# The contact link needs an explicit ``mailto:`` target: a bare address in the
# link target is resolved as a relative URL and leads nowhere.
description = """This is a demo to show the Masked Language model pretrained on data collected from ~197k papers on arXiv consisting of mathematical proofs and theorems.
The aim of this interface is to show the difference between English and scientific English pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [mishra@di.ens.fr](mailto:mishra@di.ens.fr).
"""
|
|
# Sample inputs handed to ``gr.Interface`` as ``examples``.
# The interface takes two inputs (masked text, number of suggestions), so each
# row carries one value per input; 5 is the slider's default.
examples = [
    ["as pspace is [MASK] under complement.", 5],
    ["n!-(n-1)[MASK]", 5],
    ["[MASK] these two classes is a major problem.", 5],
    [
        "This would show that the polynomial hierarchy at the second [MASK], "
        "which is considered only",
        5,
    ],
    [
        "we consider two ways of measuring complexity, data complexity, "
        "which is with respect to the size of the data,\n"
        "and their combined [MASK]",
        5,
    ],
]
|
|
|
|
|
|
# Input widgets: a multi-line text area for the masked sentence and a slider
# (1 to 10, step 1, default 5) for the number of suggestions.
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)
suggestion_slider = gr.inputs.Slider(
    minimum=1,
    maximum=10,
    step=1,
    default=5,
    label="No of Suggestions:",
)

# One label widget per value returned by ``unmask_words``, in return order.
output_labels = [
    gr.outputs.Label(label="top words:"),
    gr.outputs.Label(label="top words eng-bert:"),
]

interface = gr.Interface(
    fn=unmask_words,
    inputs=[input_box, suggestion_slider],
    outputs=output_labels,
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)

# Start the demo server as soon as the script is executed.
interface.launch()