Spaces:

HMPhuoc
/

toxic

Running

HMPhuoc committed on Apr 19, 2024

Commit

e5fcabb

1 Parent(s): 8256522

use bert model

Files changed (8) hide show

added_tokens.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "<mask>": 64000
-}

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ import unicodedata as ud
 from underthesea import word_tokenize
-#from phoBERT import BERT_predict
 # Load tokenizer
 # fp = Path(__file__).with_name('tokenizer.pkl')
@@ -86,14 +86,14 @@ def judge(x):
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
-  # bert_pred = BERT_predict(x)
   #print(result)
   return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
-  # result_bert = np.round(bert_pred, 2)
   for i in range(6):
-    result.append((result_lstm[i]+result_gru[i])/2)
   return (result)

 from underthesea import word_tokenize
+from phoBERT import BERT_predict
 # Load tokenizer
 # fp = Path(__file__).with_name('tokenizer.pkl')
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
+  bert_pred = BERT_predict(x)
   #print(result)
   return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
+  result_bert = np.round(bert_pred, 2)
   for i in range(6):
+    result.append((result_lstm[i]+result_gru[i]+result_bert[i])/3)
   return (result)

bpe.codes DELETED Viewed

The diff for this file is too large to render. See raw diff

phoBERT.py CHANGED Viewed

@@ -5,7 +5,7 @@ import __main__
 #phobert = AutoModel.from_pretrained("vinai/phobert-base")
-tokenizer = AutoTokenizer.from_pretrained("./")
 class PhoBertModel(torch.nn.Module):
   def __init__(self):
@@ -35,7 +35,7 @@ class PhoBertModel(torch.nn.Module):
 setattr(__main__, "PhoBertModel", PhoBertModel)
 def getModel():
-    model = torch.load('phoBertModel.pth', map_location=torch.device('cpu'))
     model.eval()
     return model

 #phobert = AutoModel.from_pretrained("vinai/phobert-base")
+tokenizer = AutoTokenizer.from_pretrained("./bert/bert_tokenizer")
 class PhoBertModel(torch.nn.Module):
   def __init__(self):
 setattr(__main__, "PhoBertModel", PhoBertModel)
 def getModel():
+    model = torch.load('./bert/phoBertModel.pth', map_location=torch.device('cpu'))
     model.eval()
     return model

phoBertModel.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d5fca9d837d05b1e8330798e32a59b5200bf677d5cf2f178727dcd131c86230b
-size 542499629

special_tokens_map.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": "<mask>",
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
-}

tokenizer_config.json DELETED Viewed

@@ -1,54 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "64000": {
-      "content": "<mask>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": "<mask>",
-  "model_max_length": 256,
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "tokenizer_class": "PhobertTokenizer",
-  "unk_token": "<unk>"
-}

vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff