Add HF-standard offline package (auto_map + modeling_kbert_mtl.py)

Browse files

Files changed (2) hide show

config.json +168 -26
modeling_kbert_mtl.py +52 -0

config.json CHANGED Viewed

@@ -1,39 +1,181 @@
 {
-  "_name_or_path": "klue/bert-base",
   "architectures": [
     "KbertMTL"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "langquant_info": {
-    "base_model": "klue/bert-base",
     "head_structure": {
       "action": 6,
       "emotion": 7,
-      "regression": 3,
-      "sentiment": 5
     },
-    "license": "cc-by-nc-4.0",
-    "model_name": "LQ-KBERT-Base",
-    "organization": "LangQuant",
-    "source_ckpt": "lq-kbert-base.pt",
-    "task": "Crypto Market Korean Sentiment & Action Signal Classification",
-    "year": 2025
   },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.27.0",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 32000
-}

 {
+  "return_dict": true,
+  "output_hidden_states": false,
+  "output_attentions": false,
+  "torchscript": false,
+  "torch_dtype": null,
+  "use_bfloat16": false,
+  "tf_legacy_loss": false,
+  "pruned_heads": {},
+  "tie_word_embeddings": true,
+  "is_encoder_decoder": false,
+  "is_decoder": false,
+  "cross_attention_hidden_size": null,
+  "add_cross_attention": false,
+  "tie_encoder_decoder": false,
+  "max_length": 20,
+  "min_length": 0,
+  "do_sample": false,
+  "early_stopping": false,
+  "num_beams": 1,
+  "num_beam_groups": 1,
+  "diversity_penalty": 0.0,
+  "temperature": 1.0,
+  "top_k": 50,
+  "top_p": 1.0,
+  "typical_p": 1.0,
+  "repetition_penalty": 1.0,
+  "length_penalty": 1.0,
+  "no_repeat_ngram_size": 0,
+  "encoder_no_repeat_ngram_size": 0,
+  "bad_words_ids": null,
+  "num_return_sequences": 1,
+  "chunk_size_feed_forward": 0,
+  "output_scores": false,
+  "return_dict_in_generate": false,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "remove_invalid_values": false,
+  "exponential_decay_length_penalty": null,
+  "suppress_tokens": null,
+  "begin_suppress_tokens": null,
   "architectures": [
     "KbertMTL"
   ],
+  "finetuning_task": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "tokenizer_class": null,
+  "prefix": null,
+  "bos_token_id": null,
+  "pad_token_id": 0,
+  "eos_token_id": null,
+  "sep_token_id": null,
+  "decoder_start_token_id": null,
+  "task_specific_params": null,
+  "problem_type": null,
+  "_name_or_path": "klue/bert-base",
+  "transformers_version": "4.27.0",
+  "model_type": "kbert_mtl",
+  "vocab_size": 32000,
+  "hidden_size": 768,
+  "num_hidden_layers": 12,
+  "num_attention_heads": 12,
   "hidden_act": "gelu",
+  "intermediate_size": 3072,
   "hidden_dropout_prob": 0.1,
+  "attention_probs_dropout_prob": 0.1,
+  "max_position_embeddings": 512,
+  "type_vocab_size": 2,
   "initializer_range": 0.02,
+  "layer_norm_eps": 1e-12,
+  "position_embedding_type": "absolute",
+  "use_cache": true,
+  "classifier_dropout": null,
+  "base_model_name_or_path": "klue/bert-base",
+  "base_model_config": {
+    "return_dict": true,
+    "output_hidden_states": false,
+    "output_attentions": false,
+    "torchscript": false,
+    "torch_dtype": null,
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": true,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "chunk_size_feed_forward": 0,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": [
+      "BertForMaskedLM"
+    ],
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
+    "bos_token_id": null,
+    "pad_token_id": 0,
+    "eos_token_id": null,
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "klue/bert-base",
+    "transformers_version": "4.27.0",
+    "model_type": "bert",
+    "vocab_size": 32000,
+    "hidden_size": 768,
+    "num_hidden_layers": 12,
+    "num_attention_heads": 12,
+    "hidden_act": "gelu",
+    "intermediate_size": 3072,
+    "hidden_dropout_prob": 0.1,
+    "attention_probs_dropout_prob": 0.1,
+    "max_position_embeddings": 512,
+    "type_vocab_size": 2,
+    "initializer_range": 0.02,
+    "layer_norm_eps": 1e-12,
+    "position_embedding_type": "absolute",
+    "use_cache": true,
+    "classifier_dropout": null
+  },
+  "emo_threshold": 0.35,
   "langquant_info": {
+    "model_name": "LQ-KBERT-Base",
+    "organization": "LangQuant",
+    "year": 2025,
+    "license": "cc-by-nc-4.0",
+    "task": "Korean sentiment/action multi-task",
     "head_structure": {
+      "sentiment": 5,
       "action": 6,
       "emotion": 7,
+      "regression": 3
     },
+    "source_ckpt": "lq-kbert-base.pt"
   },
+  "auto_map": {
+    "AutoModel": "modeling_kbert_mtl.KbertMTL"
+  }
+}

modeling_kbert_mtl.py ADDED Viewed

	@@ -0,0 +1,52 @@

+# modeling_kbert_mtl.py
+import torch
+import torch.nn as nn
+from transformers import PreTrainedModel, AutoModel, AutoConfig
+class KbertMTL(PreTrainedModel):
+    """
+    LangQuant KBERT Multi-Task Head (HF-standard, offline-friendly)
+    Outputs (dict):
+      - logits_senti: (B,5)
+      - logits_act:   (B,6)
+      - logits_emo:   (B,7)
+      - pred_reg:     (B,3)  # [certainty, relevance, toxicity] in 0~1 (권장)
+      - last_hidden_state: (B, L, H) from base encoder
+    """
+    def __init__(self, config):
+        super().__init__(config)
+        if not hasattr(config, "base_model_config") or config.base_model_config is None:
+            raise ValueError(
+                "config.base_model_config is required for offline load. "
+                "Make sure your config.json contains a serialized base model config."
+            )
+        base_cfg = AutoConfig.from_dict(config.base_model_config)
+        self.bert = AutoModel.from_config(base_cfg)
+        hidden = self.bert.config.hidden_size
+        self.head_senti = nn.Linear(hidden, 5)
+        self.head_act   = nn.Linear(hidden, 6)
+        self.head_emo   = nn.Linear(hidden, 7)
+        self.head_reg   = nn.Linear(hidden, 3)
+        self.has_token_type = getattr(self.bert.embeddings, "token_type_embeddings", None) is not None
+        self.post_init()
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, **kwargs):
+        kw = dict(input_ids=input_ids, attention_mask=attention_mask)
+        if self.has_token_type and token_type_ids is not None:
+            kw["token_type_ids"] = token_type_ids
+        out = self.bert(**kw)
+        h = out.last_hidden_state[:, 0]  # [CLS]
+        return {
+            "logits_senti": self.head_senti(h),
+            "logits_act":   self.head_act(h),
+            "logits_emo":   self.head_emo(h),
+            "pred_reg":     self.head_reg(h),
+            "last_hidden_state": out.last_hidden_state
+        }