Table of Contents
- 1. Online Loading
- 2. Model Download
- 3. Offline Loading
- 4. Model Loading Parameters
- 5. Model Invocation
- 5.1 Model Invocation without a Model Head
- 5.2 Model Invocation with a Model Head
from transformers import AutoConfig, AutoModel, AutoTokenizer
1. Online Loading
# force_download=True re-downloads the weights from the Hugging Face Hub even if a cached copy exists
model = AutoModel.from_pretrained("hfl/rbt3", force_download=True)
2. Model Download
!git clone "https://huggingface.co/hfl/rbt3"
!git lfs clone "https://huggingface.co/hfl/rbt3" --include="*.bin"
# --include="*.bin" filters out the TensorFlow weights and downloads only the PyTorch *.bin files
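If git is not available, the huggingface_hub library offers an equivalent programmatic download. A minimal sketch, assuming a recent huggingface_hub version and an arbitrary target directory:
from huggingface_hub import snapshot_download

# fetch only the PyTorch weights plus tokenizer/config files; "./rbt3" is an assumed target directory
snapshot_download(repo_id="hfl/rbt3", allow_patterns=["*.bin", "*.json", "*.txt"], local_dir="./rbt3")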
3. Offline Loading
# pass the local directory downloaded above instead of a Hub model id
model = AutoModel.from_pretrained("rbt3")
4. Model Loading Parameters
model = AutoModel.from_pretrained("rbt3")
# the config attribute holds the hyperparameters read from the model's config.json
model.config
'''
BertConfig {
  "_name_or_path": "rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  ...
  "transformers_version": "4.28.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}
'''
config = AutoConfig.from_pretrained("./rbt3/")
Inspecting these parameters works the same way as with pipeline: look at the base class of the config object (BertConfig inherits from PretrainedConfig), where the available attributes and methods are defined.
from transformers import BertConfig
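As a quick sketch, individual fields of the loaded config can be read (or overridden) directly as attributes:
config = BertConfig.from_pretrained("./rbt3/")
print(config.num_hidden_layers)  # 3 -- rbt3 is a 3-layer Chinese RoBERTa-wwm variant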
5. Model Invocation
sen = "弱小的我也有大梦想!"
tokenizer = AutoTokenizer.from_pretrained("rbt3")
# return_tensors="pt" returns PyTorch tensors instead of Python lists
inputs = tokenizer(sen, return_tensors="pt")
inputs
'''
{
'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769, 738, 3300, 1920, 3457, 2682, 8013, 102]]),
'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
'''
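Note the special tokens the tokenizer added (101 = [CLS], 102 = [SEP]); decoding the ids back makes them visible:
tokenizer.decode(inputs["input_ids"][0])
# roughly: '[CLS] 弱 小 的 我 也 有 大 梦 想 ! [SEP]'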
5.1 Model Invocation without a Model Head
# output_attentions=True makes the forward pass also return the per-layer attention weights
model = AutoModel.from_pretrained("rbt3", output_attentions=True)
output = model(**inputs)
output
'''
BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.6804, 0.6664, 0.7170, ..., -0.4102, 0.7839, -0.0262],
[-0.7378, -0.2748, 0.5034, ..., -0.1359, -0.4331, -0.5874],
[-0.0212, 0.5642, 0.1032, ..., -0.3617, 0.4646, -0.4747],
...,
[ 0.0853, 0.6679, -0.1757, ..., -0.0942, 0.4664, 0.2925],
[ 0.3336, 0.3224, -0.3355, ..., -0.3262, 0.2532, -0.2507],
[ 0.6761, 0.6688, 0.7154, ..., -0.4083, 0.7824, -0.0224]]],
grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-1.2646e-01, -9.8619e-01, -1.0000e+00, -9.8325e-01, 8.0238e-01,
-6.6268e-02, 6.6919e-02, 1.4784e-01, 9.9451e-01, 9.9995e-01,
-8.3051e-02, -1.0000e+00, -9.8865e-02, 9.9980e-01, -1.0000e+00,
9.9993e-01, 9.8291e-01, 9.5363e-01, -9.9948e-01, -1.3219e-01,
-9.9733e-01, -7.7934e-01, 1.0720e-01, 9.8040e-01, 9.9953e-01,
-9.9939e-01, -9.9997e-01, 1.4967e-01, -8.7627e-01, -9.9996e-01,
-9.9821e-01, -9.9999e-01, 1.9396e-01, -1.1277e-01, 9.9359e-01,
-9.9153e-01, 4.4752e-02, -9.8731e-01, -9.9942e-01, -9.9982e-01,
2.9360e-02, 9.9847e-01, -9.2014e-03, 9.9999e-01, 1.7111e-01,
4.5071e-03, 9.9998e-01, 9.9467e-01, 4.9726e-03, -9.0707e-01,
6.9056e-02, -1.8141e-01, -9.8831e-01, 9.9668e-01, 4.9800e-01,
1.2997e-01, 9.9895e-01, -1.0000e+00, -9.9990e-01, 9.9478e-01,
-9.9989e-01, 9.9906e-01, 9.9820e-01, 9.9990e-01, -6.8953e-01,
9.9990e-01, 9.9987e-01, 9.4563e-01, -3.7660e-01, -1.0000e+00,
1.3151e-01, -9.7371e-01, -9.9997e-01, -1.3228e-02, -2.9801e-01,
-9.9985e-01, 9.9662e-01, -2.0004e-01, 9.9997e-01, 3.6876e-01,
-9.9997e-01, 1.5462e-01, 1.9265e-01, 8.9871e-02, 9.9996e-01,
9.9998e-01, 1.5184e-01, -8.9714e-01, -2.1646e-01, -9.9922e-01,
...
1.7911e-02, 4.8672e-01],
[4.0732e-01, 3.8137e-02, 9.6832e-03, ..., 4.4490e-02,
2.2997e-02, 4.0793e-01],
[1.7047e-01, 3.6989e-02, 2.3646e-02, ..., 4.6833e-02,
2.5233e-01, 1.6721e-01]]]], grad_fn=<SoftmaxBackward0>)), cross_attentions=None)
'''
output.last_hidden_state.size()
'''
torch.Size([1, 12, 768])
'''
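The shape is (batch_size, sequence_length, hidden_size). Because the model was loaded with output_attentions=True, the per-layer attention maps are available as well; for this 3-layer, 12-head model on a 12-token input one would expect:
len(output.attentions)       # 3, one tensor per hidden layer
output.attentions[0].size()  # torch.Size([1, 12, 12, 12]): (batch, heads, seq_len, seq_len)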
5.2 Model Invocation with a Model Head
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification
# num_labels=10 attaches a randomly initialized 10-way classification head on top of the encoder
clz_model = AutoModelForSequenceClassification.from_pretrained("rbt3", num_labels=10)
clz_model(**inputs)
'''
SequenceClassifierOutput(loss=None, logits=tensor([[-0.1776, 0.2208, -0.5060, -0.3938, -0.5837, 1.0171, -0.2616, 0.0495,
0.1728, 0.3047]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
'''
clz_model.config.num_labels
# 10, matching the num_labels passed to from_pretrained (the default is 2)
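Since the classification head is freshly initialized, the logits above are meaningless until the model is fine-tuned. Passing labels makes the head compute a loss directly; a minimal sketch (the label value 1 is arbitrary):
import torch

# with labels provided, the output's loss field is populated with the cross-entropy loss
outputs = clz_model(**inputs, labels=torch.tensor([1]))
outputs.loss  # a scalar tensor, ready for loss.backward()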