forked from qscqesze/MiniMaxTinyModelGenerator
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerator.py
More file actions
115 lines (91 loc) · 3.4 KB
/
generator.py
File metadata and controls
115 lines (91 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import json
from pathlib import Path
import torch
from huggingface_hub import hf_hub_download
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
GenerationConfig,
set_seed,
)
# Identifier of the base model; the __main__ block passes it as `base_model_id`
# to build_tiny_config and prepare_tokenizer_and_generation_config.
MODEL_ID = "MiniMaxAI/MiniMax-M2"
# Intermediate directory: the __main__ block writes the tiny config and the
# stage-1 randomly initialised checkpoint here.
SAVE_FOLDER_TMP = Path("./minimax-m2-tiny-random-tmp")
# Final directory: the __main__ block saves the tokenizer, generation config
# and the stage-2 re-saved model here, and loads from it for the smoke test.
SAVE_FOLDER = Path("./minimax-m2-tiny-random")
def prepare_tokenizer_and_generation_config(base_model_id: str, save_dir: Path):
    """Copy the base model's tokenizer and generation config into ``save_dir``.

    Args:
        base_model_id: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the generation config.
        save_dir: Directory that receives the saved tokenizer and
            generation-config files.
    """
    # Tokenizer first, then the generation config.
    AutoTokenizer.from_pretrained(
        base_model_id,
        trust_remote_code=True,
    ).save_pretrained(save_dir)

    gen_cfg = GenerationConfig.from_pretrained(base_model_id, trust_remote_code=True)
    gen_cfg.save_pretrained(save_dir)
def build_tiny_config(base_model_id: str, save_dir: Path):
    """Write a modified copy of the base model's ``config.json`` into ``save_dir``.

    The base ``config.json`` is fetched with ``hf_hub_download``, a fixed set
    of keys is overwritten with small values, the ``auto_map`` entry is removed
    if present, and the result is written to ``save_dir / "config.json"``.

    Args:
        base_model_id: Repository id passed to ``hf_hub_download``.
        save_dir: Existing directory that receives the new ``config.json``.
    """
    source_path = hf_hub_download(
        base_model_id, filename="config.json", repo_type="model"
    )
    config_json = json.loads(Path(source_path).read_text(encoding="utf-8"))

    # Overrides are applied in this order; keys already present keep their
    # position in the file, new keys are appended in this order.
    tiny_overrides = {
        "head_dim": 128,
        "hidden_size": 256,
        "intermediate_size": 128,
        "num_attention_heads": 2,
        "num_experts_per_tok": 2,
        "num_hidden_layers": 2,
        "num_key_value_heads": 1,
        "num_local_experts": 8,
        "rotary_dim": 64,
        "tie_word_embeddings": True,
    }
    config_json.update(tiny_overrides)
    config_json.pop("auto_map", None)

    target_path = save_dir / "config.json"
    with target_path.open("w", encoding="utf-8") as out:
        json.dump(config_json, out, indent=2, ensure_ascii=False)
def init_random_tiny_model(save_dir: Path):
    """Build a model from the config in ``save_dir``, randomise it and save it back.

    After the model is constructed, the seed is set to 42 and every parameter
    is overwritten in place with normal draws (mean 0.0, std 0.2), visiting
    parameters in sorted-name order. The model is then saved into ``save_dir``.

    Args:
        save_dir: Directory holding the config to load and receiving the
            saved model.
    """
    tiny_config = AutoConfig.from_pretrained(save_dir, trust_remote_code=True)
    model = AutoModelForCausalLM.from_config(tiny_config)

    # Seed only after construction: the draws below are what end up on disk.
    set_seed(42)
    model = model.cpu()
    print(model)

    params_by_name = dict(model.named_parameters())
    with torch.no_grad():
        # Sorted names keep the order in which random numbers are consumed fixed.
        for param_name in sorted(params_by_name):
            torch.nn.init.normal_(params_by_name[param_name], mean=0.0, std=0.2)

    model.save_pretrained(save_dir)
    print("=" * 100)
    print("Stage 1 model saved to:", save_dir)
def reload_with_quant_and_resave(save_dir_tmp: Path, save_dir: Path, device: str = "cuda"):
    """Load the checkpoint in ``save_dir_tmp`` onto ``device`` and save it to ``save_dir``.

    NOTE(review): nothing in this function requests quantisation explicitly;
    the "quant" in the name presumably relies on settings carried by the
    saved config -- confirm.

    Args:
        save_dir_tmp: Directory containing the checkpoint to load.
        save_dir: Directory that receives the re-saved model.
        device: Value forwarded as ``device_map`` when loading.
    """
    banner = "=" * 100
    print(banner)
    print("Stage 2: reload with quant and resave")

    load_options = {"trust_remote_code": True, "device_map": device}
    reloaded = AutoModelForCausalLM.from_pretrained(save_dir_tmp, **load_options)
    reloaded.save_pretrained(save_dir)

    print("Stage 2 model saved to:", save_dir)
def test_loaded_model(save_dir: Path, device: str = "cuda"):
    """Smoke-test the saved model by generating a reply to a fixed chat prompt.

    Loads the model and tokenizer from ``save_dir``, renders a single-turn
    chat prompt, generates up to 100 new tokens and prints the decoded text.

    Args:
        save_dir: Directory containing the saved model and tokenizer.
        device: Value forwarded as ``device_map`` when loading the model; the
            tokenized prompt is moved to the same device.
    """
    print("=" * 100)
    print("Stage 3: Testing loaded model")
    model = AutoModelForCausalLM.from_pretrained(
        save_dir,
        trust_remote_code=True,
        device_map=device,
    )
    tokenizer = AutoTokenizer.from_pretrained(save_dir)
    messages = [
        {"role": "user", "content": "Hello, how are you?"}
    ]
    # Ask for a dict-like result explicitly: on releases where `return_dict`
    # defaults to False this call returns a bare tensor, which has no
    # `input_ids` attribute.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)
    outputs = model.generate(
        inputs["input_ids"],
        # Forward the mask when the tokenizer produced one so padding is not
        # guessed by `generate`.
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=100,
    )
    # `generate` returns a (batch, sequence) tensor; decode the single
    # sequence rather than the whole batch.
    response = tokenizer.decode(outputs[0])
    print(response)
if __name__ == "__main__":
    # Both output directories must exist before any stage writes into them.
    for output_dir in (SAVE_FOLDER_TMP, SAVE_FOLDER):
        output_dir.mkdir(parents=True, exist_ok=True)

    # Stage 1: tiny config plus randomly initialised weights in the temp folder.
    build_tiny_config(MODEL_ID, SAVE_FOLDER_TMP)
    init_random_tiny_model(SAVE_FOLDER_TMP)

    # Tokenizer and generation config go straight to the final folder.
    prepare_tokenizer_and_generation_config(MODEL_ID, SAVE_FOLDER)

    # Stage 2: reload the temp checkpoint and re-save it to the final folder.
    reload_with_quant_and_resave(SAVE_FOLDER_TMP, SAVE_FOLDER)

    # Stage 3: generate from the final folder as a smoke test.
    test_loaded_model(SAVE_FOLDER)