From 1cd3206f5896a368ce88cdcad20c2b4e06cbb6ae Mon Sep 17 00:00:00 2001
From: 黄宇豪
Date: Tue, 26 Mar 2024 01:09:59 +0000
Subject: [PATCH] !1147 Fix: add the bf16 params-dtype field to avoid
 affecting training precision
 Merge pull request !1147 from 黄宇豪/master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/baichuan/README.md          | 1 +
 examples/baichuan/README_en.md       | 1 +
 examples/baichuan2/README.md         | 2 ++
 examples/baichuan2/README_en.md      | 2 ++
 examples/llama2/README.md            | 6 ++++--
 examples/llama2/README_en.md         | 3 ++-
 examples/mixtral/README.md           | 6 ++++--
 examples/mixtral/README_en.md        | 6 ++++--
 tools/checkpoint/loader_llama2_hf.py | 1 -
 9 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/examples/baichuan/README.md b/examples/baichuan/README.md
index 924b39e..a2e5806 100644
--- a/examples/baichuan/README.md
+++ b/examples/baichuan/README.md
@@ -345,6 +345,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan-13B-hf \
     --save-dir ./baichuan-13B-mt \
     --tokenizer-model ./baichuan-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan/README_en.md b/examples/baichuan/README_en.md
index 48a70bd..85038ac 100644
--- a/examples/baichuan/README_en.md
+++ b/examples/baichuan/README_en.md
@@ -338,6 +338,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan-13B-hf \
     --save-dir ./baichuan-13B-mt \
     --tokenizer-model ./baichuan-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan2/README.md b/examples/baichuan2/README.md
index d78631e..820d867 100644
--- a/examples/baichuan2/README.md
+++ b/examples/baichuan2/README.md
@@ -108,6 +108,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-7B-hf \
     --save-dir ./baichuan2-7B-mt \
     --tokenizer-model ./baichuan2-7B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

@@ -327,6 +328,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-13B-hf \
     --save-dir ./baichuan2-13B-mt \
     --tokenizer-model ./baichuan2-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan2/README_en.md b/examples/baichuan2/README_en.md
index 5d8388a..e8a12cc 100644
--- a/examples/baichuan2/README_en.md
+++ b/examples/baichuan2/README_en.md
@@ -112,6 +112,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-7B-hf \
     --save-dir ./baichuan2-7B-mt \
     --tokenizer-model ./baichuan2-7B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```
 Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy
@@ -331,6 +332,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-13B-hf \
     --save-dir ./baichuan2-13B-mt \
     --tokenizer-model ./baichuan2-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/llama2/README.md b/examples/llama2/README.md
index ca4800b..62c323a 100755
--- a/examples/llama2/README.md
+++ b/examples/llama2/README.md
@@ -769,7 +769,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./llama2-70b-hf/ \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 4.2 Convert Llama-2-34B weights from huggingface format to megatron format
@@ -786,7 +787,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./codellama-34b-hf \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 4.3 Convert Llama-2-70B weights from megatron format to huggingface format
diff --git a/examples/llama2/README_en.md b/examples/llama2/README_en.md
index bdfd501..de54cbd 100644
--- a/examples/llama2/README_en.md
+++ b/examples/llama2/README_en.md
@@ -765,7 +765,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./codellama-34b-hf \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy.
diff --git a/examples/mixtral/README.md b/examples/mixtral/README.md
index cd5a6cc..9582383 100644
--- a/examples/mixtral/README.md
+++ b/examples/mixtral/README.md
@@ -90,7 +90,8 @@
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 cd ..
 ```

@@ -140,7 +141,8 @@ python ./tools/preprocess_data.py \
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 ```

 2. Megatron weights with any parallel slicing strategy --> Megatron weights with any parallel slicing strategy ***(This scenario is generally used to reconfigure the weights of an already-sliced model, for example after training on a dual-node 16-card EP2-PP8 strategy you want to run inference on a single-node 8-card TP8 setup)***
diff --git a/examples/mixtral/README_en.md b/examples/mixtral/README_en.md
index 5e8baa0..56787af 100644
--- a/examples/mixtral/README_en.md
+++ b/examples/mixtral/README_en.md
@@ -89,7 +89,8 @@ Recommended hardware configuration for inference:
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 cd ..
 ```

@@ -139,7 +140,8 @@ python ./tools/preprocess_data.py \
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 ```

 2. Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy ***(This scenario is generally used to reconfigure the sliced model weights, such as training on a dual-node 16-card EP2-PP8 strategy, and then wanting to infer on a single-node 8-card TP8)***
diff --git a/tools/checkpoint/loader_llama2_hf.py b/tools/checkpoint/loader_llama2_hf.py
index 252b6ba..1a9d037 100644
--- a/tools/checkpoint/loader_llama2_hf.py
+++ b/tools/checkpoint/loader_llama2_hf.py
@@ -75,7 +75,6 @@ def load_args_from_checkpoint(args):
     args.use_rotary_position_embeddings = True
     args.swiglu = True
     args.tokenizer_type = "Llama2Tokenizer"
-    args.fp16 = True
     args.normalization = "RMSNorm"
     args.add_bias_linear = False
     args.untie_embeddings_and_output_weights = True
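
The patch matters because the converter saves weights in whatever `--params-dtype` resolves to; without the flag, the Llama2 loader hard-coded `args.fp16 = True`, so bf16 HuggingFace checkpoints could be silently cast to fp16 before training. A quick way to sanity-check a converted checkpoint is to open one Megatron shard and print its tensor dtypes. The snippet below is only a minimal sketch and not part of the patch: the shard path and the `model` state-dict key follow the usual Megatron checkpoint layout and are assumptions that may differ in your setup.

```python
# check_params_dtype.py -- minimal sketch (not part of this patch): confirm that a
# converted Megatron checkpoint holds bf16 weights. The shard path below and the
# "model" state-dict key are assumptions based on the common Megatron layout.
import torch

def dump_dtypes(obj, prefix=""):
    """Recursively print the dtype of every tensor in a (possibly nested) state dict."""
    if torch.is_tensor(obj):
        # Expect torch.bfloat16 after converting with --params-dtype bf16.
        print(f"{prefix}: {obj.dtype}")
    elif isinstance(obj, dict):
        for key, value in obj.items():
            dump_dtypes(value, f"{prefix}.{key}" if prefix else str(key))

if __name__ == "__main__":
    # Hypothetical shard path; adjust to your --save-dir and parallel layout.
    ckpt_path = "./baichuan-13B-mt/iter_0000001/mp_rank_00/model_optim_rng.pt"
    state = torch.load(ckpt_path, map_location="cpu")
    dump_dtypes(state.get("model", state))
```

Run right after conversion, every weight should report `torch.bfloat16`; seeing `torch.float16` suggests the `--params-dtype bf16` flag was omitted.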