From 1cd3206f5896a368ce88cdcad20c2b4e06cbb6ae Mon Sep 17 00:00:00 2001
From: 黄宇豪
Date: Tue, 26 Mar 2024 01:09:59 +0000
Subject: [PATCH] !1147 Fix: add the bf16 params-dtype field to avoid
 affecting training precision
 Merge pull request !1147 from 黄宇豪/master
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/baichuan/README.md          | 1 +
 examples/baichuan/README_en.md       | 1 +
 examples/baichuan2/README.md         | 2 ++
 examples/baichuan2/README_en.md      | 2 ++
 examples/llama2/README.md            | 6 ++++--
 examples/llama2/README_en.md         | 3 ++-
 examples/mixtral/README.md           | 6 ++++--
 examples/mixtral/README_en.md        | 6 ++++--
 tools/checkpoint/loader_llama2_hf.py | 1 -
 9 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/examples/baichuan/README.md b/examples/baichuan/README.md
index 924b39e..a2e5806 100644
--- a/examples/baichuan/README.md
+++ b/examples/baichuan/README.md
@@ -345,6 +345,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan-13B-hf \
     --save-dir ./baichuan-13B-mt \
     --tokenizer-model ./baichuan-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan/README_en.md b/examples/baichuan/README_en.md
index 48a70bd..85038ac 100644
--- a/examples/baichuan/README_en.md
+++ b/examples/baichuan/README_en.md
@@ -338,6 +338,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan-13B-hf \
     --save-dir ./baichuan-13B-mt \
     --tokenizer-model ./baichuan-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan2/README.md b/examples/baichuan2/README.md
index d78631e..820d867 100644
--- a/examples/baichuan2/README.md
+++ b/examples/baichuan2/README.md
@@ -108,6 +108,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-7B-hf \
     --save-dir ./baichuan2-7B-mt \
     --tokenizer-model ./baichuan2-7B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

@@ -327,6 +328,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-13B-hf \
     --save-dir ./baichuan2-13B-mt \
     --tokenizer-model ./baichuan2-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/baichuan2/README_en.md b/examples/baichuan2/README_en.md
index 5d8388a..e8a12cc 100644
--- a/examples/baichuan2/README_en.md
+++ b/examples/baichuan2/README_en.md
@@ -112,6 +112,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-7B-hf \
     --save-dir ./baichuan2-7B-mt \
     --tokenizer-model ./baichuan2-7B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```
 Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy
@@ -331,6 +332,7 @@ python tools/checkpoint/util.py \
     --load-dir ./baichuan2-13B-hf \
     --save-dir ./baichuan2-13B-mt \
     --tokenizer-model ./baichuan2-13B-hf/tokenizer.model \
+    --params-dtype bf16 \
     --w-pack True
 ```

diff --git a/examples/llama2/README.md b/examples/llama2/README.md
index ca4800b..62c323a 100755
--- a/examples/llama2/README.md
+++ b/examples/llama2/README.md
@@ -769,7 +769,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./llama2-70b-hf/ \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 4.2 Convert Llama-2-34B weights from huggingface format to megatron format
@@ -786,7 +787,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./codellama-34b-hf \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 4.3 Convert Llama-2-70B weights from megatron format to huggingface format
diff --git a/examples/llama2/README_en.md b/examples/llama2/README_en.md
index bdfd501..de54cbd 100644
--- a/examples/llama2/README_en.md
+++ b/examples/llama2/README_en.md
@@ -765,7 +765,8 @@ pip install -r requirements.txt
     --target-pipeline-parallel-size 4 \
     --load-dir ./codellama-34b-hf \
     --save-dir ./load_ckpt \
-    --tokenizer-model ./llama2-70b-hf/tokenizer.model
+    --tokenizer-model ./llama2-70b-hf/tokenizer.model \
+    --params-dtype bf16
 ```

 Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy.
diff --git a/examples/mixtral/README.md b/examples/mixtral/README.md
index cd5a6cc..9582383 100644
--- a/examples/mixtral/README.md
+++ b/examples/mixtral/README.md
@@ -90,7 +90,8 @@
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 cd ..
 ```

@@ -140,7 +141,8 @@ python ./tools/preprocess_data.py \
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 ```

 2. Megatron weights with any parallel slicing strategy --> Megatron weights with any parallel slicing strategy ***(This scenario is generally used to reconfigure the weights of an already-sliced model, for example after training on a dual-node 16-card EP2-PP8 strategy you want to run inference on a single-node 8-card TP8 setup)***
diff --git a/examples/mixtral/README_en.md b/examples/mixtral/README_en.md
index 5e8baa0..56787af 100644
--- a/examples/mixtral/README_en.md
+++ b/examples/mixtral/README_en.md
@@ -89,7 +89,8 @@ Recommended hardware configuration for inference:
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 cd ..
 ```

@@ -139,7 +140,8 @@ python ./tools/preprocess_data.py \
     --tokenizer-model ../Mixtral-8x7B-v0.1/tokenizer.model \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 8 \
-    --target-expert-parallel-size 2
+    --target-expert-parallel-size 2 \
+    --params-dtype bf16
 ```

 2. Any Megatron weights with parallel slicing strategy --> Any Megatron weights with parallel slicing strategy ***(This scenario is generally used to reconfigure the sliced model weights, such as training on a dual-node 16-card EP2-PP8 strategy, and then wanting to infer on a single-node 8-card TP8)***
diff --git a/tools/checkpoint/loader_llama2_hf.py b/tools/checkpoint/loader_llama2_hf.py
index 252b6ba..1a9d037 100644
--- a/tools/checkpoint/loader_llama2_hf.py
+++ b/tools/checkpoint/loader_llama2_hf.py
@@ -75,7 +75,6 @@ def load_args_from_checkpoint(args):
     args.use_rotary_position_embeddings = True
     args.swiglu = True
     args.tokenizer_type = "Llama2Tokenizer"
-    args.fp16 = True
     args.normalization = "RMSNorm"
     args.add_bias_linear = False
     args.untie_embeddings_and_output_weights = True
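
The patch matters because the converter saves weights in whatever `--params-dtype` resolves to; without the flag, the Llama2 loader hard-coded `args.fp16 = True`, so bf16 HuggingFace checkpoints could be silently cast to fp16 before training. A quick way to sanity-check a converted checkpoint is to open one Megatron shard and print its tensor dtypes. The snippet below is only a minimal sketch and not part of the patch: the shard path and the `model` state-dict key follow the usual Megatron checkpoint layout and are assumptions that may differ in your setup.

```python
# check_params_dtype.py -- minimal sketch (not part of this patch): confirm that a
# converted Megatron checkpoint holds bf16 weights. The shard path below and the
# "model" state-dict key are assumptions based on the common Megatron layout.
import torch

def dump_dtypes(obj, prefix=""):
    """Recursively print the dtype of every tensor in a (possibly nested) state dict."""
    if torch.is_tensor(obj):
        # Expect torch.bfloat16 after converting with --params-dtype bf16.
        print(f"{prefix}: {obj.dtype}")
    elif isinstance(obj, dict):
        for key, value in obj.items():
            dump_dtypes(value, f"{prefix}.{key}" if prefix else str(key))

if __name__ == "__main__":
    # Hypothetical shard path; adjust to your --save-dir and parallel layout.
    ckpt_path = "./baichuan-13B-mt/iter_0000001/mp_rank_00/model_optim_rng.pt"
    state = torch.load(ckpt_path, map_location="cpu")
    dump_dtypes(state.get("model", state))
```

Run right after conversion, every weight should report `torch.bfloat16`; seeing `torch.float16` suggests the `--params-dtype bf16` flag was omitted.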