MNIST_Example/run.sh

27 lines
650 B
Bash

#!/bin/bash
# Launch script for a two-device data-parallel training run.
# All paths below are resolved against the current working directory, so the
# script has to be started from the directory that holds the training sources
# and rank_table_2pcs.json.
#
# Strict mode:
#   -e           abort on the first failing command
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

# Directory the script was started from; used to locate the rank table file.
EXEC_PATH="$(pwd)"

# Number of devices. Also selects which test_dist_<N>pcs preset is invoked.
export RANK_SIZE=2
#######################################
# Environment preset for a two-device distributed run.
# Globals:
#   EXEC_PATH        (read)     directory that contains rank_table_2pcs.json
#   RANK_TABLE_FILE  (exported) set to ${EXEC_PATH}/rank_table_2pcs.json
#   RANK_SIZE        (exported) set to 2
# Arguments:
#   None
#######################################
test_dist_2pcs() {
  RANK_TABLE_FILE="${EXEC_PATH}/rank_table_2pcs.json"
  RANK_SIZE=2
  export RANK_TABLE_FILE RANK_SIZE
}
# Apply the environment preset matching RANK_SIZE. Only test_dist_2pcs is
# defined in this script, so any other RANK_SIZE fails here with
# "command not found".
"test_dist_${RANK_SIZE}pcs"

# Start one training process per device. Every device gets a freshly created
# working directory (device<i>) with its own copy of the training sources.
for ((i = 0; i < RANK_SIZE; i++)); do
  device_dir="device${i}"

  # Recreate the per-device directory from scratch so stale logs are dropped.
  rm -rf -- "${device_dir}"
  mkdir -- "${device_dir}"
  cp -- ./train_dataparallel_debug_env.py ./config.py ./lenet.py \
    ./dataset_distributed.py "./${device_dir}"

  cd "./${device_dir}" || exit 1
  export DEVICE_ID="${i}"
  export RANK_ID="${i}"
  echo "start training for device ${i}"

  # Snapshot the environment this device's process will inherit.
  env > "env${i}.log"

  # Run in the background so all devices start without waiting for each
  # other; stdout and stderr both go to the per-device log file.
  python ./train_dataparallel_debug_env.py \
    > "train_dataparallel_debug_env.log${i}" 2>&1 &

  cd ../ || exit 1
done

# The background jobs are not waited for: this only reports that they were
# launched, and points at the log file actually written for device 0.
echo "The program launch succeed, the log is under device0/train_dataparallel_debug_env.log0."