From 7f95e4ed78bcf37d22825cd85e8b4e458f3661d8 Mon Sep 17 00:00:00 2001
From: liuzx
Date: Fri, 5 May 2023 15:42:47 +0800
Subject: [PATCH] update

---
 inference.py                  | 100 ++++++---------------
 inference_for_multidataset.py | 158 ----------------------------------
 pretrain.py                   |   2 +-
 upload.py                     |  14 ---
 upload_for_c2net.py           |   6 --
 5 files changed, 27 insertions(+), 253 deletions(-)
 delete mode 100644 inference_for_multidataset.py
 delete mode 100644 upload.py
 delete mode 100644 upload_for_c2net.py

diff --git a/inference.py b/inference.py
index 73869af..73da324 100644
--- a/inference.py
+++ b/inference.py
@@ -1,85 +1,36 @@
+
 """
-######################## single-dataset inference lenet example ########################
-This example is a single-dataset inference tutorial.
-
-######################## Instructions for using the inference environment ########################
-1、Inference task requires predefined functions
-(1)Copy single dataset from obs to inference image.
-function ObsToEnv(obs_data_url, data_dir)
-
-(2)Copy ckpt file from obs to inference image.
-function ObsUrlToEnv(obs_ckpt_url, ckpt_url)
-
-(3)Copy the output result to obs.
-function EnvToObs(train_dir, obs_train_url)
-
-3、4 parameters need to be defined.
---data_url is the dataset you selected on the Qizhi platform
---ckpt_url is the weight file you choose on the Qizhi platform
-
---data_url,--ckpt_url,--result_url,--device_target,These 4 parameters must be defined first in a single dataset,
-otherwise an error will be reported.
-There is no need to add these parameters to the running parameters of the Qizhi platform,
-because they are predefined in the background, you only need to define them in your code.
-
-4、How the dataset is used
-Inference task uses data_url as the input, and data_dir (ie: '/cache/data') as the calling method
-of the dataset in the image.
-For details, please refer to the following sample code.
+Usage notes:
+1. This example requires the user-defined parameters --multi_data_url, --pretrain_url and --result_url; these 3 parameters must be defined in the task.
+Their meanings are as follows:
+--multi_data_url is the obs path of the dataset(s) selected on the Qizhi platform
+--pretrain_url is the obs path of the pretrained model file selected on the Qizhi platform
+--result_url is the obs path used to send the results back to the Qizhi platform
+2. The user needs to call the functions DatasetToEnv, PretrainToEnv, UploadToOpenI, etc. in OpenI.py to copy the dataset and the pretrained model file into the environment and to send the results back.
 """
 import os
 import argparse
-import moxing as mox
 import mindspore.nn as nn
+import numpy as np
 from mindspore import context
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.train import Model
-from mindspore.nn.metrics import Accuracy
 from mindspore import Tensor
-import numpy as np
-from glob import glob
 from dataset import create_dataset
 from config import mnist_cfg as cfg
 from lenet import LeNet5
+from OpenI import OpenIMultiDatasetToEnv as DatasetToEnv
+from OpenI import OpenIPretrainToEnv as PretrainToEnv
+from OpenI import EnvToOpenI
 
-### Copy single dataset from obs to inference image ###
-def ObsToEnv(obs_data_url, data_dir):
-    try:
-        mox.file.copy_parallel(obs_data_url, data_dir)
-        print("Successfully Download {} to {}".format(obs_data_url, data_dir))
-    except Exception as e:
-        print('moxing download {} to {} failed: '.format(obs_data_url, data_dir) + str(e))
-    return
-### Copy ckpt file from obs to inference image###
-### To operate on folders, use mox.file.copy_parallel. If copying a file.
-### Please use mox.file.copy to operate the file, this operation is to operate the file
-def ObsUrlToEnv(obs_ckpt_url, ckpt_url):
-    try:
-        mox.file.copy(obs_ckpt_url, ckpt_url)
-        print("Successfully Download {} to {}".format(obs_ckpt_url,ckpt_url))
-    except Exception as e:
-        print('moxing download {} to {} failed: '.format(obs_ckpt_url, ckpt_url) + str(e))
-    return
-### Copy the output result to obs###
-def EnvToObs(train_dir, obs_train_url):
-    try:
-        mox.file.copy_parallel(train_dir, obs_train_url)
-        print("Successfully Upload {} to {}".format(train_dir,obs_train_url))
-    except Exception as e:
-        print('moxing upload {} to {} failed: '.format(train_dir,obs_train_url) + str(e))
-    return
-### --data_url,--ckpt_url,--result_url,--device_target,These 4 parameters must be defined first in a inference task,
-### otherwise an error will be reported.
-### There is no need to add these parameters to the running parameters of the Qizhi platform,
-### because they are predefined in the background, you only need to define them in your code.
 parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
-parser.add_argument('--data_url',
+parser.add_argument('--multi_data_url',
                     type=str,
                     default= '/cache/data/',
                     help='path where the dataset is saved')
-parser.add_argument('--ckpt_url',
+parser.add_argument('--pretrain_url',
                     help='model to save/load',
                     default= '/cache/checkpoint.ckpt')
 parser.add_argument('--result_url',
@@ -93,18 +44,20 @@ if __name__ == "__main__":
 
     ###Initialize the data and result directories in the inference image###
     data_dir = '/cache/data'
+    pretrain_dir = '/cache/pretrain'
     result_dir = '/cache/result'
-    ckpt_url = '/cache/checkpoint.ckpt'
     if not os.path.exists(data_dir):
         os.makedirs(data_dir)
+    if not os.path.exists(pretrain_dir):
+        os.makedirs(pretrain_dir)
     if not os.path.exists(result_dir):
-        os.makedirs(result_dir)
+        os.makedirs(result_dir)
 
-    ###Copy dataset from obs to inference image
-    ObsToEnv(args.data_url, data_dir)
+    ###Copy the dataset to the inference environment
+    DatasetToEnv(args.multi_data_url, data_dir)
 
-    ###Copy ckpt file from obs to inference image
-    ObsUrlToEnv(args.ckpt_url, ckpt_url)
+    ###Copy the pretrained model file to the inference environment
+    PretrainToEnv(args.pretrain_url, pretrain_dir)
 
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     network = LeNet5(cfg.num_classes)
@@ -115,9 +68,9 @@ if __name__ == "__main__":
 
     print("============== Starting Testing ==============")
 
-    param_dict = load_checkpoint(os.path.join(ckpt_url))
+    param_dict = load_checkpoint(os.path.join(pretrain_dir, "checkpoint_lenet-1_1875.ckpt"))
     load_param_into_net(network, param_dict)
-    ds_test = create_dataset(os.path.join(data_dir, "test"), batch_size=1).create_dict_iterator()
+    ds_test = create_dataset(os.path.join(data_dir + "/MNISTData", "test"), batch_size=1).create_dict_iterator()
     data = next(ds_test)
     images = data["image"].asnumpy()
     labels = data["label"].asnumpy()
@@ -134,6 +87,5 @@ if __name__ == "__main__":
     with open(file_path, 'a+') as file:
         file.write(" {}: {:.2f} \n".format("Predicted", predicted[0]))
 
-    ###Copy result data from the local running environment back to obs,
-    ###and download it in the inference task corresponding to the Qizhi platform
-    EnvToObs(result_dir, args.result_url)
\ No newline at end of file
+    ###Upload the results back to the Qizhi platform
+    EnvToOpenI(result_dir, args.result_url)
\ No newline at end of file
diff --git a/inference_for_multidataset.py b/inference_for_multidataset.py
deleted file mode 100644
index 31f9a47..0000000
--- a/inference_for_multidataset.py
+++ /dev/null
@@ -1,158 +0,0 @@
-"""
-######################## multi-dataset inference lenet example ########################
-This example is a single-dataset inference tutorial.
-
-######################## Instructions for using the inference environment ########################
-1、Inference task requires predefined functions
-(1)Copy multi dataset from obs to inference image.
-function MultiObsToEnv(obs_data_url, data_dir)
-
-(2)Copy ckpt file from obs to inference image.
-function ObsUrlToEnv(obs_ckpt_url, ckpt_url)
-
-(3)Copy the output result to obs.
-function EnvToObs(train_dir, obs_train_url)
-
-3、5 parameters need to be defined.
---data_url is the first dataset you selected on the Qizhi platform
---multi_data_url is the multi dataset you selected on the Qizhi platform
---ckpt_url is the weight file you choose on the Qizhi platform
---result_url is the output
-
---data_url,--multi_data_url,--ckpt_url,--result_url,--device_target,These 5 parameters must be defined first in a single dataset,
-otherwise an error will be reported.
-There is no need to add these parameters to the running parameters of the Qizhi platform,
-because they are predefined in the background, you only need to define them in your code.
-
-4、How the dataset is used
-Multi-datasets use multi_data_url as input, data_dir + dataset name + file or folder name in the dataset as the
-calling path of the dataset in the inference image.
-For example, the calling path of the test folder in the MNIST_Data dataset in this example is
-data_dir + "/MNIST_Data" +"/test"
-
-For details, please refer to the following sample code.
-"""
-
-import os
-import argparse
-import moxing as mox
-import mindspore.nn as nn
-from mindspore import context
-from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.train import Model
-from mindspore.nn.metrics import Accuracy
-from mindspore import Tensor
-import numpy as np
-from glob import glob
-from dataset import create_dataset
-from config import mnist_cfg as cfg
-from lenet import LeNet5
-import json
-
-### Copy multiple datasets from obs to inference image ###
-def MultiObsToEnv(multi_data_url, data_dir):
-    #--multi_data_url is json data, need to do json parsing for multi_data_url
-    multi_data_json = json.loads(multi_data_url)
-    for i in range(len(multi_data_json)):
-        path = data_dir + "/" + multi_data_json[i]["dataset_name"]
-        if not os.path.exists(path):
-            os.makedirs(path)
-        try:
-            mox.file.copy_parallel(multi_data_json[i]["dataset_url"], path)
-            print("Successfully Download {} to {}".format(multi_data_json[i]["dataset_url"],path))
-        except Exception as e:
-            print('moxing download {} to {} failed: '.format(
-                multi_data_json[i]["dataset_url"], path) + str(e))
-    return
-### Copy ckpt file from obs to inference image###
-### To operate on folders, use mox.file.copy_parallel. If copying a file.
-### Please use mox.file.copy to operate the file, this operation is to operate the file
-def ObsUrlToEnv(obs_ckpt_url, ckpt_url):
-    try:
-        mox.file.copy(obs_ckpt_url, ckpt_url)
-        print("Successfully Download {} to {}".format(obs_ckpt_url,ckpt_url))
-    except Exception as e:
-        print('moxing download {} to {} failed: '.format(obs_ckpt_url, ckpt_url) + str(e))
-    return
-### Copy the output result to obs###
-def EnvToObs(train_dir, obs_train_url):
-    try:
-        mox.file.copy_parallel(train_dir, obs_train_url)
-        print("Successfully Upload {} to {}".format(train_dir,obs_train_url))
-    except Exception as e:
-        print('moxing upload {} to {} failed: '.format(train_dir,obs_train_url) + str(e))
-    return
-
-
-
-### --data_url,--multi_data_url,--ckpt_url,--result_url,--device_target,These 5 parameters must be defined first in a multi dataset inference task,
-### otherwise an error will be reported.
-### There is no need to add these parameters to the running parameters of the Qizhi platform,
-### because they are predefined in the background, you only need to define them in your code.
-parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
-parser.add_argument('--data_url',
-                    type=str,
-                    default= '/cache/data1/',
-                    help='path where the dataset is saved')
-parser.add_argument('--multi_data_url',
-                    type=str,
-                    default= '/cache/data/',
-                    help='path where the dataset is saved')
-parser.add_argument('--ckpt_url',
-                    help='model to save/load',
-                    default= '/cache/checkpoint.ckpt')
-parser.add_argument('--result_url',
-                    help='result folder to save/load',
-                    default= '/cache/result/')
-parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
-                    help='device where the code will be implemented (default: Ascend)')
-
-if __name__ == "__main__":
-    args, unknown = parser.parse_known_args()
-
-    ###Initialize the data and result directories in the inference image###
-    data_dir = '/cache/data'
-    result_dir = '/cache/result'
-    ckpt_url = '/cache/checkpoint.ckpt'
-    if not os.path.exists(data_dir):
-        os.makedirs(data_dir)
-    if not os.path.exists(result_dir):
-        os.makedirs(result_dir)
-
-    ###Copy multiple dataset from obs to inference image
-    MultiObsToEnv(args.multi_data_url, data_dir)
-
-    ###Copy ckpt file from obs to inference image
-    ObsUrlToEnv(args.ckpt_url, ckpt_url)
-
-    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
-    network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
-    repeat_size = cfg.epoch_size
-    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
-    model = Model(network, net_loss, net_opt, metrics={"Accuracy"})
-
-    print("============== Starting Testing ==============")
-
-    param_dict = load_checkpoint(os.path.join(ckpt_url))
-    load_param_into_net(network, param_dict)
-    ds_test = create_dataset(os.path.join(data_dir + "/MNISTData", "test"), batch_size=1).create_dict_iterator()
-    data = next(ds_test)
-    images = data["image"].asnumpy()
-    labels = data["label"].asnumpy()
-    print('Tensor:', Tensor(data['image']))
-    output = model.predict(Tensor(data['image']))
-    predicted = np.argmax(output.asnumpy(), axis=1)
-    pred = np.argmax(output.asnumpy(), axis=1)
-    print('predicted:', predicted)
-    print('pred:', pred)
-
-    print(f'Predicted: "{predicted[0]}", Actual: "{labels[0]}"')
-    filename = 'result.txt'
-    file_path = os.path.join(result_dir, filename)
-    with open(file_path, 'a+') as file:
-        file.write(" {}: {:.2f} \n".format("Predicted", predicted[0]))
-
-    ###Copy result data from the local running environment back to obs,
-    ###and download it in the inference task corresponding to the Qizhi platform
-    EnvToObs(result_dir, args.result_url)
\ No newline at end of file
diff --git a/pretrain.py b/pretrain.py
index 3b4b488..ace1b0b 100644
--- a/pretrain.py
+++ b/pretrain.py
@@ -107,7 +107,7 @@ if __name__ == "__main__":
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
 
     ###If a model file was selected, use pretrain_dir; note that the ckpt_url approach is still kept, so you can still use ckpt_url, but it will gradually be deprecated
-    load_param_into_net(network, load_checkpoint(os.path.join(pretrain_dir, "checkpoint_lenet-2_1875.ckpt")))
+    load_param_into_net(network, load_checkpoint(os.path.join(pretrain_dir, "checkpoint_lenet-1_1875.ckpt")))
 
     if args.device_target != "Ascend":
         model = Model(network,
diff --git a/upload.py b/upload.py
deleted file mode 100644
index 6060ab2..0000000
--- a/upload.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from mindspore.train.callback import Callback
-import moxing as mox
-
-class UploadOutput(Callback):
-    def __init__(self, train_dir, obs_train_url):
-        self.train_dir = train_dir
-        self.obs_train_url = obs_train_url
-    def epoch_end(self,run_context):
-        try:
-            mox.file.copy_parallel(self.train_dir , self.obs_train_url )
-            print("Successfully Upload {} to {}".format(self.train_dir ,self.obs_train_url ))
-        except Exception as e:
-            print('moxing upload {} to {} failed: '.format(self.train_dir ,self.obs_train_url ) + str(e))
-        return
diff --git a/upload_for_c2net.py b/upload_for_c2net.py
deleted file mode 100644
index b725724..0000000
--- a/upload_for_c2net.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from mindspore.train.callback import Callback
-import os
-
-class UploadOutput(Callback):
-    def epoch_end(self,run_context):
-        os.system("cd /cache/script_for_grampus/ &&./uploader_for_npu " + "/cache/output/")
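
Note: OpenI.py itself is not included in this patch, so the helpers that the new inference.py imports (OpenIMultiDatasetToEnv, OpenIPretrainToEnv, EnvToOpenI) are not shown here. The following is a minimal sketch of what those helpers are expected to do, reconstructed from the moxing-based functions removed above; the JSON layout of --multi_data_url follows the deleted MultiObsToEnv code, the handling of --pretrain_url is an assumption, and the real OpenI.py shipped on the Qizhi platform may differ in details.

# openi_sketch.py -- hypothetical approximation of the OpenI.py helpers used by inference.py
import json
import os

import moxing as mox  # available inside the Qizhi/ModelArts runtime images


def OpenIMultiDatasetToEnv(multi_data_url, data_dir):
    """Copy every dataset listed in the --multi_data_url JSON string into data_dir/<dataset_name>."""
    # Assumed payload, mirroring the deleted MultiObsToEnv():
    # [{"dataset_name": "MNISTData", "dataset_url": "obs://..."}, ...]
    for item in json.loads(multi_data_url):
        path = os.path.join(data_dir, item["dataset_name"])
        if not os.path.exists(path):
            os.makedirs(path)
        try:
            mox.file.copy_parallel(item["dataset_url"], path)
            print("Successfully Download {} to {}".format(item["dataset_url"], path))
        except Exception as e:
            print("moxing download {} to {} failed: {}".format(item["dataset_url"], path, e))


def OpenIPretrainToEnv(pretrain_url, pretrain_dir):
    """Copy the pretrained model file(s) selected on the platform into pretrain_dir (treated here as an obs folder copy)."""
    try:
        mox.file.copy_parallel(pretrain_url, pretrain_dir)
        print("Successfully Download {} to {}".format(pretrain_url, pretrain_dir))
    except Exception as e:
        print("moxing download {} to {} failed: {}".format(pretrain_url, pretrain_dir, e))


def EnvToOpenI(train_dir, obs_train_url):
    """Copy the result directory back to obs so it can be downloaded from the task page on the Qizhi platform."""
    try:
        mox.file.copy_parallel(train_dir, obs_train_url)
        print("Successfully Upload {} to {}".format(train_dir, obs_train_url))
    except Exception as e:
        print("moxing upload {} to {} failed: {}".format(train_dir, obs_train_url, e))

Under this assumed behaviour, a MNISTData entry in --multi_data_url lands in /cache/data/MNISTData, which is why the patch changes create_dataset to read from os.path.join(data_dir + "/MNISTData", "test"), and the selected checkpoint lands in /cache/pretrain, matching the new load_checkpoint path.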