72 lines
2.6 KiB
Python
72 lines
2.6 KiB
Python
'''
|
||
imagnet-1k 数据集已通过磁盘挂载的方式挂载在训练镜像中,
|
||
用户可参考下列示例代码读取直接使用。
|
||
|
||
挂载路径为
|
||
.
|
||
└── cache/
|
||
├── ascend
|
||
├── outputs
|
||
├── user-job-dir
|
||
└── sfs/
|
||
└── data/
|
||
└── imagenet/
|
||
├── train/
|
||
│ └── n01440764/
|
||
│ └── n01440764_11063.JPEG
|
||
└── val/
|
||
└── n01440764/
|
||
└── ILSVRC2012_val_00011993.JPEG
|
||
|
||
mindspore.dataset.ImageFolderDataset
|
||
- 读取imagenet-1k数据,同一文件夹下的数据为同一类class。
|
||
mindspore.dataset.vision.c_transforms
|
||
- 数据加载和预处理。
|
||
mindspore.dataset.ImageFolderDataset
|
||
- map:给定一组数据增强列表,按顺序将数据增强作用在数据集对象上。
|
||
- batch:将数据集中连续 batch_size 条数据合并为一个批处理数据。
|
||
- to_json:将数据处理管道序列化为JSON字符串,如果提供了文件名,则转储到文件中。
|
||
|
||
'''
|
||
|
||
import os
|
||
import argparse
|
||
import moxing as mox
|
||
|
||
import mindspore as ms
|
||
from mindspore.dataset import ImageFolderDataset
|
||
import mindspore.dataset.vision.c_transforms as transforms
|
||
from openi import env_to_openi
|
||
|
||
parser = argparse.ArgumentParser(description='Read big dataset ImageNet Example')
|
||
parser.add_argument('--train_url',
|
||
help='output folder to save/load',
|
||
default= '/cache/output/')
|
||
|
||
if __name__ == "__main__":
|
||
args, unknown = parser.parse_known_args()
|
||
data_path = '/cache/sfs/data/imagenet/'
|
||
modelart_output = '/cache/output'
|
||
if not os.path.exists(modelart_output):
|
||
os.makedirs(modelart_output)
|
||
|
||
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
|
||
std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
|
||
dataset_train = ImageFolderDataset(os.path.join(data_path, "train"),
|
||
shuffle=True)
|
||
trans_train = [
|
||
transforms.RandomCropDecodeResize(size=224,
|
||
scale=(0.08, 1.0),
|
||
ratio=(0.75, 1.333)),
|
||
transforms.RandomHorizontalFlip(prob=0.5),
|
||
transforms.Normalize(mean=mean, std=std),
|
||
transforms.HWC2CHW()
|
||
]
|
||
|
||
dataset_train = dataset_train.map(operations=trans_train,
|
||
input_columns=["image"])
|
||
dataset_train = dataset_train.batch(batch_size=16, drop_remainder=True)
|
||
|
||
data_info = dataset_train.to_json(filename= modelart_output + '/data_info.json')
|
||
print(data_info)
|
||
env_to_openi(modelart_output, args.train_url) |