diff --git a/docs/api/api_python/mindspore/mindspore.set_context.rst b/docs/api/api_python/mindspore/mindspore.set_context.rst
index 5263850a186..9f8ed9f4e22 100644
--- a/docs/api/api_python/mindspore/mindspore.set_context.rst
+++ b/docs/api/api_python/mindspore/mindspore.set_context.rst
@@ -78,6 +78,8 @@ mindspore.set_context
     |                         | memory_optimize_level        | CPU/GPU/Ascend             |
     |                         +------------------------------+----------------------------+
     |                         | memory_offload               | GPU/Ascend                 |
+    |                         +------------------------------+----------------------------+
+    |                         | ascend_config                | Ascend                     |
     +-------------------------+------------------------------+----------------------------+
 
     参数:
@@ -167,6 +169,22 @@ mindspore.set_context
       - ON:开启memory offload功能。在Ascend硬件平台，未设置环境变量“GRAPH_OP_RUN=1”时本参数不生效；设置memory_optimize_level='O1'时本参数不生效。
       - OFF:关闭memory offload功能。
+    - **ascend_config** (dict) - 设置Ascend硬件平台专用的参数，默认不设置。当前仅支持在Ascend910B硬件平台设置，其他平台不生效。
+
+      - **precision_mode** (str): 混合精度模式设置。Ascend910B硬件平台训练网络默认值：must_keep_origin_dtype，推理网络默认值：force_fp16。其值范围如下：
+
+        - force_fp16: 当算子既支持float16，又支持float32时，直接选择float16。
+        - allow_fp32_to_fp16: 当算子不支持float32数据类型时，直接降低精度到float16。
+        - allow_mix_precision: 自动混合精度，针对全网算子，按照内置的优化策略，自动将部分算子的精度降低到float16或bfloat16。
+        - must_keep_origin_dtype: 保持原图精度。
+        - force_fp32: 当算子既支持float16，又支持float32时，直接选择float32。
+        - force_lowerprecision: 当算子既支持float16或者bfloat16，又支持float32时，直接选择float16或者bfloat16。
+        - allow_fp32_to_bf16: 当算子不支持float32数据类型时，直接降低精度到bfloat16。
+        - allow_fp32_to_lowprecision: 当算子不支持float32数据类型时，直接降低精度到float16或者bfloat16。
+        - allow_mix_precision_fp16: 自动混合精度，针对全网算子，按照内置的优化策略，自动将部分算子的精度降低到float16。
+        - allow_mix_precision_bf16: 自动混合精度，针对全网算子，按照内置的优化策略，自动将部分算子的精度降低到bfloat16。
+
+      - **jit_compile** (bool): 表示是否选择在线编译。默认值：True。当设置为False时，优先选择系统中已经编译好的算子二进制文件，提升编译性能。
 
     异常:
         - **ValueError** - 输入key不是上下文中的属性。
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
index 86fe0acf7f6..0c016f551cb 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include "include/transform/graph_ir/types.h"
 #include "include/transform/graph_ir/utils.h"
 #include "include/common/utils/utils.h"
@@ -50,6 +51,7 @@ constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG";
 constexpr auto kOpDebugConfigFile = "ge_op_debug_config.ini";
 constexpr char kGeDumpMode[3][7] = {"all", "input", "output"};
 const std::set<std::string> kIgnoreGEShapeOps = {kSoftMarginLossOpName};
+const std::set<std::string> kAscend910BVersions = {"Ascend910B1", "Ascend910B2", "Ascend910B3", "Ascend910B4"};
 
 std::string GetGraphName(const FuncGraphPtr &graph) {
   MS_EXCEPTION_IF_NULL(graph);
@@ -567,6 +569,32 @@ void UseOpDebugConfig(std::map<std::string, std::string> *ge_options) {
     MS_LOG(INFO) << "Use MS_COMPILER_OP_DEBUG_CONFIG:" << ge_op_debug_config;
   }
 }
+void GeDeviceContext::SetAscendConfig(const std::shared_ptr<MsContext> &ms_context_ptr,
+                                      std::map<std::string, std::string> *ge_options) {
+  MS_EXCEPTION_IF_NULL(ms_context_ptr);
+  MS_EXCEPTION_IF_NULL(ge_options);
+  if (ms_context_ptr->get_param<std::string>(MS_CTX_PRECISION_MODE) != "") {
+    (*ge_options)["ge.exec.precision_mode"] = ms_context_ptr->get_param<std::string>(MS_CTX_PRECISION_MODE);
+    MS_LOG(INFO) << "Set precision_mode " << ms_context_ptr->get_param<std::string>(MS_CTX_PRECISION_MODE) << ".";
+  } else if (IsGeTrain()) {
+    auto soc_version = device::ascend::GetSocVersion();
+    if (kAscend910BVersions.count(soc_version) != 0) {
+      (*ge_options)["ge.exec.precision_mode"] = "must_keep_origin_dtype";
+      MS_LOG(INFO) << "Set precision_mode must_keep_origin_dtype. soc_version is " << soc_version;
+    } else {
+      (*ge_options)["ge.exec.precision_mode"] = "allow_fp32_to_fp16";
+      MS_LOG(INFO) << "Set precision_mode allow_fp32_to_fp16. soc_version is " << soc_version;
+    }
+  } else {
+    (*ge_options)["ge.exec.precision_mode"] = "force_fp16";
+  }
+
+  if (ms_context_ptr->get_param<bool>(MS_CTX_ENABLE_JIT_COMPILE)) {
+    (*ge_options)["ge.jit_compile"] = "1";
+  } else {
+    (*ge_options)["ge.jit_compile"] = "0";
+  }
+}
 
 void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_ptr,
                                    std::map<std::string, std::string> *ge_options) {
@@ -664,11 +692,7 @@ void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_
     MS_LOG(WARNING) << "Set proto lib path failed!";
   }
 
-  if (training) {
-    (*ge_options)["ge.exec.precision_mode"] = "allow_mix_precision";
-  } else {
-    (*ge_options)["ge.exec.precision_mode"] = "force_fp16";
-  }
+  SetAscendConfig(ms_context_ptr, ge_options);
 
   (*ge_options)["ge.enableSmallChannel"] = "1";
 
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h
index fa2a3035982..d3992b806fc 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.h
@@ -96,6 +96,8 @@ class GeDeviceContext : public DeviceInterface &inst_context);
   void GetGeOptions(const std::shared_ptr<MsContext> &inst_context, std::map<std::string, std::string> *ge_options);
   void SetHcclOptions(const std::shared_ptr<MsContext> &inst_context, std::map<std::string, std::string> *ge_options);
+  void SetAscendConfig(const std::shared_ptr<MsContext> &ms_context_ptr,
+                       std::map<std::string, std::string> *ge_options);
   void SetDisableReuseMemoryFlag(std::map<std::string, std::string> *ge_options) const;
 
   std::unique_ptr deprecated_interface_;
diff --git a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
index 9b766969ba4..ddcc9144117 100644
--- a/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
+++ b/mindspore/ccsrc/pybind_api/utils/ms_context_py.cc
@@ -93,6 +93,8 @@ void RegMsContext(const py::module *m) {
     .value("profiling_options", MsCtxParam::MS_CTX_PROFILING_OPTIONS)
     .value("save_dump_path", MsCtxParam::MS_CTX_SAVE_DUMP_PATH)
     .value("deterministic", MsCtxParam::MS_CTX_DETERMINISTIC)
+    .value("precision_mode", MsCtxParam::MS_CTX_PRECISION_MODE)
+    .value("jit_compile", MsCtxParam::MS_CTX_ENABLE_JIT_COMPILE)
     .value("save_graphs_path", MsCtxParam::MS_CTX_SAVE_GRAPHS_PATH)
     .value("compile_cache_path", MsCtxParam::MS_CTX_COMPILE_CACHE_PATH)
     .value("variable_memory_max_size", MsCtxParam::MS_CTX_VARIABLE_MEMORY_MAX_SIZE)
diff --git a/mindspore/core/utils/ms_context.cc b/mindspore/core/utils/ms_context.cc
index 63b3f7c238c..4f70455bb6e 100644
--- a/mindspore/core/utils/ms_context.cc
+++ b/mindspore/core/utils/ms_context.cc
@@ -64,6 +64,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   set_param<bool>(MS_CTX_ENABLE_DUMP, false);
   set_param<std::string>(MS_CTX_SAVE_DUMP_PATH, ".");
   set_param<std::string>(MS_CTX_DETERMINISTIC, "OFF");
+  set_param<std::string>(MS_CTX_PRECISION_MODE, "");
   set_param<std::string>(MS_CTX_ENV_CONFIG_PATH, "");
   set_param<std::string>(MS_CTX_TUNE_MODE, "NO_TUNE");
   set_param<std::string>(MS_CTX_GRAPH_KERNEL_FLAGS, "");
@@ -117,6 +118,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   set_param<bool>(MS_CTX_ENABLE_RECOVERY, false);
   set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, false);
   set_param<bool>(MS_CTX_DISABLE_FORMAT_TRANSFORM, false);
+  set_param<bool>(MS_CTX_ENABLE_JIT_COMPILE, true);
   set_param(MS_CTX_MEMORY_OPTIMIZE_LEVEL, kOptimizeO0);
   set_param(MS_CTX_OP_TIMEOUT, kOpTimeout);
diff --git a/mindspore/core/utils/ms_context.h b/mindspore/core/utils/ms_context.h
index d196644c23a..e39387033b3 100644
--- a/mindspore/core/utils/ms_context.h
+++ b/mindspore/core/utils/ms_context.h
@@ -96,6 +96,7 @@ enum MsCtxParam : unsigned {
   MS_CTX_ENABLE_RECOVERY,
   MS_CTX_ENABLE_GE_HETEROGENOUS,
   MS_CTX_DISABLE_FORMAT_TRANSFORM,
+  MS_CTX_ENABLE_JIT_COMPILE,
   MS_CTX_TYPE_BOOL_END,
 
   // parameter of type int
@@ -139,6 +140,7 @@ enum MsCtxParam : unsigned {
   MS_CTX_GRAPH_KERNEL_FLAGS,
   MS_CTX_INFER_PRECISION_MODE,  // GPU inference precision mode configured by Serving or Unify API.
   MS_CTX_DETERMINISTIC,
+  MS_CTX_PRECISION_MODE,
   MS_CTX_TYPE_STRING_END,
 
   // parameter numbers of each type
diff --git a/mindspore/python/mindspore/context.py b/mindspore/python/mindspore/context.py
index a034816a554..1e6daca76b9 100644
--- a/mindspore/python/mindspore/context.py
+++ b/mindspore/python/mindspore/context.py
@@ -244,6 +244,35 @@ class _Context:
                              f"{deterministic_options}, but got {deterministic}.")
         self.set_param(ms_ctx_param.deterministic, deterministic)
 
+    def set_ascend_config(self, ascend_config):
+        """
+        Set Ascend-specific configuration.
+
+        Args:
+            ascend_config (dict): Keys are 'precision_mode' (str) and 'jit_compile' (bool), where
+                precision_mode is one of "force_fp16", "allow_fp32_to_fp16", "allow_mix_precision",
+                "must_keep_origin_dtype", "force_fp32", "force_lowerprecision", "allow_fp32_to_bf16",
+                "allow_fp32_to_lowprecision", "allow_mix_precision_fp16" and "allow_mix_precision_bf16".
+        """
+
+        ascend_cfgs = {'precision_mode': ["force_fp16", "allow_fp32_to_fp16", "allow_mix_precision",
+                                          "must_keep_origin_dtype", "force_fp32", "force_lowerprecision",
+                                          "allow_fp32_to_bf16", "allow_fp32_to_lowprecision",
+                                          "allow_mix_precision_fp16", "allow_mix_precision_bf16"],
+                       'jit_compile': [True, False]}
+        for ascend_key in ascend_config:
+            if ascend_key not in ascend_cfgs:
+                raise ValueError(f"For 'context.set_context', the key of argument 'ascend_config' must be one of "
+                                 f"{ascend_cfgs}, but got {ascend_key}.")
+            supported_modes = ascend_cfgs.get(ascend_key)
+            if ascend_config[ascend_key] not in supported_modes:
+                raise ValueError(f"For 'ascend_config', the value of argument {ascend_key} must be one of "
+                                 f"{supported_modes}, but got {ascend_config[ascend_key]}.")
+            if ascend_key == 'precision_mode':
+                self.set_param(ms_ctx_param.precision_mode, ascend_config[ascend_key])
+            if ascend_key == 'jit_compile':
+                self.set_param(ms_ctx_param.jit_compile, ascend_config[ascend_key])
+
     def set_backend_policy(self, policy):
         success = self._context_handle.set_backend_policy(policy)
         if not success:
@@ -421,7 +450,8 @@ class _Context:
             'memory_optimize_level': set_memory_optimize_level,
             'op_timeout': set_op_timeout,
             'memory_offload': set_memory_offload,
-            'deterministic': set_deterministic
+            'deterministic': set_deterministic,
+            'ascend_config': set_ascend_config
         }
 
     @property
@@ -706,7 +736,8 @@ def _check_target_specific_cfgs(device, arg_key):
                    'auto_tune_mode': ['Ascend'],
                    'max_device_memory': ['Ascend', 'GPU'],
                    'mempool_block_size': ['GPU', 'Ascend'],
-                   'disable_format_transform': ['GPU']
+                   'disable_format_transform': ['GPU'],
+                   'ascend_config': ['Ascend']
                    }
     # configs not in map device_cfgs are supposed to be suitable for all devices
     if arg_key not in device_cfgs:
@@ -728,7 +759,7 @@ def _check_target_specific_cfgs(device, arg_key):
                  max_device_memory=str, print_file_path=str, max_call_depth=int, env_config_path=str,
                  graph_kernel_flags=str, save_compile_cache=bool,
                  runtime_num_threads=int, load_compile_cache=bool, grad_for_scalar=bool, pynative_synchronize=bool, mempool_block_size=str, disable_format_transform=bool,
-                 op_timeout=int, deterministic=str)
+                 op_timeout=int, deterministic=str, ascend_config=dict)
 def set_context(**kwargs):
     """
     Set context for running environment.
@@ -809,6 +840,8 @@ def set_context(**kwargs):
     |                         | memory_optimize_level        | CPU/GPU/Ascend             |
     |                         +------------------------------+----------------------------+
     |                         | memory_offload               | GPU/Ascend                 |
+    |                         +------------------------------+----------------------------+
+    |                         | ascend_config                | Ascend                     |
     +-------------------------+------------------------------+----------------------------+
 
     Args:
@@ -982,7 +1015,33 @@ def set_context(**kwargs):
              when the environment variable "GRAPH_OP_RUN=1" is not set; This parameter does not take effect when
              memory_optimize_level is set 'O1'.
            - OFF: Turn off the memory Offload function.
+        ascend_config (dict): Set the parameters specific to the Ascend hardware platform. It is not set by
+            default. Currently, it only takes effect on the Ascend910B hardware platform; the supported keys are
+            'precision_mode' and 'jit_compile'.
+
+            - precision_mode (str): Mixed precision mode setting. On the Ascend910B hardware platform, the default
+              value for training networks is must_keep_origin_dtype, and the default value for inference networks
+              is force_fp16. The value range is as follows:
+
+              - force_fp16: When the operator supports both float16 and float32, select float16 directly.
+              - allow_fp32_to_fp16: When the operator does not support the float32 data type, directly reduce
+                the precision to float16.
+              - allow_mix_precision: Automatic mixed precision. For operators of the whole network, automatically
+                reduce the precision of some operators to float16 or bfloat16 according to the built-in
+                optimization strategy.
+              - must_keep_origin_dtype: Keep the precision of the original graph.
+              - force_fp32: When the operator supports both float16 and float32, select float32 directly.
+              - force_lowerprecision: When the operator supports both float16 or bfloat16 and float32, select
+                float16 or bfloat16 directly.
+              - allow_fp32_to_bf16: When the operator does not support the float32 data type, directly reduce
+                the precision to bfloat16.
+              - allow_fp32_to_lowprecision: When the operator does not support the float32 data type, directly
+                reduce the precision to float16 or bfloat16.
+              - allow_mix_precision_fp16: Automatic mixed precision. For operators of the whole network,
+                automatically reduce the precision of some operators to float16 according to the built-in
+                optimization strategy.
+              - allow_mix_precision_bf16: Automatic mixed precision. For operators of the whole network,
+                automatically reduce the precision of some operators to bfloat16 according to the built-in
+                optimization strategy.
+
+            - jit_compile (bool): Whether to select online compilation. Default: True. When set to False,
+              operator binaries already compiled on the system are preferred, which can improve compilation
+              performance.
 
     Raises:
         ValueError: If input key is not an attribute in context.
@@ -1014,6 +1073,7 @@ def set_context(**kwargs):
         >>> ms.set_context(memory_optimize_level='O0')
         >>> ms.set_context(memory_offload='ON')
         >>> ms.set_context(deterministic='ON')
+        >>> ms.set_context(ascend_config={"precision_mode": "force_fp16", "jit_compile": True})
     """
     ctx = _context()
     # set device target first
@@ -1029,6 +1089,8 @@ def set_context(**kwargs):
             logger.warning(f"For 'context.set_context', '{key}' parameter is deprecated. "
" "For details, please see the interface parameter API comments") continue + if key in ('precision_mode', 'jit_compile'): + raise ValueError(f"Please set '{key}' through parameter ascend_config") if key == 'save_graphs': if value is True: value = 1 diff --git a/tests/ut/python/pynative_mode/test_context.py b/tests/ut/python/pynative_mode/test_context.py index 74119f7ffaa..14a4b47ee2d 100644 --- a/tests/ut/python/pynative_mode/test_context.py +++ b/tests/ut/python/pynative_mode/test_context.py @@ -99,6 +99,31 @@ def test_max_device_memory_size(): context.set_context(max_device_memory="3.5G") context.set_context.__wrapped__(max_device_memory="3GB") + +def test_ascend_config(): + """" + Feature: test_ascend_config function + Description: Test case for simplest ascend_config + Expectation: The results are as expected + """ + context.set_context(device_target="Ascend") + with pytest.raises(ValueError): + context.set_context(precision_mode="force_fp16") + with pytest.raises(ValueError): + context.set_context(jit_compile=True) + with pytest.raises(ValueError): + context.set_context(ascend_config={"precision_mode": "xxx"}) + with pytest.raises(ValueError): + context.set_context(ascend_config={"xxxx": 1}) + with pytest.raises(ValueError): + context.set_context(ascend_config={"jit_compile": "xxx"}) + with pytest.raises(ValueError): + context.set_context(ascend_config={"jit_compile": 2}) + with pytest.raises(ValueError): + context.set_context(ascend_config={"precision_mode": 2}) + context.set_context.__wrapped__(ascend_config={"precision_mode": "force_fp16", "jit_compile": True}) + + def test_print_file_path(): """test_print_file_path""" with pytest.raises(IOError):