-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #448 from ValeevGroup/evaleev/feature/mkl-fair-dis…
…patch allows to use fair dispatch in Intel MKL
- Loading branch information
Showing
10 changed files
with
266 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
src/TiledArray/external/agnerfog/intel_cpu_feature_patch.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/*********************** intel_cpu_feature_patch.c ************************** | ||
* Author: Agner Fog | ||
* Date created: 2014-07-30 | ||
* Last modified: 2019-12-29 | ||
* Source URL: https://www.agner.org/optimize/intel_dispatch_patch.zip | ||
* Language: C or C++ | ||
* | ||
* Description: | ||
* Patch for Intel compiler version 13.0 and later, including the general | ||
* libraries, LIBM and SVML, but not MKL and VML. | ||
* | ||
* Example of how to patch Intel's CPU feature dispatcher in order to improve | ||
* compatibility of generated code with non-Intel processors. | ||
* In Windows: Use the static link libraries (*.lib), not the dynamic link | ||
* libraries (*.DLL). | ||
* In Linux and Mac: use static linking (*.a) or dynamic linking (*.so). | ||
* | ||
* Include this code in your C or C++ program and call intel_cpu_patch(); | ||
* before any call to the library functions. | ||
* | ||
* Copyright (c) 2014-2019. BSD License 2.0 | ||
******************************************************************************/ | ||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus  // use C-style linking
extern "C" {
#endif

// Link to Intel libraries. These symbols are only declared here; their
// definitions are expected to be supplied at link time.
extern int64_t __intel_cpu_feature_indicator;    // CPU feature bits
extern int64_t __intel_cpu_feature_indicator_x;  // CPU feature bits

// unfair dispatcher: checks CPU features for Intel CPU's only
void __intel_cpu_features_init(void);

// fair dispatcher: checks CPU features without discriminating by CPU brand
void __intel_cpu_features_init_x(void);

#ifdef __cplusplus
}  // end of extern "C"
#endif

/*
 * Force a re-evaluation of the CPU features without discriminating by CPU
 * brand.
 *
 * Both feature indicators are cleared, the fair initializer is run, and the
 * value it leaves in __intel_cpu_feature_indicator_x is then copied into
 * __intel_cpu_feature_indicator so that both indicators agree.
 *
 * Takes no arguments and returns nothing. Per the file header, call it before
 * any call to the Intel library functions.
 */
void intel_cpu_patch(void) {
  // reset both indicators before re-running the detection
  __intel_cpu_feature_indicator = 0;
  __intel_cpu_feature_indicator_x = 0;

  // run the brand-agnostic ("fair") feature detection
  __intel_cpu_features_init_x();

  // mirror the fair result into the other indicator as well
  __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/*********************** intel_mkl_cpuid_patch.c ************************** | ||
* Author: Agner Fog | ||
* Date created: 2019-12-29 | ||
* Source URL: https://www.agner.org/optimize/intel_dispatch_patch.zip | ||
* Language: C or C++ | ||
* | ||
* Description: | ||
* Patch for Intel Math Kernel Library (MKL) version 14.0 and later, except | ||
* the Vector Math Library (VML). | ||
* | ||
* Example of how to override Intel's CPU feature dispatcher in order to improve | ||
* compatibility of Intel function libraries with non-Intel processors. | ||
* | ||
* Include this code in your C or C++ program and make sure it is linked before | ||
* any Intel libraries. You may need to include intel_mkl_feature_patch.c as | ||
* well. | ||
* | ||
* Copyright (c) 2019. BSD License 2.0 | ||
******************************************************************************/ | ||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus  // use C-style linking
extern "C" {
#endif

/*
 * Replacement definitions for CPU-detection functions of the Intel MKL
 * library. Each function ignores the actual hardware and returns a fixed
 * value. Per the file header, this file must be linked before any Intel
 * libraries so that these definitions are the ones that get used.
 *
 * All functions take no arguments and return an int.
 */

// detect if Intel CPU: always answers 1 ("is Intel"), regardless of CPU brand
int mkl_serv_intel_cpu(void) { return 1; }

// detect if Intel CPU: always answers 1 ("is Intel"), regardless of CPU brand
int mkl_serv_intel_cpu_true(void) { return 1; }

// The next three feature queries always answer 1.
// NOTE(review): exact meaning of "pnr"/"nhm" is not documented here; confirm.
int mkl_serv_cpuhaspnr_true(void) { return 1; }

int mkl_serv_cpuhaspnr(void) { return 1; }

int mkl_serv_cpuhasnhm(void) { return 1; }

// The "cpuis..." queries below always answer 0.
// NOTE(review): the specific CPU model each name refers to is inferred from
// the identifier only; confirm against MKL before relying on it.
int mkl_serv_cpuisbulldozer(void) { return 0; }

int mkl_serv_cpuiszen(void) { return 0; }

int mkl_serv_cpuisatomsse4_2(void) { return 0; }

int mkl_serv_cpuisatomssse3(void) { return 0; }

int mkl_serv_cpuisitbarcelona(void) { return 0; }

int mkl_serv_cpuisskl(void) { return 0; }

int mkl_serv_cpuisknm(void) { return 0; }

int mkl_serv_cpuisclx(void) { return 0; }

// Always reports microarchitecture code 33.
int mkl_serv_get_microarchitecture(void) {
  // I don't know what this number means
  return 33;
}

#ifdef __cplusplus
}  // end of extern "C"
#endif
49 changes: 49 additions & 0 deletions
49
src/TiledArray/external/agnerfog/intel_mkl_feature_patch.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/*********************** intel_mkl_feature_patch.c ************************** | ||
* Author: Agner Fog | ||
* Date created: 2014-07-30 | ||
* Last modified: 2019-12-29 | ||
* Source URL: https://www.agner.org/optimize/intel_dispatch_patch.zip | ||
* Language: C or C++ | ||
* | ||
* Description: | ||
* Patch for Intel Math Kernel Library (MKL) version 14.0 and later, except | ||
* the Vector Math Library (VML). | ||
* | ||
* Example of how to patch Intel's CPU feature dispatcher in order to improve | ||
* compatibility of Intel function libraries with non-Intel processors. | ||
* In Windows: Use the static link libraries (*.lib), not the dynamic link | ||
* libraries (*.DLL). | ||
* In Linux and Mac: use static linking (*.a) or dynamic linking (*.so). | ||
* | ||
* Include this code in your C or C++ program and call | ||
* intel_mkl_use_fair_dispatch(); before any call to the MKL functions. You may | ||
* need to include intel_mkl_cpuid_patch.c as well. | ||
* | ||
* Copyright (c) 2014-2019. BSD License 2.0 | ||
******************************************************************************/ | ||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus  // use C-style linking
extern "C" {
#endif

// Link to MKL libraries. These symbols are only declared here; their
// definitions are expected to be supplied at link time.
extern int64_t __intel_mkl_feature_indicator;    // CPU feature bits
extern int64_t __intel_mkl_feature_indicator_x;  // CPU feature bits

// unfair dispatcher: checks CPU features for Intel CPU's only
void __intel_mkl_features_init(void);

// fair dispatcher: checks CPU features without discriminating by CPU brand
void __intel_mkl_features_init_x(void);

#ifdef __cplusplus
}  // end of extern "C"
#endif

/*
 * Force a re-evaluation of the CPU features without discriminating by CPU
 * brand.
 *
 * Both MKL feature indicators are cleared, the fair initializer is run, and
 * the value it leaves in __intel_mkl_feature_indicator_x is then copied into
 * __intel_mkl_feature_indicator so that both indicators agree.
 *
 * Takes no arguments and returns nothing. Per the file header, call it before
 * any call to the MKL functions.
 */
void intel_mkl_use_fair_dispatch(void) {
  // reset both indicators before re-running the detection
  __intel_mkl_feature_indicator = 0;
  __intel_mkl_feature_indicator_x = 0;

  // run the brand-agnostic ("fair") feature detection
  __intel_mkl_features_init_x();

  // mirror the fair result into the other indicator as well
  __intel_mkl_feature_indicator = __intel_mkl_feature_indicator_x;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
intel_dispatch_patch.zip | ||
======================== | ||
|
||
By Agner Fog, Technical University of Denmark, 2019. | ||
|
||
Intel's compilers are generating code that will run slower than necessary when | ||
the code is executed on a CPU that is not produced by Intel. This has been | ||
observed with Intel C, C++, and Fortran compilers. | ||
|
||
The same happens when certain function libraries produced by Intel are used, | ||
even if the code is compiled with another compiler, such as Microsoft, Gnu | ||
or Clang compilers. | ||
|
||
This problem is affecting several commonly used software programs such as | ||
Matlab, because they are using Intel software libraries. | ||
|
||
The library code and the code generated by an Intel compiler may contain | ||
multiple versions, each optimized for a particular instruction set extension. | ||
A so-called CPU dispatcher is choosing the optimal version of the code at | ||
runtime, based on which CPU it is running on. | ||
|
||
CPU dispatchers can be fair or unfair. A fair CPU dispatcher is choosing the | ||
optimal code based only on which instruction set extensions are supported | ||
by the CPU. An unfair dispatcher first checks the CPU brand. If the brand | ||
is not Intel, then the unfair dispatcher will choose the "generic" version | ||
of the code, i.e. the slowest version that is compatible with old CPUs | ||
without the relevant instruction set extensions. | ||
|
||
The CPU dispatchers in many Intel function libraries have two versions, a | ||
fair and an unfair one. It is not clear when the fair dispatcher is used | ||
and when the unfair dispatcher is used. My observations about fair and | ||
unfair CPU dispatching are as follows: | ||
|
||
* Code compiled with an Intel compiler will usually have unfair CPU dispatching. | ||
|
||
* The SVML (Short Vector Math Library) and IPP (Intel Performance Primitives) | ||
function libraries from Intel are using the fair CPU dispatcher when used | ||
with a non-Intel compiler. | ||
|
||
* The MKL (Math Kernel Library) library contains both fair and unfair | ||
dispatchers. It is not clear which dispatcher is used on each function. | ||
|
||
The code examples contained herein may be used for circumventing unfair CPU | ||
dispatching in order to improve compatibility with non-Intel CPUs. | ||
|
||
The following files are contained: | ||
|
||
intel_cpu_feature_patch.c | ||
------------------------- | ||
This code makes sure the fair dispatcher is called instead of the unfair | ||
one for code generated with an Intel compiler and for general Intel | ||
function libraries. | ||
|
||
intel_mkl_feature_patch.c | ||
------------------------- | ||
This does the same for the Intel MKL library. | ||
|
||
intel_mkl_cpuid_patch.c | ||
----------------------- | ||
This code example is overriding CPU detection functions in Intel's MKL | ||
function library. The mkl_serv_intel_cpu() function in MKL is returning | ||
1 when running on an Intel CPU and 0 when running on any other brand of | ||
CPU. You may include this code to replace this function in MKL with a | ||
function that returns 1 regardless of CPU brand. | ||
|
||
It may be necessary to use both intel_mkl_feature_patch.c and | ||
intel_mkl_cpuid_patch.c when using the MKL library in software that | ||
may run on any brand of CPU. | ||
|
||
An alternative method is to set the environment variable | ||
MKL_DEBUG_CPU_TYPE=5 | ||
when running on an AMD processor. This may be useful when you do not have | ||
access to the source code, for example when running Matlab software. | ||
|
||
The patches provided here are based on undocumented features in Intel | ||
function libraries. Use them at your own risk, and make sure to test your | ||
code properly to make sure it works as intended. | ||
|
||
The most reliable solution is, of course, to avoid Intel compilers and | ||
Intel function libraries in code that may run on other CPU brands such | ||
as AMD and VIA. You may find other function libraries on the web, or | ||
you may make your own functions. My vector class library (VCL) is useful | ||
for making mathematical functions that process multiple data in parallel, | ||
using the vector processing features of modern CPUs. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters