PothosConfigSIMD.cmake 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333
  1. # Copyright (C) 2012-2013 Povilas Kanapickas <povilas@radix.lt>
  2. # 2020 Nicholas Corgan <n.corgan@gmail.com>
  3. #
  4. # Distributed under the Boost Software License, Version 1.0.
  5. # (See accompanying file LICENSE_1_0.txt or copy at
  6. # http://www.boost.org/LICENSE_1_0.txt)
  7. include(CheckCXXSourceRuns)
  8. include(CheckCXXSourceCompiles)
  # Remember the directory this module lives in. Inside a function body,
  # CMAKE_CURRENT_LIST_DIR refers to the list file of the caller, and the
  # dedicated CMAKE_CURRENT_FUNCTION_LIST_DIR variable only exists from
  # CMake 3.17 on, so the location is captured once at include time.
  set(POTHOS_CONFIG_SIMD_LIST_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE INTERNAL "")
  12. # ------------------------------------------------------------------------------
  # Compiler checks (internal)
  #
  # Classifies the C++ compiler from CMAKE_CXX_COMPILER_ID. All five
  # POTHOS_<compiler> switches start at 0 and exactly one is raised to 1:
  #   POTHOS_CLANG      - any ID containing "Clang" or "Apple"
  #   POTHOS_GCC        - any ID containing "GNU"
  #   POTHOS_INTEL      - any ID containing "Intel", when MSVC is not set
  #   POTHOS_MSVC_INTEL - any ID containing "Intel", when MSVC is set
  #   POTHOS_MSVC       - any ID containing "MSVC"
  # Any other compiler aborts configuration with a fatal error.
  set(POTHOS_GCC 0)
  set(POTHOS_CLANG 0)
  set(POTHOS_MSVC 0)
  set(POTHOS_INTEL 0)
  set(POTHOS_MSVC_INTEL 0)

  # "AppleClang" already matches "Clang"; the alternation keeps the former
  # stand-alone "Apple" test without a branch that could never be reached.
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|Apple")
    set(POTHOS_CLANG 1)
  elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    set(POTHOS_GCC 1)
  elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
    if(MSVC)
      set(POTHOS_MSVC_INTEL 1)
    else()
      set(POTHOS_INTEL 1)
    endif()
  elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
    set(POTHOS_MSVC 1)
  else()
    message(FATAL_ERROR "Compiler '${CMAKE_CXX_COMPILER_ID}' not recognized")
  endif()

  # Pointer width of the target: POTHOS_64BIT for 8-byte pointers, otherwise
  # POTHOS_32BIT. Compared numerically rather than with a regular expression.
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(POTHOS_64BIT 1)
  else()
    set(POTHOS_32BIT 1)
  endif()
  41. # ------------------------------------------------------------------------------
  42. # Architecture descriptions (internal)
  43. #
  44. # Each architecture has the following information specific to it:
  45. # - POTHOS_${ARCH}_TEST_CODE: source code snippet that uses functionality
  46. # from that arch. Used for @c check_cxx_source_runs macro.
  47. # We are taking extra care to confuse the compiler so that it does not
  48. # optimize things out. Nowadays compilers have good sense of when things
  49. # don't have side effects and will see through simple obfuscation
  50. # patterns.
  51. # - POTHOS_${ARCH}_CXX_FLAGS: compiler flags that are needed for compilation.
  52. # - POTHOS_${ARCH}_DEFINE: defines the macro that is needed to enable the
  53. # specific instruction set within the library.
  54. # - POTHOS_${ARCH}_SUFFIX: defines a suffix to append to the filename of the
  55. # source file specific to this architecture.
  56. #
  57. # Three lists are created:
  58. #
  59. # - POTHOS_ARCHS_PRI - primary architectures.
  60. # - POTHOS_ARCHS_SEC - secondary architectures. Effectively equivalent to one
  61. # of the primary architectures, just different instructions are generated
  62. # in specific scenarios.
  63. # - POTHOS_ARCHS - all architectures
  64. #
  # --- x86 SSE2 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_SSE2")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_SSE2_CXX_FLAGS "-msse2")
  elseif((POTHOS_MSVC OR POTHOS_MSVC_INTEL) AND POTHOS_WIN32)
    # NOTE(review): POTHOS_WIN32 is not assigned in the visible part of this
    # file (only POTHOS_32BIT / POTHOS_64BIT are) - confirm where it is set.
    set(POTHOS_X86_SSE2_CXX_FLAGS "/arch:SSE2")
  endif()

  set(POTHOS_X86_SSE2_DEFINE "POTHOS_ARCH_X86_SSE2")
  set(POTHOS_X86_SSE2_SUFFIX "-x86_sse2")

  # Probe: load 16 aligned bytes, OR the register with itself, store it back.
  # prevent_optimization() launders the pointer so the compiler cannot drop
  # the SIMD instructions.
  set(POTHOS_X86_SSE2_TEST_CODE
    "#include <emmintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128i align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128i one = _mm_load_si128((__m128i*)p);
        one = _mm_or_si128(one, one);
        _mm_store_si128((__m128i*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 SSE3 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_SSE3")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_SSE3_CXX_FLAGS "-msse3")
  elseif(POTHOS_MSVC AND POTHOS_WIN32)
    # Default for x64, so flag not supported
    # NOTE(review): POTHOS_WIN32 is not assigned in the visible part of this
    # file - confirm where it is set.
    set(POTHOS_X86_SSE3_CXX_FLAGS "/arch:SSE2")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_SSE3_CXX_FLAGS "/arch:SSE3")
  endif()

  set(POTHOS_X86_SSE3_DEFINE "POTHOS_ARCH_X86_SSE3")
  set(POTHOS_X86_SSE3_SUFFIX "-x86_sse3")

  # Probe: load four floats, apply _mm_hadd_ps, store the result back.
  set(POTHOS_X86_SSE3_TEST_CODE
    "#include <pmmintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128 one = _mm_load_ps((float*)p);
        one = _mm_hadd_ps(one, one);
        _mm_store_ps((float*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 SSSE3 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_SSSE3")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_SSSE3_CXX_FLAGS "-mssse3")
  elseif(POTHOS_MSVC AND POTHOS_WIN32)
    # Default for x64, so flag not supported
    # NOTE(review): POTHOS_WIN32 is not assigned in the visible part of this
    # file - confirm where it is set.
    set(POTHOS_X86_SSSE3_CXX_FLAGS "/arch:SSE2")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_SSSE3_CXX_FLAGS "/arch:SSSE3")
  endif()

  set(POTHOS_X86_SSSE3_DEFINE "POTHOS_ARCH_X86_SSSE3")
  set(POTHOS_X86_SSSE3_SUFFIX "-x86_ssse3")

  # Probe: load 16 bytes, apply _mm_abs_epi8, store the result back.
  set(POTHOS_X86_SSSE3_TEST_CODE
    "#include <tmmintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128i align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128i one = _mm_load_si128((__m128i*)p);
        one = _mm_abs_epi8(one);
        _mm_store_si128((__m128i*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 SSE4.1 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_SSE4_1")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_SSE4_1_CXX_FLAGS "-msse4.1")
  elseif(POTHOS_MSVC AND POTHOS_WIN32)
    # Default for x64, so flag not supported
    # NOTE(review): POTHOS_WIN32 is not assigned in the visible part of this
    # file - confirm where it is set.
    set(POTHOS_X86_SSE4_1_CXX_FLAGS "/arch:SSE2")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_SSE4_1_CXX_FLAGS "/arch:SSE4.1")
  endif()

  set(POTHOS_X86_SSE4_1_DEFINE "POTHOS_ARCH_X86_SSE4_1")
  set(POTHOS_X86_SSE4_1_SUFFIX "-x86_sse4_1")

  # Probe: load 16 bytes, apply _mm_cvtepi16_epi32, store the result back.
  set(POTHOS_X86_SSE4_1_TEST_CODE
    "#include <smmintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128i align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128i one = _mm_load_si128((__m128i*)p);
        one = _mm_cvtepi16_epi32(one);
        _mm_store_si128((__m128i*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 POPCNT instruction (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_POPCNT_INSN")

  # No MSVC flag
  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_POPCNT_INSN_CXX_FLAGS "-mssse3 -mpopcnt")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_POPCNT_INSN_CXX_FLAGS "/arch:SSE4.2")
  endif()

  set(POTHOS_X86_POPCNT_INSN_DEFINE "POTHOS_ARCH_X86_POPCNT_INSN")
  set(POTHOS_X86_POPCNT_INSN_SUFFIX "-x86_popcnt")

  # Probe: replace an unsigned value with its _mm_popcnt_u32 result. This
  # variant of prevent_optimization() works on unsigned* instead of char*.
  set(POTHOS_X86_POPCNT_INSN_TEST_CODE
    "#include <nmmintrin.h>
    #include <iostream>
    unsigned* prevent_optimization(unsigned* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        unsigned* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        unsigned data;
        unsigned* p = &data;
        p = prevent_optimization(p);
        *p = _mm_popcnt_u32(*p);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_AVX_CXX_FLAGS "-mavx")
  elseif(POTHOS_MSVC OR POTHOS_MSVC_INTEL)
    set(POTHOS_X86_AVX_CXX_FLAGS "/arch:AVX")
  endif()

  set(POTHOS_X86_AVX_DEFINE "POTHOS_ARCH_X86_AVX")
  set(POTHOS_X86_AVX_SUFFIX "-x86_avx")

  # Probe: load eight floats, add the register to itself, store it back.
  # The preprocessor guards make the probe fail to compile on clang 3.6 and
  # on plain GCC 4.4.
  set(POTHOS_X86_AVX_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    #if (__clang_major__ == 3) && (__clang_minor__ == 6)
    #error Not supported.
    #endif
    #if (__GNUC__ == 4) && (__GNUC_MINOR__ == 4) && !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error Not supported.
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[32];
            __m256 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m256 one = _mm256_load_ps((float*)p);
        one = _mm256_add_ps(one, one);
        _mm256_store_ps((float*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX2 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX2")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_X86_AVX2_CXX_FLAGS "-mavx2")
  elseif(POTHOS_INTEL)
    set(POTHOS_X86_AVX2_CXX_FLAGS "-xCORE-AVX2")
  elseif(POTHOS_MSVC)
    set(POTHOS_X86_AVX2_CXX_FLAGS "/arch:AVX2")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_AVX2_CXX_FLAGS "/arch:CORE-AVX2")
  endif()

  set(POTHOS_X86_AVX2_DEFINE "POTHOS_ARCH_X86_AVX2")
  set(POTHOS_X86_AVX2_SUFFIX "-x86_avx2")

  # Probe: load 32 bytes as __m256i, OR the register with itself, store it
  # back. The preprocessor guard rejects clang 3.6.
  set(POTHOS_X86_AVX2_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    #if (__clang_major__ == 3) && (__clang_minor__ == 6)
    #error Not supported.
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[32];
            __m256 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m256i one = _mm256_load_si256((__m256i*)p);
        one = _mm256_or_si256(one, one);
        _mm256_store_si256((__m256i*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 FMA3 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_FMA3")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_X86_FMA3_CXX_FLAGS "-mfma")
  elseif(POTHOS_INTEL)
    set(POTHOS_X86_FMA3_CXX_FLAGS "-xCORE-AVX2")
  elseif(POTHOS_MSVC)
    set(POTHOS_X86_FMA3_CXX_FLAGS "/arch:AVX2")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_FMA3_CXX_FLAGS "/arch:CORE-AVX2")
  endif()

  set(POTHOS_X86_FMA3_DEFINE "POTHOS_ARCH_X86_FMA3")
  set(POTHOS_X86_FMA3_SUFFIX "-x86_fma3")

  # Probe: load four floats, apply _mm_fmadd_ps, store the result back.
  set(POTHOS_X86_FMA3_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128 one = _mm_load_ps((float*)p);
        one = _mm_fmadd_ps(one, one, one);
        _mm_store_ps((float*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 FMA4 (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_FMA4")

  if(POTHOS_CLANG OR POTHOS_GCC)
    # intel does not support FMA4
    set(POTHOS_X86_FMA4_CXX_FLAGS "-mfma4")
  elseif(POTHOS_MSVC OR POTHOS_MSVC_INTEL)
    set(POTHOS_X86_FMA4_CXX_FLAGS "/arch:AVX2")
  endif()

  set(POTHOS_X86_FMA4_DEFINE "POTHOS_ARCH_X86_FMA4")
  set(POTHOS_X86_FMA4_SUFFIX "-x86_fma4")

  # Probe: load four floats, apply _mm_macc_ps, store the result back.
  set(POTHOS_X86_FMA4_TEST_CODE
    "#include <x86intrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128 one = _mm_load_ps((float*)p);
        one = _mm_macc_ps(one, one, one);
        _mm_store_ps((float*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 XOP (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "X86_XOP")

  # No flag for MSVC
  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_X86_XOP_CXX_FLAGS "-mxop")
  endif()

  set(POTHOS_X86_XOP_DEFINE "POTHOS_ARCH_X86_XOP")
  set(POTHOS_X86_XOP_SUFFIX "-x86_xop")

  # Probe: load 16 bytes, apply _mm_cmov_si128 and _mm_comeq_epi64, store the
  # result back.
  set(POTHOS_X86_XOP_TEST_CODE
    "#include <x86intrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128i align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128i one = _mm_load_si128((__m128i*)p);
        one = _mm_cmov_si128(one, one, one);
        one = _mm_comeq_epi64(one, one);
        _mm_store_si128((__m128i*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX-512F (primary) ---
  # No flag for MSVC
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX512F")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_X86_AVX512F_CXX_FLAGS "-mavx512f -mavx512dq")
  elseif(POTHOS_INTEL)
    set(POTHOS_X86_AVX512F_CXX_FLAGS "-xCOMMON-AVX512")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_AVX512F_CXX_FLAGS "/arch:COMMON-AVX512")
  endif()

  set(POTHOS_X86_AVX512F_DEFINE "POTHOS_ARCH_X86_AVX512F")
  set(POTHOS_X86_AVX512F_SUFFIX "-x86_avx512f")

  # Probe: exercises several 512-bit float and integer intrinsics, including
  # ones the embedded comments flag as missing or crashing on some compilers.
  # Plain GCC older than 6 is rejected by the preprocessor guard.
  set(POTHOS_X86_AVX512F_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    #if defined(__GNUC__) && (__GNUC__ < 6) && !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error GCC 5.x and older are not supported on AVX512F.
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[64];
            __m512 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m512 f = _mm512_load_ps((float*)p);
        p = prevent_optimization(p);
        __m512i i = _mm512_load_epi32((__m512i*)p);
        p = prevent_optimization(p);
        f = _mm512_add_ps(f, f);
        // MSVC 2017 miss this
        i = _mm512_or_epi32(i, i);
        f = _mm512_ceil_ps(f);
        // ICE on various versions of Clang trying to select palignr
        __m512i i2 = _mm512_load_epi32((__m512i*)p);
        __m512i ap = _mm512_alignr_epi32(i, i, 2);
        i = _mm512_mask_alignr_epi32(ap, 0xcccc, i2, i2, 14);
        p = prevent_optimization(p);
        _mm512_store_ps((float*)p, f);
        p = prevent_optimization(p);
        _mm512_store_epi32((void*)p, i);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX-512BW (primary) ---
  # No flag for MSVC
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX512BW")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_X86_AVX512BW_CXX_FLAGS "-mavx512bw")
  elseif(POTHOS_INTEL)
    set(POTHOS_X86_AVX512BW_CXX_FLAGS "-xCORE-AVX512")
  elseif(POTHOS_MSVC_INTEL)
    set(POTHOS_X86_AVX512BW_CXX_FLAGS "/arch:CORE-AVX512")
  endif()

  set(POTHOS_X86_AVX512BW_DEFINE "POTHOS_ARCH_X86_AVX512BW")
  set(POTHOS_X86_AVX512BW_SUFFIX "-x86_avx512bw")

  # Probe: load 64 bytes, apply _mm512_add_epi16, store the result back.
  set(POTHOS_X86_AVX512BW_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[64];
            __m512i align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m512i i = _mm512_load_si512((void*)p);
        i = _mm512_add_epi16(i, i); // only in AVX-512BW
        _mm512_store_si512((void*)p, i);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX-512DQ (primary) ---
  # No flag for MSVC
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX512DQ")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_AVX512DQ_CXX_FLAGS "-mavx512dq")
  endif()

  set(POTHOS_X86_AVX512DQ_DEFINE "POTHOS_ARCH_X86_AVX512DQ")
  set(POTHOS_X86_AVX512DQ_SUFFIX "-x86_avx512dq")

  # Probe: load sixteen floats, apply _mm512_and_ps, store the result back.
  set(POTHOS_X86_AVX512DQ_TEST_CODE
    "#include <immintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[64];
            __m512 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m512 f = _mm512_load_ps((float*)p);
        f = _mm512_and_ps(f, f); // only in AVX512-DQ
        _mm512_store_ps((float*)p, f);
        p = prevent_optimization(p);
    }"
  )
  # --- x86 AVX-512VL (primary) ---
  # No flag for MSVC
  list(APPEND POTHOS_ARCHS_PRI "X86_AVX512VL")

  if(POTHOS_CLANG OR POTHOS_GCC OR POTHOS_INTEL)
    set(POTHOS_X86_AVX512VL_CXX_FLAGS "-mavx512vl")
  endif()

  set(POTHOS_X86_AVX512VL_DEFINE "POTHOS_ARCH_X86_AVX512VL")
  set(POTHOS_X86_AVX512VL_SUFFIX "-x86_avx512vl")

  # Probe: load four floats, apply _mm_rcp14_ps, store the result back.
  # The preprocessor guard rejects non-Apple clang with major version 3.
  set(POTHOS_X86_AVX512VL_TEST_CODE
    "#if !defined(__APPLE__) && (__clang_major__ == 3)
    #error AVX512VL is not supported on clang 3.9 and earlier.
    #endif
    #include <immintrin.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            __m128 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        __m128 f = _mm_load_ps((float*)p);
        f = _mm_rcp14_ps(f); // only in AVX512-VL
        _mm_store_ps((float*)p, f);
        p = prevent_optimization(p);
    }"
  )
  # --- ARM NEON (primary) ---
  list(APPEND POTHOS_ARCHS_PRI "ARM_NEON")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_ARM_NEON_CXX_FLAGS "-mfpu=neon")
  endif()

  set(POTHOS_ARM_NEON_DEFINE "POTHOS_ARCH_ARM_NEON")
  set(POTHOS_ARM_NEON_SUFFIX "-arm_neon")

  # Probe: load four 32-bit lanes, add the vector to itself, store it back.
  # The preprocessor guard rejects clang 3.3 and earlier.
  set(POTHOS_ARM_NEON_TEST_CODE
    "#if defined(__clang_major__)
    #if (__clang_major__ < 3) || ((__clang_major__ == 3) && (__clang_minor__ <= 3))
    #error NEON is not supported on clang 3.3 and earlier.
    #endif
    #endif
    #include <arm_neon.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            uint32x4_t align;
        };
        char* p = data;
        p = prevent_optimization(p);
        uint32x4_t one = vld1q_u32((uint32_t*)p);
        one = vaddq_u32(one, one);
        vst1q_u32((uint32_t*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- ARM NEON, FLT_SP variant (secondary) ---
  # Registered in the secondary list; it uses the same compiler flag as
  # ARM_NEON and defines no test program of its own.
  list(APPEND POTHOS_ARCHS_SEC "ARM_NEON_FLT_SP")

  if(POTHOS_CLANG OR POTHOS_GCC)
    set(POTHOS_ARM_NEON_FLT_SP_CXX_FLAGS "-mfpu=neon")
  endif()

  set(POTHOS_ARM_NEON_FLT_SP_DEFINE "POTHOS_ARCH_ARM_NEON_FLT_SP")
  set(POTHOS_ARM_NEON_FLT_SP_SUFFIX "-arm_neon_flt_sp")
  # --- ARM64 NEON (primary) ---
  # Shares the POTHOS_ARCH_ARM_NEON define with 32-bit ARM_NEON but has its
  # own file suffix and compiler flags.
  list(APPEND POTHOS_ARCHS_PRI "ARM64_NEON")

  if(POTHOS_CLANG)
    set(POTHOS_ARM64_NEON_CXX_FLAGS "-arch arm64")
  elseif(POTHOS_GCC)
    set(POTHOS_ARM64_NEON_CXX_FLAGS "-mcpu=generic+simd")
  endif()

  set(POTHOS_ARM64_NEON_DEFINE "POTHOS_ARCH_ARM_NEON")
  set(POTHOS_ARM64_NEON_SUFFIX "-arm64_neon")

  # Probe: load four 32-bit lanes from the buffer, add, broadcast lane 1 and
  # store back. The load and store take the buffer pointer `p` itself;
  # casting the dereferenced `*p` (a char value) to a pointer would make the
  # probe access an arbitrary address when it is run.
  set(POTHOS_ARM64_NEON_TEST_CODE
    "#include <arm_neon.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            uint32x4_t align;
        };
        char* p = data;
        p = prevent_optimization(p);
        uint32x4_t one = vld1q_u32((uint32_t*)p);
        one = vaddq_u32(one, one);
        // GCC 4.8 misses a subset of functions
        one = vdupq_laneq_u32(one, 1);
        vst1q_u32((uint32_t*)p, one);
        p = prevent_optimization(p);
    }"
  )
  # --- MIPS MSA (primary) ---
  # Flags are set regardless of the detected compiler.
  list(APPEND POTHOS_ARCHS_PRI "MIPS_MSA")

  set(POTHOS_MIPS_MSA_CXX_FLAGS "-mips64r5 -mmsa -mhard-float -mfp64 -mnan=legacy")
  set(POTHOS_MIPS_MSA_DEFINE "POTHOS_ARCH_MIPS_MSA")
  set(POTHOS_MIPS_MSA_SUFFIX "-mips_msa")

  # Probe: load 16 bytes with __msa_ld_b, apply __msa_add_a_b, store back.
  set(POTHOS_MIPS_MSA_TEST_CODE
    "#include <msa.h>
    #include <iostream>
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            v4i32 align;
        };
        char* p = data;
        p = prevent_optimization(p);
        v16i8 v = __msa_ld_b(p, 0);
        v = __msa_add_a_b(v, v);
        __msa_st_b(v, p, 0);
        p = prevent_optimization(p);
    }"
  )
  # --- POWER AltiVec (primary) ---
  # Flags are set regardless of the detected compiler.
  list(APPEND POTHOS_ARCHS_PRI "POWER_ALTIVEC")

  set(POTHOS_POWER_ALTIVEC_CXX_FLAGS "-maltivec")
  set(POTHOS_POWER_ALTIVEC_DEFINE "POTHOS_ARCH_POWER_ALTIVEC")
  set(POTHOS_POWER_ALTIVEC_SUFFIX "-power_altivec")

  # Probe: vec_ld / vec_add / vec_st on 16 bytes. Plain GCC older than 6 is
  # rejected on little-endian targets by the preprocessor guard.
  set(POTHOS_POWER_ALTIVEC_TEST_CODE
    "#include <altivec.h>
    #include <iostream>
    #if defined(__GNUC__) && (__GNUC__ < 6) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    #if !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error GCC 5.0 and older are not supported on PPC little-endian.
    #endif
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            vector unsigned char align;
        };
        char* p = data;
        p = prevent_optimization(p);
        vector unsigned char v = vec_ld(0, (unsigned char*)p);
        v = vec_add(v, v);
        vec_st(v, 0, (unsigned char*)p);
        p = prevent_optimization(p);
    }"
  )
  # --- POWER VSX 2.06 (primary) ---
  # Flags are set regardless of the detected compiler.
  list(APPEND POTHOS_ARCHS_PRI "POWER_VSX_206")

  set(POTHOS_POWER_VSX_206_CXX_FLAGS "-mvsx")
  set(POTHOS_POWER_VSX_206_DEFINE "POTHOS_ARCH_POWER_VSX_206")
  set(POTHOS_POWER_VSX_206_SUFFIX "-power_vsx_2.06")

  # Probe: vec_vsx_ld / vec_add / vec_vsx_st on 16 bytes. Plain GCC older
  # than 6 is rejected for both byte orders by the preprocessor guards.
  set(POTHOS_POWER_VSX_206_TEST_CODE
    "#include <altivec.h>
    #include <iostream>
    #if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    #if !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error GCC 5.0 and older are not supported on PPC little-endian.
    #endif
    #endif
    #if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #if !defined(__INTEL_COMPILER) && !defined(__clang__)
    // Internal compiler errors or wrong behaviour on various SIMD memory operations
    #error GCC 5.x and older not supported on VSX big-endian.
    #endif
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            vector unsigned char align;
        };
        char* p = data;
        p = prevent_optimization(p);
        vector unsigned char v = vec_vsx_ld(0, (unsigned char*)p);
        v = vec_add(v, v);
        vec_vsx_st(v, 0, (unsigned char*)p);
        p = prevent_optimization(p);
    }"
  )
  # --- POWER VSX 2.07 (primary) ---
  # Flags are set regardless of the detected compiler.
  list(APPEND POTHOS_ARCHS_PRI "POWER_VSX_207")

  set(POTHOS_POWER_VSX_207_CXX_FLAGS "-mvsx -mcpu=power8")
  set(POTHOS_POWER_VSX_207_DEFINE "POTHOS_ARCH_POWER_VSX_207")
  set(POTHOS_POWER_VSX_207_SUFFIX "-power_vsx_2.07")

  # Probe: vec_vsx_ld / vec_vpopcnt / vec_vsx_st on 16 bytes. Plain GCC older
  # than 6 is rejected for both byte orders by the preprocessor guards.
  set(POTHOS_POWER_VSX_207_TEST_CODE
    "#include <altivec.h>
    #include <iostream>
    #if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    #if !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error GCC 5.0 and older are not supported on PPC little-endian.
    #endif
    #endif
    #if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #if !defined(__INTEL_COMPILER) && !defined(__clang__)
    #error GCC 5.x and older not supported on VSX big-endian.
    #endif
    #endif
    char* prevent_optimization(char* ptr)
    {
        volatile bool never = false;
        if (never) {
            while (*ptr++)
                std::cout << *ptr;
        }
        char* volatile* volatile opaque;
        opaque = &ptr;
        return *opaque;
    }
    int main()
    {
        union {
            char data[16];
            vector unsigned char align;
        };
        char* p = data;
        p = prevent_optimization(p);
        vector unsigned char v = vec_vsx_ld(0, (unsigned char*)p);
        v = vec_vpopcnt(v);
        vec_vsx_st(v, 0, (unsigned char*)p);
        p = prevent_optimization(p);
    }"
  )
  # Complete architecture list: every primary entry followed by every
  # secondary entry, joined as one ;-separated CMake list.
  set(POTHOS_ARCHS "${POTHOS_ARCHS_PRI};${POTHOS_ARCHS_SEC}")
  854. # ------------------------------------------------------------------------------
  # Given one arch, returns compilation flags and a unique identifier (internal)
  #
  # ARCH is a comma-separated set of architecture IDs. The IDs are sorted so
  # that the same set always produces the same result regardless of the order
  # in which it was written. IDs that are not listed in POTHOS_ARCHS are
  # ignored; if none is recognized, all result variables are set to "".
  # The special ID "NONE_NULL" contributes only the "-fallback" suffix.
  #
  # Arguments:
  #
  # - CXX_FLAGS_VAR: the name of a variable to store the compilation flags to.
  #   For each recognized ID the result gains " <arch flags> -D<arch define>",
  #   so a non-empty result starts with a space.
  #
  # - DEFINES_LIST_VAR: the name of a variable to store a comma-delimited list
  #   of preprocessor defines for the current architecture.
  #
  # - UNIQUE_ID_VAR: the name of a variable to store the unique identifier to
  #   (the concatenated per-architecture file suffixes).
  #
  # - ARCH: an architecture
  #
  function(pothos_get_arch_info CXX_FLAGS_VAR DEFINES_LIST_VAR UNIQUE_ID_VAR ARCH)
    set(UNIQUE_ID "")
    set(CXX_FLAGS "")
    set(DEFINES_LIST "")

    string(REPLACE "," ";" ARCH_IDS "${ARCH}")
    list(SORT ARCH_IDS)

    # Unquoted expansion on purpose: empty elements (e.g. from "A,,B") are
    # dropped instead of being visited.
    foreach(ID ${ARCH_IDS})
      # Quoted so the ID is compared as text and is never mistaken for a
      # variable name or an if() keyword.
      if("${ID}" STREQUAL "NONE_NULL")
        set(UNIQUE_ID "${UNIQUE_ID}-fallback")
      else()
        list(FIND POTHOS_ARCHS "${ID}" ARCH_INDEX)
        if(NOT ARCH_INDEX EQUAL -1)
          list(APPEND DEFINES_LIST "${POTHOS_${ID}_DEFINE}")
          set(CXX_FLAGS "${CXX_FLAGS} ${POTHOS_${ID}_CXX_FLAGS} -D${POTHOS_${ID}_DEFINE}")
          set(UNIQUE_ID "${UNIQUE_ID}${POTHOS_${ID}_SUFFIX}")
        endif()
      endif()
    endforeach()

    string(REPLACE ";" "," DEFINES_LIST "${DEFINES_LIST}")

    set(${CXX_FLAGS_VAR} "${CXX_FLAGS}" PARENT_SCOPE)
    set(${UNIQUE_ID_VAR} "${UNIQUE_ID}" PARENT_SCOPE)
    set(${DEFINES_LIST_VAR} "${DEFINES_LIST}" PARENT_SCOPE)
  endfunction()
  # ------------------------------------------------------------------------------
  #
  # pothos_multiarch(FILE_LIST_VAR ARCHSTRING_VAR SRC_FILE [ARCH...])
  #
  # A function that encapsulates the generation of build rules for libpothos
  # multi-architecture source files. The function creates a copy of @a SRC_FILE
  # for each supplied architecture definition. Each of these files is configured
  # with appropriate compile flags for the given architecture. The list of copied
  # files is appended to the variable supplied by @a FILE_LIST_VAR which can then
  # be used in add_library or add_executable calls.
  #
  # All copied files are placed in the build directory. The directory of
  # @a SRC_FILE is added to the include paths of each copy.
  #
  # Arguments:
  #
  # * FILE_LIST_VAR: the name of the variable to append the list of generated
  #   files to
  #
  # * ARCHSTRING_VAR: the name of the variable to set the list of arches to for
  #   PothosUtil. The value is a comma-separated list with one entry per
  #   recognised architecture definition (its suffix with "-" replaced by "__").
  #
  # * SRC_FILE: the name of the source file relative to the @a
  #   CMAKE_CURRENT_SOURCE_DIR
  #
  # * ARCH...: a list of architecture definitions. Each architecture definition
  #   consists of a comma-separated list of identifiers directly corresponding
  #   to macros defined in simdpp/simd.h, which ultimately identify instruction
  #   set features. The user of the function must ensure that a sensible
  #   combination of identifiers is supplied. Definitions for which
  #   pothos_get_arch_info returns no identifier are skipped.
  #
  # The following identifiers are currently supported:
  # X86_SSE2, X86_SSE3, X86_SSSE3, X86_SSE4_1,
  # X86_AVX, X86_AVX2, X86_FMA3, X86_FMA4,
  # X86_AVX512F, X86_AVX512BW, X86_AVX512DQ, X86_AVX512VL, X86_XOP,
  # ARM_NEON, ARM_NEON_FLT_SP, ARM64_NEON,
  # MIPS_MSA, POWER_ALTIVEC, POWER_VSX_206, POWER_VSX_207
  #
  function(pothos_multiarch FILE_LIST_VAR ARCHSTRING_VAR SRC_FILE)
    set(SRC_ABS_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_FILE}")
    if(NOT EXISTS "${SRC_ABS_FILE}")
      message(FATAL_ERROR "File \"${SRC_FILE}\" does not exist")
    endif()

    get_filename_component(SRC_PATH "${SRC_FILE}" PATH)
    get_filename_component(SRC_NAME "${SRC_FILE}" NAME_WE)
    get_filename_component(SRC_EXT "${SRC_FILE}" EXT)

    # Function scopes start with a copy of the caller's variables, so every
    # accumulator is reset explicitly instead of being appended to blindly.
    set(FILE_LIST "")
    set(ARCHSTRING "")

    # ARGN holds exactly the arguments after the three named parameters.
    foreach(ARCH ${ARGN})
      pothos_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX "${ARCH}")

      # Drop the leading "-" of the unique identifier.
      string(REGEX REPLACE "^-" "" SUFFIX "${SUFFIX}")

      # An empty suffix means the architecture definition was not recognised.
      if(NOT "${SUFFIX}" STREQUAL "")
        string(REPLACE "-" "__" namespace "${SUFFIX}")

        # Equivalent of string(JOIN ...), which is too recent for us
        if("${ARCHSTRING}" STREQUAL "")
          set(ARCHSTRING "${namespace}")
        else()
          set(ARCHSTRING "${ARCHSTRING},${namespace}")
        endif()

        # Hash and truncate the suffix to shorten the output filepath
        string(MD5 suffixhash "${SUFFIX}")
        string(SUBSTRING "${suffixhash}" 0 6 suffixhash)

        # The trailing space is kept on purpose: the original author found the
        # flag was otherwise prepended to the next one.
        set(CXX_FLAGS "-I\"${CMAKE_CURRENT_SOURCE_DIR}/${SRC_PATH}\" ${CXX_FLAGS} -DPOTHOS_SIMD_NAMESPACE=${namespace} ")

        # Copy the source file and add the required flags
        set(DST_ABS_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SRC_PATH}/${SRC_NAME}-${suffixhash}${SRC_EXT}")

        # CMake does not support adding per-source-file include directories.
        # Also when CXX_FLAGS is used for this purpose, CMake does not add
        # local includes as the dependencies of the result object file thus
        # does not rebuild the file when these included files are changed.
        # The work around is to use add_custom_command with IMPLICIT_DEPENDS,
        # which only works on make-based systems. The explicit DEPENDS makes
        # every generator re-copy the file when the original source changes.
        add_custom_command(
          OUTPUT "${DST_ABS_FILE}"
          COMMAND "${CMAKE_COMMAND}" -E copy "${SRC_ABS_FILE}" "${DST_ABS_FILE}"
          DEPENDS "${SRC_ABS_FILE}"
          IMPLICIT_DEPENDS CXX "${SRC_ABS_FILE}"
          COMMENT "Generating ${SRC_FILE} ${SUFFIX} implementation"
          VERBATIM)

        list(APPEND FILE_LIST "${DST_ABS_FILE}")
        set_source_files_properties("${DST_ABS_FILE}" PROPERTIES
          COMPILE_FLAGS "${CXX_FLAGS}"
          GENERATED TRUE)
      endif()
    endforeach()

    set(${ARCHSTRING_VAR} "${ARCHSTRING}" PARENT_SCOPE)

    # Append to, rather than replace, whatever the caller's list already holds.
    set(RECV_FILE_LIST ${${FILE_LIST_VAR}})
    list(APPEND RECV_FILE_LIST ${FILE_LIST})
    set(${FILE_LIST_VAR} "${RECV_FILE_LIST}" PARENT_SCOPE)
  endfunction()
  # ------------------------------------------------------------------------------
  # Given a list of supported archs, return the list of architecture definitions
  # (comma-separated combinations of identifiers) to build for (internal).
  # The result always starts with "NONE_NULL".
  #
  # Arguments:
  #
  # - ALL_ARCHS_VAR: the name of the variable to store the resulting list to
  #
  # - ARCH...: a list of supported architectures
  function(pothos_get_arch_perm ALL_ARCHS_VAR)
    # Use ARGN (the arguments after ALL_ARCHS_VAR) directly. Appending ARGV to
    # ARCHS would also pick up any ARCHS variable inherited from the caller's
    # scope and then strip the wrong leading element.
    set(ARCHS ${ARGN})
    foreach(ARCH ${ARCHS})
      set(ARCH_SUPPORTED_${ARCH} "1")
    endforeach()

    set(ALL_ARCHS "NONE_NULL")

    if(DEFINED ARCH_SUPPORTED_X86_SSE2)
      # all x86_64 processors
      list(APPEND ALL_ARCHS "X86_SSE2")
    endif()

    if(DEFINED ARCH_SUPPORTED_X86_SSE3)
      # Since Prescott, Merom (Core architecture)
      # Since later K8 steppings, fully supported since K10
      list(APPEND ALL_ARCHS "X86_SSE3")
    endif()

    if(DEFINED ARCH_SUPPORTED_X86_SSSE3)
      # Since Merom (Core architecture)
      # Since Bobcat and Bulldozer
      list(APPEND ALL_ARCHS "X86_SSSE3")
    endif()

    if(DEFINED ARCH_SUPPORTED_X86_SSE4_1)
      # Since Penryn (45 nm Merom shrink)
      # Since Bulldozer
      list(APPEND ALL_ARCHS "X86_SSE4_1")
    endif()

    # X86_POPCNT_INSN on its own: since Nehalem and K10.
    # NOTE: These two architectures are the only that support popcnt and
    # don't support AVX. There's no full overlap of the instruction set
    # support in these architectures, thus these two separate configs were
    # omitted from the default instruction set matrix.

    if(DEFINED ARCH_SUPPORTED_X86_AVX)
      # Since Sandy Bridge, Bulldozer, Jaguar
      list(APPEND ALL_ARCHS "X86_AVX,X86_POPCNT_INSN")

      # X86_FMA3: since Haswell, Piledriver (later Bulldozer variant).
      # All CPUs in this range support popcnt. No AVX+FMA3 config is added.

      if(DEFINED ARCH_SUPPORTED_X86_FMA4)
        # Since Bulldozer until Zen. Jaguar does not support FMA4 nor FMA3
        # All CPUs in this range support popcnt
        list(APPEND ALL_ARCHS "X86_AVX,X86_FMA4,X86_POPCNT_INSN")
      endif()
    endif()

    if(DEFINED ARCH_SUPPORTED_X86_AVX2)
      # Since Haswell and Zen
      # All Intel and AMD CPUs that support AVX2 also support FMA3 and POPCNT,
      # thus separate X86_AVX2 config is not needed.
      if(DEFINED ARCH_SUPPORTED_X86_FMA3)
        list(APPEND ALL_ARCHS "X86_AVX2,X86_FMA3,X86_POPCNT_INSN")
      endif()
    endif()

    # X86_FMA3: since Haswell, Piledriver (later Bulldozer variant).
    # All Intel and AMD CPUs that support FMA3 also support AVX, thus
    # separate X86_FMA3 config is not needed.

    # X86_FMA4: since Bulldozer until Zen.
    # All AMD CPUs that support FMA4 also support AVX, thus
    # separate X86_FMA4 config is not needed.

    if(DEFINED ARCH_SUPPORTED_X86_AVX512F)
      # Since Knights Landing, Skylake-X
      # All Intel CPUs that support AVX512F also support FMA3 and POPCNT,
      # thus separate X86_AVX512F config is not needed.
      if(DEFINED ARCH_SUPPORTED_X86_AVX512BW)
        if(DEFINED ARCH_SUPPORTED_X86_AVX512DQ)
          if(DEFINED ARCH_SUPPORTED_X86_AVX512VL)
            # All Intel processors that support AVX512BW also support
            # AVX512DQ and AVX512VL
            list(APPEND ALL_ARCHS "X86_AVX512BW,X86_AVX512DQ,X86_AVX512VL")
          endif()
        endif()
      else()
        list(APPEND ALL_ARCHS "X86_AVX512F,X86_FMA3,X86_POPCNT_INSN")
      endif()
    endif()

    if(DEFINED ARCH_SUPPORTED_X86_XOP)
      list(APPEND ALL_ARCHS "X86_XOP")
      if(DEFINED ARCH_SUPPORTED_X86_AVX)
        list(APPEND ALL_ARCHS "X86_AVX,X86_XOP")
      endif()
    endif()

    if(DEFINED ARCH_SUPPORTED_ARM_NEON)
      list(APPEND ALL_ARCHS "ARM_NEON")
      list(APPEND ALL_ARCHS "ARM_NEON_FLT_SP")
    endif()

    if(DEFINED ARCH_SUPPORTED_ARM64_NEON)
      list(APPEND ALL_ARCHS "ARM64_NEON")
    endif()

    if(DEFINED ARCH_SUPPORTED_MIPS_MSA)
      list(APPEND ALL_ARCHS "MIPS_MSA")
    endif()

    if(DEFINED ARCH_SUPPORTED_POWER_ALTIVEC)
      list(APPEND ALL_ARCHS "POWER_ALTIVEC")
    endif()

    if(DEFINED ARCH_SUPPORTED_POWER_VSX_206)
      list(APPEND ALL_ARCHS "POWER_VSX_206")
    endif()

    if(DEFINED ARCH_SUPPORTED_POWER_VSX_207)
      list(APPEND ALL_ARCHS "POWER_VSX_207")
    endif()

    set(${ALL_ARCHS_VAR} "${ALL_ARCHS}" PARENT_SCOPE)
  endfunction()
  # ------------------------------------------------------------------------------
  #
  # pothos_get_compilable_archs(ARCH_LIST_VAR)
  #
  # Returns a list of architectures that are supported by the current build
  # system. Each primary architecture in POTHOS_ARCHS_PRI is probed by compiling
  # its test program with its flags. The generated list may be used as an
  # argument to pothos_multiarch.
  #
  # Arguments:
  #
  # * ARCH_LIST_VAR: the name of the variable to put the architecture list to
  #
  function(pothos_get_compilable_archs ARCH_LIST_VAR)
    # Start from an empty list: a function scope inherits the caller's
    # variables, so an existing ARCHS would otherwise be appended to.
    set(ARCHS "")

    foreach(ARCH ${POTHOS_ARCHS_PRI})
      # Set only in this function's scope; the caller's value is untouched.
      set(CMAKE_REQUIRED_FLAGS "${POTHOS_${ARCH}_CXX_FLAGS}")
      check_cxx_source_compiles("${POTHOS_${ARCH}_TEST_CODE}" CAN_COMPILE_${ARCH})
      if(CAN_COMPILE_${ARCH})
        list(APPEND ARCHS "${ARCH}")
      endif()
    endforeach()

    pothos_get_arch_perm(ALL_ARCHS "${ARCHS}")
    set(${ARCH_LIST_VAR} "${ALL_ARCHS}" PARENT_SCOPE)
  endfunction()
  # ------------------------------------------------------------------------------
  #
  # pothos_get_runnable_archs(ARCH_LIST_VAR)
  #
  # Returns a list of architectures that are supported by the current build
  # system and the processor. Each primary architecture in POTHOS_ARCHS_PRI is
  # probed by compiling and running its test program with its flags. The
  # generated list may be used as an argument to pothos_multiarch.
  #
  # Arguments:
  #
  # * ARCH_LIST_VAR: the name of the variable to put the architecture list to
  #
  function(pothos_get_runnable_archs ARCH_LIST_VAR)
    # Start from an empty list: a function scope inherits the caller's
    # variables, so an existing ARCHS would otherwise be appended to.
    set(ARCHS "")

    foreach(ARCH ${POTHOS_ARCHS_PRI})
      # Set only in this function's scope; the caller's value is untouched.
      set(CMAKE_REQUIRED_FLAGS "${POTHOS_${ARCH}_CXX_FLAGS}")
      check_cxx_source_runs("${POTHOS_${ARCH}_TEST_CODE}" CAN_RUN_${ARCH})
      if(CAN_RUN_${ARCH})
        list(APPEND ARCHS "${ARCH}")
      endif()
    endforeach()

    pothos_get_arch_perm(ALL_ARCHS "${ARCHS}")
    set(${ARCH_LIST_VAR} "${ALL_ARCHS}" PARENT_SCOPE)
  endfunction()
  # ------------------------------------------------------------------------------
  # This is the entry point for Pothos modules to invoke this functionality.
  #
  # For every given source file, one copy per compilable architecture is
  # registered through pothos_multiarch. A custom command and a
  # <JSONInputFile name>_SIMDDispatcher target are added that run PothosUtil to
  # generate <JSONInputFile name>_SIMD.hpp in the current binary directory.
  #
  # Arguments:
  #
  # * FileListVariable: the name of the variable to put the list of
  #   arch-specific files into (paths relative to CMAKE_CURRENT_SOURCE_DIR)
  # * JSONInputFile: the path to a JSON file to pass into PothosUtil to generate
  #   the dynamic dispatch header file
  # * Remaining arguments: the source files to build once per architecture,
  #   relative to CMAKE_CURRENT_SOURCE_DIR
  #
  function(PothosGenerateSIMDSources FileListVariable JSONInputFile)
    # ARGN holds exactly the arguments after the two named parameters.
    set(SIMDSourceFiles ${ARGN})

    # Reset accumulators; a function scope inherits the caller's variables.
    set(TempFileList "")
    set(FileList "")
    set(ArchString "")

    pothos_get_compilable_archs(SIMDBuildArchs)

    foreach(SrcFile ${SIMDSourceFiles})
      set(SingleFileSIMDSources "")
      pothos_multiarch(SingleFileSIMDSources ArchString "${SrcFile}" ${SIMDBuildArchs})
      list(APPEND TempFileList ${SingleFileSIMDSources})
    endforeach()

    # Convert to relative path so PothosUtil will accept the path
    foreach(AbsPath ${TempFileList})
      file(RELATIVE_PATH RelPath "${CMAKE_CURRENT_SOURCE_DIR}" "${AbsPath}")
      list(APPEND FileList "${RelPath}")
    endforeach()

    get_filename_component(JSONInputFilename "${JSONInputFile}" NAME_WE)
    get_filename_component(JSONInputFileAbsolute "${JSONInputFile}" ABSOLUTE)
    set(outputHeaderPath "${CMAKE_CURRENT_BINARY_DIR}/${JSONInputFilename}_SIMD.hpp")

    # VERBATIM keeps the argument escaping identical across platforms.
    add_custom_command(
      OUTPUT "${outputHeaderPath}"
      COMMENT "Generating ${JSONInputFilename} SIMD dynamic dispatchers"
      COMMAND ${POTHOS_UTIL_EXE} "--simd-arches=${ArchString}" "--output=${outputHeaderPath}" "--generate-simd-dispatchers=${JSONInputFileAbsolute}"
      DEPENDS "${JSONInputFileAbsolute}"
      VERBATIM)
    add_custom_target(${JSONInputFilename}_SIMDDispatcher DEPENDS "${outputHeaderPath}")

    set(${FileListVariable} ${FileList} PARENT_SCOPE)
  endfunction()