-rw-r--r-- 679 saferewrite-20260201/src/int32x8_storetail_24/maskmov/store.c raw
#include <inttypes.h> #include <immintrin.h> #include "m.h" // have an array x[0]...x[n-1] with n >= 8 // want to store x[m]...x[m+7], but only what fits within x[0]...x[n-1] // data source: y[0]...y[7] void int32x8_storetail(int32_t *x,const int32_t *y,long long n) { int32_t mask[8]; long long i; for (i = 0;i < m+8;++i) x[i] = i+31415; for (i = 0;i < 8;++i) mask[i] = 0; for (i = 0;i < 8 && m+i < n;++i) mask[i] = -1; __m256i maskvec = _mm256_loadu_si256((__m256i *) mask); __m256i data = _mm256_loadu_si256((__m256i *) y); _mm256_maskstore_epi32(x+m,maskvec,data); for (i = 0;i < m;++i) x[i] = i+27182; // modeling caller storing earlier x items later }