-rw-r--r-- 651 saferewrite-20260201/src/int32x8_loadtail_8/maskmov/load.c raw
#include <inttypes.h>
#include <immintrin.h>
#include "m.h"
// have an array x[0]...x[n-1] with n >= 8
// want to load x[m]...x[m+7], but only what fits within x[0]...x[n-1]
// setting remaining words to c
void int32x8_loadtail(int32_t *y,const int32_t *x,long long n,int32_t c)
{
  // NOTE(review): m is not declared in this function; presumably a macro
  // supplied by m.h -- confirm against that header.

  // Per-lane mask: all-ones for each leading lane k with m+k < n, zero for
  // the rest. The scan stops at the first lane that falls outside the array.
  int32_t inrange[8] = {0};
  long long k = 0;
  while (k < 8 && m+k < n) {
    inrange[k] = -1;
    ++k;
  }

  // broadcast of the filler word c to all 8 lanes
  __m256i fill = _mm256_set1_epi32(c);
  __m256i keep = _mm256_loadu_si256((__m256i *) inrange);

  // masked load from x+m: only lanes whose mask word has its top bit set
  // are taken from memory
  __m256i loaded = _mm256_maskload_epi32(x+m,keep);

  // take the loaded word where the mask is set, the filler c elsewhere
  __m256i merged = _mm256_blendv_epi8(fill,loaded,keep);

  // y receives all 8 words; no alignment is required of y
  _mm256_storeu_si256((__m256i *) y,merged);
}