-rw-r--r-- 891 saferewrite-20260201/src/int64x4_loadtail_64/perm/load.c raw
#include <inttypes.h> #include <immintrin.h> #include "crypto_int32.h" #include "crypto_int64.h" #include "m.h" // have an array x[0]...x[n-1] with n >= 4 // want to load x[m]...x[m+3], but only what fits within x[0]...x[n-1] // setting remaining words to c void int64x4_loadtail(int64_t *y,const int64_t *x,long long n,int64_t c) { n = crypto_int64_max(4,n); // no-op when caller properly guarantees n >= 4 long long pos = crypto_int64_min(m+4,n); __m256i xpart = _mm256_loadu_si256((__m256i *) (x+pos-4)); __m256i posvec = _mm256_set1_epi32(2*pos); __m256i mplus = _mm256_setr_epi32(2*m,2*m+1,2*m+2,2*m+3,2*m+4,2*m+5,2*m+6,2*m+7); __m256i diff = _mm256_sub_epi32(mplus,posvec); __m256i xrotate = _mm256_permutevar8x32_epi32(xpart,diff); __m256i cvec = _mm256_set1_epi64x(c); __m256i xc = _mm256_blendv_epi8(cvec,xrotate,diff); _mm256_storeu_si256((__m256i *) y,xc); }