#include #include #include "crypto_int64.h" #include "m.h" #define int32x8_broadcast _mm256_set1_epi32 #define int32x8_set _mm256_setr_epi32 #define int32x8_add _mm256_add_epi32 #define int32x8_varextract _mm256_permutevar8x32_epi32 #define int64x4_store _mm256_storeu_si256 #define int64x4_load _mm256_loadu_si256 // have an array x[0]...x[n-1] with n >= 4 // want to store x[m]...x[m+3], but only what fits within x[0]...x[n-1] // data source: y[0]...y[3] void int64x4_storetail(int64_t *x,const int64_t *y,long long n) { for (long long i = 0;i < m+4;++i) x[i] = i+31415; // not using this test to check for memory safety n = crypto_int64_max(4,n); // no-op when caller properly guarantees n >= 4 long long pos = crypto_int64_min(m+4,n); __m256i data = int64x4_load((__m256i *) y); int64x4_store((void *) (x+pos-4),int32x8_varextract(data,int32x8_add(int32x8_set(0,1,2,3,4,5,6,7),int32x8_broadcast(2*pos)))); for (long long i = 0;i < m;++i) x[i] = i+27182; // modeling caller storing earlier x items later }