/* -rw-r--r-- 1707 nttcompiler-20220411/command/512-speed.c */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include "ntt_512.h"
#include "ntt_cpucycles.h"
#define cpucycles ntt_cpucycles
/* Short alias for the 16-bit signed type used by the transform function pointers below. */
typedef int16_t int16;
/* Requests 32-byte alignment for the object it is applied to (GCC-style attribute). */
#define ALIGN __attribute((aligned(32)))
/* NOTE(review): base, f and g are not referenced anywhere in the visible part of
   this file; only M is used by main(). Confirm before removing them. */
ALIGN int16 base[512];
/* Work buffer handed to every transform call in main(); zeroed once per modulus. */
ALIGN int16 M[512*512];
ALIGN int16 f[512*512];
ALIGN int16 g[512*512];
/* The three tables below are parallel: index i selects a modulus together with
   its forward and inverse transform routine. */
long long qlist[2] = {7681,10753};
/* Forward transforms. The second argument is passed as "reps" by main();
   presumably a count of transforms to run -- TODO confirm against ntt_512.h. */
void (*nttlist[2])(int16*,long long) = {ntt_512_7681,ntt_512_10753};
/* Inverse transforms, same calling convention as nttlist. */
void (*invnttlist[2])(int16*,long long) = {ntt_512_7681_inv,ntt_512_10753_inv};
int main()
{
printf("ntt_512_implementation %s\n",ntt_512_implementation);
printf("ntt_512_version %s\n",ntt_512_version);
printf("ntt_512_compiler %s\n",ntt_512_compiler);
printf("ntt_cpucycles_implementation %s\n",ntt_cpucycles_implementation);
fflush(stdout);
for (long long qpos = 0;qpos < 2;++qpos) {
long long q = qlist[qpos];
void (*ntt)(int16*,long long) = nttlist[qpos];
void (*invntt)(int16*,long long) = invnttlist[qpos];
printf("q %lld\n",q);
fflush(stdout);
for (long long j = 0;j < 512*512;++j)
M[j] = 0;
for (long long reps = 0;reps < 8;++reps) {
long long t[31];
for (long long i = 0;i < 15;++i) {
t[i] = cpucycles();
ntt(M,reps);
}
printf("ntt512*%lld:",reps);
for (long long i = 0;i < 14;++i)
printf(" %lld",t[i+1]-t[i]);
printf("\n");
}
for (long long reps = 0;reps < 8;++reps) {
long long t[31];
for (long long i = 0;i < 15;++i) {
t[i] = cpucycles();
invntt(M,reps);
}
printf("invntt512*%lld:",reps);
for (long long i = 0;i < 14;++i)
printf(" %lld",t[i+1]-t[i]);
printf("\n");
}
}
return 0;
}