RISCV-V扩展执行速度测试
...大约 2 分钟RISC-V
测试函数
测试函数取自 RISC-V V 扩展 1.0 官方文档中的 saxpy 示例,其 C 语言实现如下:
/*
 * Scalar SAXPY reference: for every index i in [0, n), overwrite y[i]
 * with a * x[i] + y[i].
 *
 * n - number of elements to process in both arrays
 * a - scalar multiplier applied to each element of x
 * x - input array, read only
 * y - array that is both read and updated in place
 */
void saxpy_c(size_t n, const float a, const float *x, float *y)
{
    const float *src = x;
    float *dst = y;
    size_t remaining;

    /* Walk both arrays in lock-step, one element per iteration. */
    for (remaining = n; remaining != 0; remaining--) {
        *dst = a * (*src) + (*dst);
        src++;
        dst++;
    }
}
用汇编(V 扩展)实现该函数,保存为 saxpy.s:
# Vector SAXPY: y[i] = a * x[i] + y[i], processed one vector-length chunk per pass.
# Registers as used below: a0 = remaining element count, fa0 = scalar a,
# a1 = pointer into x, a2 = pointer into y, a4 = scratch.
.global saxpy
saxpy:
vsetvli a4, a0, e32, m8,ta,ma   # a4 = elements handled this pass (32-bit elements, LMUL=8, tail/mask agnostic)
vle32.v v0, (a1)                # load this chunk of x into v0
sub a0, a0, a4                  # decrease the remaining count by the chunk size
slli a4, a4, 2                  # convert chunk size from elements to bytes (x4)
add a1, a1, a4                  # advance the x pointer past the chunk
vle32.v v8, (a2)                # load this chunk of y into v8
vfmacc.vf v8, fa0, v0           # v8 = fa0 * v0 + v8
vse32.v v8, (a2)                # store the updated chunk back to y
add a2, a2, a4                  # advance the y pointer past the chunk
bnez a0, saxpy                  # loop again while elements remain
ret
主函数
普通实现
#include <stddef.h>
#include <stdio.h>
#include <time.h>

/* Scalar SAXPY reference: y[i] = a * x[i] + y[i] for every i in [0, n). */
void saxpy_c(size_t n, const float a, const float *x, float *y)
{
    size_t i;
    for (i = 0; i < n; i++) {
        y[i] = a * x[i] + y[i];
    }
}

/*
 * Benchmark driver for the scalar implementation: fills two arrays,
 * runs saxpy_c `loop` times and prints the consumed CPU time.
 */
int main(void)
{
    size_t size = 1000;
    const float a = 1.0;
    int loop = 1000;
    float array1[size];
    float array2[size];

    /* size_t index avoids a signed/unsigned comparison against size. */
    for (size_t i = 0; i < size; i++) {
        array1[i] = (float)(i + 1);
        array2[i] = (float)(i + 1);
    }

    clock_t start, end;
    double duration;

    start = clock();
    for (int i = 0; i < loop; i++) {
        saxpy_c(size, a, array1, array2);
    }
    end = clock();

    /* clock() counts ticks, not milliseconds: scale by CLOCKS_PER_SEC. */
    duration = (double)(end - start) * 1000.0 / CLOCKS_PER_SEC;

    printf("size: %zu\n", size); /* %zu is the matching specifier for size_t */
    printf("loop: %d\n", loop);
    printf("time: %.3fms\n", duration);
    return 0;
}
分别使用x86的编译器和risc-v的编译器编译并执行,测试执行时间
- x86
gcc callsaxpy.c -o test_x86
./test_x86
- riscv
riscv64-unknown-elf-gcc callsaxpy.c -o test_rv
spike --isa=RV64GCV pk test_rv
riscv-v扩展实现
#include <stddef.h>
#include <stdio.h>
#include <time.h>

/* Vector SAXPY routine implemented outside this file (declared extern). */
extern void saxpy(size_t n, const float a, const float *x, float *y);

/*
 * Benchmark driver for the external saxpy routine: fills two arrays,
 * calls saxpy `loop` times and prints the consumed CPU time.
 */
int main(void)
{
    size_t size = 1000;
    const float a = 1.0;
    int loop = 1000;
    float array1[size];
    float array2[size];
    clock_t start, end;
    double duration;

    /* size_t index avoids a signed/unsigned comparison against size. */
    for (size_t i = 0; i < size; i++) {
        array1[i] = (float)(i + 1);
        array2[i] = (float)(i + 1);
    }

    // Call the saxpy routine repeatedly and time the whole run.
    start = clock();
    for (int i = 0; i < loop; i++) {
        saxpy(size, a, array1, array2);
    }
    end = clock();

    /* clock() counts ticks, not milliseconds: scale by CLOCKS_PER_SEC. */
    duration = (double)(end - start) * 1000.0 / CLOCKS_PER_SEC;

    printf("size: %zu\n", size); /* %zu is the matching specifier for size_t */
    printf("loop: %d\n", loop);
    printf("time: %.3fms\n", duration);
    return 0;
}
spike --isa=RV64GCV pk test_rv_v
aa.png
深度测试
分别对不同数据量(10、100、1000)进行测试,为突出效果,每次循环1000遍。
| | size=10 | size=100 | size=1000 |
|---|---|---|---|
| x86 | 23ms | 203ms | 2052ms |
| RISCV | 270ms | 2430ms | 24030ms |
| RISCV-V | 24ms | 54ms | 334ms |