93 lines
2.2 KiB
C
93 lines
2.2 KiB
C
|
/* { dg-do run } */
|
||
|
/* { dg-options "-O3 -save-temps" } */
|
||
|
#include <stdint.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#pragma GCC target "+nosve"
|
||
|
|
||
|
#define ARR_SIZE 1024
|
||
|
|
||
|
/* Should produce an uaddl */
|
||
|
void uadd_opt (uint32_t *foo, uint16_t *a, uint16_t *b)
|
||
|
{
|
||
|
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||
|
{
|
||
|
foo[i] = a[i] + b[i];
|
||
|
foo[i+1] = a[i+1] + b[i+1];
|
||
|
foo[i+2] = a[i+2] + b[i+2];
|
||
|
foo[i+3] = a[i+3] + b[i+3];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
__attribute__((optimize (0)))
|
||
|
void uadd_nonopt (uint32_t *foo, uint16_t *a, uint16_t *b)
|
||
|
{
|
||
|
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||
|
{
|
||
|
foo[i] = a[i] + b[i];
|
||
|
foo[i+1] = a[i+1] + b[i+1];
|
||
|
foo[i+2] = a[i+2] + b[i+2];
|
||
|
foo[i+3] = a[i+3] + b[i+3];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Should produce an saddl */
|
||
|
void sadd_opt (int32_t *foo, int16_t *a, int16_t *b)
|
||
|
{
|
||
|
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||
|
{
|
||
|
foo[i] = a[i] + b[i];
|
||
|
foo[i+1] = a[i+1] + b[i+1];
|
||
|
foo[i+2] = a[i+2] + b[i+2];
|
||
|
foo[i+3] = a[i+3] + b[i+3];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
__attribute__((optimize (0)))
|
||
|
void sadd_nonopt (int32_t *foo, int16_t *a, int16_t *b)
|
||
|
{
|
||
|
for( int i = 0; i < ARR_SIZE - 3;i=i+4)
|
||
|
{
|
||
|
foo[i] = a[i] + b[i];
|
||
|
foo[i+1] = a[i+1] + b[i+1];
|
||
|
foo[i+2] = a[i+2] + b[i+2];
|
||
|
foo[i+3] = a[i+3] + b[i+3];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
void __attribute__((optimize (0)))
|
||
|
init(uint16_t *a, uint16_t *b)
|
||
|
{
|
||
|
for( int i = 0; i < ARR_SIZE;i++)
|
||
|
{
|
||
|
a[i] = i;
|
||
|
b[i] = 2*i;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int __attribute__((optimize (0)))
|
||
|
main()
|
||
|
{
|
||
|
uint32_t foo_arr[ARR_SIZE];
|
||
|
uint32_t bar_arr[ARR_SIZE];
|
||
|
uint16_t a[ARR_SIZE];
|
||
|
uint16_t b[ARR_SIZE];
|
||
|
|
||
|
init(a, b);
|
||
|
uadd_opt(foo_arr, a, b);
|
||
|
uadd_nonopt(bar_arr, a, b);
|
||
|
if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
|
||
|
return 1;
|
||
|
sadd_opt((int32_t*) foo_arr, (int16_t*) a, (int16_t*) b);
|
||
|
sadd_nonopt((int32_t*) bar_arr, (int16_t*) a, (int16_t*) b);
|
||
|
if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
|
||
|
return 1;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* { dg-final { scan-assembler-times {\tuaddl\t} 1} } */
|
||
|
/* { dg-final { scan-assembler-times {\tuaddl2\t} 1} } */
|
||
|
/* { dg-final { scan-assembler-times {\tsaddl\t} 1} } */
|
||
|
/* { dg-final { scan-assembler-times {\tsaddl2\t} 1} } */
|