/* PR81833: This used to fail due to improper implementation of vec_msum.  */
/* Test case relies on -mcpu=power7 or later.  Currently we don't have
   machinery to express that, so we have two separate tests for -mcpu=power7
   and -mcpu=power8 to catch 32-bit BE on P7 and 64-bit BE/LE on P8.  */

/* { dg-do run } */
/* { dg-require-effective-target p8vector_hw } */
/* { dg-options "-mdejagnu-cpu=power8 -O2" } */

#include <altivec.h>

/* Shorthand names for the AltiVec vector types.  */
#define vec_u8 vector unsigned char
#define vec_s8 vector signed char
#define vec_u16 vector unsigned short
#define vec_s16 vector signed short
#define vec_u32 vector unsigned int
#define vec_s32 vector signed int
#define vec_f vector float

/* Declares the constant byte vector `zerov', splatted with 0, that the
   zero_* macros below refer to.  Expand it as a declaration in every scope
   that uses one of them; the user supplies the trailing semicolon.  */
#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0)

/* `zerov' cast to each vector type.  The expansions are parenthesized so
   the cast binds as a unit wherever the macro is used.  */
#define zero_u8v ((vec_u8) zerov)
#define zero_s8v ((vec_s8) zerov)
#define zero_u16v ((vec_u16) zerov)
#define zero_s16v ((vec_s16) zerov)
#define zero_u32v ((vec_u32) zerov)
#define zero_s32v ((vec_s32) zerov)

signed int __attribute__((noinline))
|
||
|
scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
|
||
|
int order)
|
||
|
{
|
||
|
int i;
|
||
|
LOAD_ZERO;
|
||
|
register vec_s16 vec1;
|
||
|
register vec_s32 res = vec_splat_s32 (0), t;
|
||
|
signed int ires;
|
||
|
|
||
|
for (i = 0; i < order; i += 8) {
|
||
|
vec1 = vec_vsx_ld (0, v1);
|
||
|
t = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
|
||
|
res = vec_sums (t, res);
|
||
|
v1 += 8;
|
||
|
v2 += 8;
|
||
|
}
|
||
|
res = vec_splat (res, 3);
|
||
|
vec_ste (res, 0, &ires);
|
||
|
|
||
|
return ires;
|
||
|
}
|
||
|
|
||
|
/* Run-time check: the dot product of eight ones with themselves must be 8.
   Aborts on any other result, which makes the test fail.  */
int main(void)
{
  const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 };
  if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8)
    __builtin_abort ();
  return 0;
}