ubuntu-buildroot/output/build/glibc-2.36-81-g4f4d7a13edfd.../sysdeps/x86_64/fpu/Makefile

# Entries added to libmvec-support; active only in the mathvec subdirectory.
ifeq ($(subdir),mathvec)
# Name suffixes combined with every word of $(libmvec-funcs) to form the
# svml_d_* (double) entries below.
libmvec-double-func-list = 2_core 4_core 4_core_avx 8_core
# Name suffixes combined with every word of $(libmvec-funcs) to form the
# svml_s_* (float) entries below.
libmvec-float-func-list = f4_core f8_core f8_core_avx f16_core

# The fixed *_data entries come first, followed by one
# svml_d_<func><suffix> entry per double suffix and one
# svml_s_<func><suffix> entry per float suffix, for each <func> in
# $(libmvec-funcs).
libmvec-support += \
  svml_d_exp_data svml_d_log_data svml_d_pow_data svml_d_trig_data \
  svml_s_expf_data svml_s_logf_data svml_s_powf_data svml_s_trig_data \
  $(foreach core,$(libmvec-double-func-list), \
    $(patsubst %,svml_d_%$(core),$(libmvec-funcs))) \
  $(foreach core,$(libmvec-float-func-list), \
    $(patsubst %,svml_s_%$(core),$(libmvec-funcs)))
endif

# Do not run libmvec tests if multiarch not enabled.
ifneq ($(multi-arch),no)
# libmvec test configuration.  Active only in the math subdirectory and
# only when build-mathvec is "yes".
ifeq ($(subdir)$(build-mathvec),mathyes)
libmvec-tests += \
  double-vlen2 \
  double-vlen4 \
  double-vlen4-avx2 \
  float-vlen4 \
  float-vlen8 \
  float-vlen8-avx2 \
  double-vlen8 \
  float-vlen16
tests += $(libmvec-abi-func-tests) $(libmvec-abi-func-avx-tests) \
  $(libmvec-abi-func-avx2-tests) $(libmvec-abi-func-avx512f-tests)

# Every test variant listed above uses the complete $(libmvec-funcs) list.
double-vlen2-funcs = $(libmvec-funcs)
float-vlen4-funcs = $(libmvec-funcs)
double-vlen4-funcs = $(libmvec-funcs)
float-vlen8-funcs = $(libmvec-funcs)
double-vlen4-avx2-funcs = $(libmvec-funcs)
float-vlen8-avx2-funcs = $(libmvec-funcs)
double-vlen8-funcs = $(libmvec-funcs)
float-vlen16-funcs = $(libmvec-funcs)

# Per-variant ISA extension flags, grouped by the flag they select.
double-vlen4-arch-ext-cflags = -mavx
float-vlen8-arch-ext-cflags = -mavx

double-vlen4-arch-ext2-cflags = -mavx2
float-vlen8-arch-ext2-cflags = -mavx2

double-vlen8-arch-ext-cflags = -mavx512f
float-vlen16-arch-ext-cflags = -mavx512f

# Fast-math test flags plus -fno-inline, OpenMP, and suppression of
# unknown-pragma warnings.
libmvec-abi-test-cflags = $(libm-test-fast-math-cflags) \
  -fno-inline -fopenmp -Wno-unknown-pragmas

# The AVX2 wrapper sources are compiled with the matching "ext2" flags.
CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)
CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
endif

# Workaround for a GCC code-generation issue affecting branred.c:
#
#   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
#
# With -O3 -march=skylake, GCC 8 and 9 optimize some loops in branred.c
# using 256-bit vector instructions, which causes store-forwarding
# stalls.  Limiting the preferred vector width to 128 bits avoids this
# and improves performance of sin and cos by more than 40% on Skylake.
#
# Applies only in the math subdirectory and only when
# config-cflags-mprefer-vector-width is "yes".
ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
CFLAGS-branred.c = -mprefer-vector-width=128
endif

# libmvec benchmark configuration.  Active only in the benchtests
# subdirectory and only when build-mathvec is "yes".
ifeq ($(subdir)$(build-mathvec),benchtestsyes)
# Per-variant ISA extension flags, grouped by the flag they select.
double-vlen4-arch-ext-cflags = -mavx
float-vlen8-arch-ext-cflags = -mavx

double-vlen4-arch-ext2-cflags = -mavx2
float-vlen8-arch-ext2-cflags = -mavx2

double-vlen8-arch-ext-cflags = -mavx512f
float-vlen16-arch-ext-cflags = -mavx512f

# Combined list of the double and float benchmarks (expanded once, here).
bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)

# Libraries the benchmark targets depend on: $(libmvec) and $(libm),
# unless STATIC-BENCHTESTS is "yes", in which case the libmvec.a and
# libm.a archives under $(common-objpfx) are used instead.
ifneq ($(STATIC-BENCHTESTS),yes)
libmvec-benchtests = $(libmvec) $(libm)
else
libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
endif

# Prerequisites shared by every generated benchmark source file.
bench-libmvec-deps = \
  $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c \
  bench-timing.h \
  Makefile

# Each double and float benchmark depends on the libraries chosen above.
$(addprefix $(objpfx)bench-,$(bench-libmvec-double) $(bench-libmvec-float)): \
  $(libmvec-benchtests)

# Recipe shared by the two pattern rules below.  It writes the contents of
# the file(s) named by $($*-INCLUDE), when that variable is non-empty,
# followed by the output of bench_libmvec.py (invoked with the target's
# file name minus its suffix) to $@-tmp, and then moves $@-tmp onto $@.
define bench-libmvec-gen-source
{ if [ -n "$($*-INCLUDE)" ]; then \
  cat $($*-INCLUDE); \
fi; \
$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
mv -f $@-tmp $@
endef

$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
	$(bench-libmvec-gen-source)

$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
	$(bench-libmvec-gen-source)
endif
endif
1 2024-04-01 15:19:46 +00:00			`ifeq ($(subdir),mathvec)`
			`libmvec-double-func-list = \`
			`2_core \`
			`4_core \`
			`4_core_avx \`
			`8_core`
			`libmvec-float-func-list = \`
			`f4_core \`
			`f8_core \`
			`f8_core_avx \`
			`f16_core`
			`libmvec-support += \`
			`svml_d_exp_data \`
			`svml_d_log_data \`
			`svml_d_pow_data \`
			`svml_d_trig_data \`
			`svml_s_expf_data \`
			`svml_s_logf_data \`
			`svml_s_powf_data \`
			`svml_s_trig_data \`
			`$(foreach l,$(libmvec-double-func-list), \`
			`$(addprefix svml_d_,$(addsuffix $(l),$(libmvec-funcs)))) \`
			`$(foreach l,$(libmvec-float-func-list), \`
			`$(addprefix svml_s_,$(addsuffix $(l),$(libmvec-funcs))))`
			`endif`

			`# Do not run libmvec tests if multiarch not enabled.`
			`ifneq ($(multi-arch),no)`
			`# Variables for libmvec tests.`
			`ifeq ($(subdir)$(build-mathvec),mathyes)`
			`libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \`
			`float-vlen4 float-vlen8 float-vlen8-avx2 \`
			`double-vlen8 float-vlen16`
			`tests += \`
			`$(libmvec-abi-func-tests) \`
			`$(libmvec-abi-func-avx-tests) \`
			`$(libmvec-abi-func-avx2-tests) \`
			`$(libmvec-abi-func-avx512f-tests)`

			`double-vlen2-funcs = $(libmvec-funcs)`
			`double-vlen4-funcs = $(libmvec-funcs)`
			`double-vlen4-avx2-funcs = $(libmvec-funcs)`
			`double-vlen8-funcs = $(libmvec-funcs)`
			`float-vlen4-funcs = $(libmvec-funcs)`
			`float-vlen8-funcs = $(libmvec-funcs)`
			`float-vlen8-avx2-funcs = $(libmvec-funcs)`
			`float-vlen16-funcs = $(libmvec-funcs)`

			`double-vlen4-arch-ext-cflags = -mavx`
			`double-vlen4-arch-ext2-cflags = -mavx2`
			`double-vlen8-arch-ext-cflags = -mavx512f`

			`float-vlen8-arch-ext-cflags = -mavx`
			`float-vlen8-arch-ext2-cflags = -mavx2`
			`float-vlen16-arch-ext-cflags = -mavx512f`

			`libmvec-abi-test-cflags = \`
			`$(libm-test-fast-math-cflags) \`
			`-fno-inline -fopenmp -Wno-unknown-pragmas`

			`CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)`

			`CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)`
			`endif`

			`ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)`
			`# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops`
			`# in branred.c with 256-bit vector instructions, which leads to store`
			`# forward stall:`
			`#`
			`# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579`
			`#`
			`# Limit vector width to 128 bits to work around this issue. It improves`
			`# performance of sin and cos by more than 40% on Skylake.`
			`CFLAGS-branred.c = -mprefer-vector-width=128`
			`endif`

			`ifeq ($(subdir)$(build-mathvec),benchtestsyes)`
			`double-vlen4-arch-ext-cflags = -mavx`
			`double-vlen4-arch-ext2-cflags = -mavx2`
			`double-vlen8-arch-ext-cflags = -mavx512f`

			`float-vlen8-arch-ext-cflags = -mavx`
			`float-vlen8-arch-ext2-cflags = -mavx2`
			`float-vlen16-arch-ext-cflags = -mavx512f`

			`bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)`

			`ifeq (${STATIC-BENCHTESTS},yes)`
			`libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a`
			`else`
			`libmvec-benchtests = $(libmvec) $(libm)`
			`endif`

			`$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)`
			`$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)`
			`bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile`

			`$(objpfx)bench-float-%.c: $(bench-libmvec-deps)`
			`{ if [ -n "$($*-INCLUDE)" ]; then \`
			`cat $($*-INCLUDE); \`
			`fi; \`
			`$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp`
			`mv -f $@-tmp $@`

			`$(objpfx)bench-double-%.c: $(bench-libmvec-deps)`
			`{ if [ -n "$($*-INCLUDE)" ]; then \`
			`cat $($*-INCLUDE); \`
			`fi; \`
			`$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp`
			`mv -f $@-tmp $@`
			`endif`
			`endif`