/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_barycenter_f32.c
 * Description:  Barycenter
 *
 * $Date:        23 April 2021
 * $Revision:    V1.9.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "dsp/support_functions.h"
#include <limits.h>
#include <math.h>

/**
  @ingroup barycenter
 */

/**
 * @brief Barycenter
 *
 * Computes the weighted mean of a list of vectors:
 *
 *     out[j] = (sum_i weights[i] * in[i * vecDim + j]) / (sum_i weights[i])
 *
 * The input vectors are read back to back from @p in, i.e. vector i
 * occupies in[i * vecDim] .. in[i * vecDim + vecDim - 1].
 *
 * Three implementations are selected at compile time (Helium/MVE, Neon,
 * plain C). The two SIMD variants normalize by multiplying with the
 * reciprocal of the weight sum, the plain C variant divides by it.
 *
 * The weight sum is not checked: if it is zero (for instance when
 * nbVectors is 0) the normalization step divides by zero.
 *
 * @param[in]    in         List of vectors (nbVectors * vecDim samples)
 * @param[in]    weights    Weights of the vectors (nbVectors samples)
 * @param[out]   out        Barycenter (vecDim samples)
 * @param[in]    nbVectors  Number of vectors
 * @param[in]    vecDim     Dimension of space (vector dimension)
 * @return       None
 *
 */

#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

void arm_barycenter_f32(const float32_t *in,
                        const float32_t *weights,
                        float32_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float32_t *pIn, *pW;
    const float32_t *pIn1, *pIn2, *pIn3, *pIn4;
    float32_t *pOut;
    uint32_t blkCntVector, blkCntSample;
    float32_t accum, w;

    accum = 0.0f;
    pW = weights;
    pIn = in;

    /* The output doubles as the accumulator, so it starts at zero */
    arm_fill_f32(0.0f, out, vecDim);

    /* Sum: four input vectors per pass */
    blkCntVector = nbVectors >> 2;
    while (blkCntVector > 0)
    {
        f32x4_t outV, inV1, inV2, inV3, inV4;
        float32_t w1, w2, w3, w4;

        /*
         * The four row pointers are derived here, inside the loop, so they
         * are only ever formed when four complete vectors remain. Nothing
         * is computed beyond one-past-the-end of the input buffer.
         */
        pIn1 = pIn;
        pIn2 = pIn1 + vecDim;
        pIn3 = pIn2 + vecDim;
        pIn4 = pIn3 + vecDim;

        pOut = out;
        w1 = *pW++;
        w2 = *pW++;
        w3 = *pW++;
        w4 = *pW++;
        accum += w1 + w2 + w3 + w4;

        /* Four samples of each of the four vectors at a time */
        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q((const float32_t *) pOut);
            inV1 = vld1q(pIn1);
            inV2 = vld1q(pIn2);
            inV3 = vld1q(pIn3);
            inV4 = vld1q(pIn4);
            outV = vfmaq(outV, inV1, w1);
            outV = vfmaq(outV, inV2, w2);
            outV = vfmaq(outV, inV3, w3);
            outV = vfmaq(outV, inV4, w4);
            vst1q(pOut, outV);

            pOut += 4;
            pIn1 += 4;
            pIn2 += 4;
            pIn3 += 4;
            pIn4 += 4;

            blkCntSample--;
        }

        /* Remaining (vecDim % 4) samples */
        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn1++ * w1;
            *pOut = *pOut + *pIn2++ * w2;
            *pOut = *pOut + *pIn3++ * w3;
            *pOut = *pOut + *pIn4++ * w4;
            pOut++;

            blkCntSample--;
        }

        /* Step over the four vectors just consumed */
        pIn += 4 * vecDim;

        blkCntVector--;
    }

    /* Sum: remaining (nbVectors % 4) vectors, one per pass */
    blkCntVector = nbVectors & 3;
    while (blkCntVector > 0)
    {
        f32x4_t inV, outV;

        pOut = out;
        w = *pW++;
        accum += w;

        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV = vld1q_f32(pIn);
            outV = vfmaq(outV, inV, w);
            vst1q_f32(pOut, outV);

            pOut += 4;
            pIn += 4;

            blkCntSample--;
        }

        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn++ * w;
            pOut++;

            blkCntSample--;
        }

        blkCntVector--;
    }

    /* Normalize: scale by the reciprocal of the weight sum */
    pOut = out;
    accum = 1.0f / accum;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        f32x4_t tmp;

        tmp = vld1q((const float32_t *) pOut);
        tmp = vmulq(tmp, accum);
        vst1q(pOut, tmp);
        pOut += 4;

        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = *pOut * accum;
        pOut++;

        blkCntSample--;
    }
}

#else
#if defined(ARM_MATH_NEON)

#include "NEMath.h"

void arm_barycenter_f32(const float32_t *in,
                        const float32_t *weights,
                        float32_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float32_t *pIn, *pW, *pIn1, *pIn2, *pIn3, *pIn4;
    float32_t *pOut;
    uint32_t blkCntVector, blkCntSample;
    float32_t accum, w, w1, w2, w3, w4;
    float32x4_t tmp, inV, outV, inV1, inV2, inV3, inV4;

    accum = 0.0f;
    pW = weights;
    pIn = in;

    /* Set counters to 0: the output doubles as the accumulator */
    tmp = vdupq_n_f32(0.0f);
    pOut = out;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        vst1q_f32(pOut, tmp);
        pOut += 4;
        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = 0.0f;
        pOut++;
        blkCntSample--;
    }

    /* Sum: four input vectors per pass */
    blkCntVector = nbVectors >> 2;
    while (blkCntVector > 0)
    {
        /*
         * The four row pointers are derived here, inside the loop, so they
         * are only ever formed when four complete vectors remain. Nothing
         * is computed beyond one-past-the-end of the input buffer.
         */
        pIn1 = pIn;
        pIn2 = pIn1 + vecDim;
        pIn3 = pIn2 + vecDim;
        pIn4 = pIn3 + vecDim;

        pOut = out;
        w1 = *pW++;
        w2 = *pW++;
        w3 = *pW++;
        w4 = *pW++;
        accum += w1 + w2 + w3 + w4;

        /* Four samples of each of the four vectors at a time */
        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV1 = vld1q_f32(pIn1);
            inV2 = vld1q_f32(pIn2);
            inV3 = vld1q_f32(pIn3);
            inV4 = vld1q_f32(pIn4);
            outV = vmlaq_n_f32(outV, inV1, w1);
            outV = vmlaq_n_f32(outV, inV2, w2);
            outV = vmlaq_n_f32(outV, inV3, w3);
            outV = vmlaq_n_f32(outV, inV4, w4);
            vst1q_f32(pOut, outV);

            pOut += 4;
            pIn1 += 4;
            pIn2 += 4;
            pIn3 += 4;
            pIn4 += 4;

            blkCntSample--;
        }

        /* Remaining (vecDim % 4) samples */
        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn1++ * w1;
            *pOut = *pOut + *pIn2++ * w2;
            *pOut = *pOut + *pIn3++ * w3;
            *pOut = *pOut + *pIn4++ * w4;
            pOut++;

            blkCntSample--;
        }

        /* Step over the four vectors just consumed */
        pIn += 4 * vecDim;

        blkCntVector--;
    }

    /* Sum: remaining (nbVectors % 4) vectors, one per pass */
    blkCntVector = nbVectors & 3;
    while (blkCntVector > 0)
    {
        pOut = out;
        w = *pW++;
        accum += w;

        blkCntSample = vecDim >> 2;
        while (blkCntSample > 0)
        {
            outV = vld1q_f32(pOut);
            inV = vld1q_f32(pIn);
            outV = vmlaq_n_f32(outV, inV, w);
            vst1q_f32(pOut, outV);

            pOut += 4;
            pIn += 4;

            blkCntSample--;
        }

        blkCntSample = vecDim & 3;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn++ * w;
            pOut++;

            blkCntSample--;
        }

        blkCntVector--;
    }

    /* Normalize: scale by the reciprocal of the weight sum */
    pOut = out;
    accum = 1.0f / accum;

    blkCntSample = vecDim >> 2;
    while (blkCntSample > 0)
    {
        tmp = vld1q_f32(pOut);
        tmp = vmulq_n_f32(tmp, accum);
        vst1q_f32(pOut, tmp);
        pOut += 4;

        blkCntSample--;
    }

    blkCntSample = vecDim & 3;
    while (blkCntSample > 0)
    {
        *pOut = *pOut * accum;
        pOut++;

        blkCntSample--;
    }
}

#else

void arm_barycenter_f32(const float32_t *in,
                        const float32_t *weights,
                        float32_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float32_t *pIn, *pW;
    float32_t *pOut;
    uint32_t blkCntVector, blkCntSample;
    float32_t accum, w;

    accum = 0.0f;
    pW = weights;
    pIn = in;

    /* Set counters to 0: the output doubles as the accumulator */
    blkCntSample = vecDim;
    pOut = out;

    while (blkCntSample > 0)
    {
        *pOut = 0.0f;
        pOut++;
        blkCntSample--;
    }

    /* Sum: add each vector, scaled by its weight, into the output */
    blkCntVector = nbVectors;
    while (blkCntVector > 0)
    {
        pOut = out;
        w = *pW++;
        accum += w;

        blkCntSample = vecDim;
        while (blkCntSample > 0)
        {
            *pOut = *pOut + *pIn++ * w;
            pOut++;
            blkCntSample--;
        }

        blkCntVector--;
    }

    /* Normalize: divide by the weight sum */
    blkCntSample = vecDim;
    pOut = out;

    while (blkCntSample > 0)
    {
        *pOut = *pOut / accum;
        pOut++;
        blkCntSample--;
    }
}

#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

/**
 * @} end of barycenter group
 */