stm32f407-openocd/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_templa...

552 lines
9.7 KiB
C
Raw Normal View History

2024-06-12 08:32:58 +00:00
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_boolean_distance.c
* Description: Templates for boolean distances
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @defgroup DISTANCEF Distance Functions
*
* Computes Distances between vectors.
*
* Distance functions are useful in a lot of algorithms.
*
*/
/**
* @addtogroup DISTANCEF
* @{
*/
/*
 * Name-building helpers for this template.
 * FUNC(EXT) produces the identifier arm_boolean_distance<EXT>.
 * The extra _FUNC level lets the argument be macro-expanded before the
 * ## token pasting is applied, so EXT can itself be a macro supplied by
 * the file that includes this template.
 */
#define _FUNC(A,B) A##B
#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
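/**
* @brief Elements of boolean distances
*
* Counts, over two vectors of packed booleans, how many positions fall into
* each (A, B) truth combination. These counts are the values used to compute
* boolean distances. Only the counters selected at compile time through the
* TT, FF, TF and FT macros are computed, and only the matching output
* parameters are part of the signature.
*
* Booleans are packed 32 per word. When numberOfBools is not a multiple of 32,
* the remaining booleans are taken from the most significant bits of the last
* word.
*
* @param[in] pA First vector of packed booleans
* @param[in] pB Second vector of packed booleans
* @param[in] numberOfBools Number of booleans
* @param[out] cTT Number of positions where A is true and B is true (only if TT is defined)
* @param[out] cFF Number of positions where A is false and B is false (only if FF is defined)
* @param[out] cTF Number of positions where A is true and B is false (only if TF is defined)
* @param[out] cFT Number of positions where A is false and B is true (only if FT is defined)
* @return None
*
*/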
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_common_tables.h"
/*
 * Helium (MVE) implementation.
 *
 * Processes 128 booleans (16 bytes) per vector iteration, then the remaining
 * whole 32-bit words bit by bit, then the trailing partial word (if any).
 * Each enabled counter (TT / FF / TF / FT) is written to its output pointer
 * at the end.
 */
void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{
#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t a, b, ba, bb;
    int shift;
    const uint8_t *pA8 = (const uint8_t *) pA;
    const uint8_t *pB8 = (const uint8_t *) pB;

    /* handle vector blocks: 16 bytes = 128 packed booleans per iteration */
    uint32_t blkCnt = numberOfBools / 128;
    while (blkCnt > 0U) {
        uint8x16_t vecA = vld1q((const uint8_t *) pA8);
        uint8x16_t vecB = vld1q((const uint8_t *) pB8);

        /*
         * For each enabled combination: build the bit mask, use every byte of
         * the mask as an offset into hwLUT, and sum the 16 gathered bytes.
         * NOTE(review): hwLUT comes from arm_common_tables.h; presumably it
         * maps a byte to its number of set bits - confirm against that header.
         */
#ifdef TT
        uint8x16_t vecTT = vecA & vecB;
        vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
        _ctt += vaddvq(vecTT);
#endif
#ifdef FF
        uint8x16_t vecFF = vmvnq(vecA) & vmvnq(vecB);
        vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
        _cff += vaddvq(vecFF);
#endif
#ifdef TF
        uint8x16_t vecTF = vecA & vmvnq(vecB);
        vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
        _ctf += vaddvq(vecTF);
#endif
#ifdef FT
        uint8x16_t vecFT = vmvnq(vecA) & vecB;
        vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
        _cft += vaddvq(vecFT);
#endif

        pA8 += 16;
        pB8 += 16;
        blkCnt--;
    }

    /* Continue with word accesses where the vector loop stopped. */
    pA = (const uint32_t *)pA8;
    pB = (const uint32_t *)pB8;

    /* Booleans left after the 128-bit blocks (0..127). */
    blkCnt = numberOfBools & 0x7F;

    /* Remaining whole words: all 32 bits are valid booleans. */
    while(blkCnt >= 32)
    {
        a = *pA++;
        b = *pB++;
        shift = 0;
        while(shift < 32)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;
#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            shift ++;
        }

        blkCnt -= 32;
    }

    /*
     * Trailing partial word (1..31 booleans stored in the most significant
     * bits). It is only read when a remainder exists: otherwise the load
     * would access one word past the packed data and the shift amount would
     * be 32, which is undefined for a 32-bit operand.
     */
    if (blkCnt > 0U)
    {
        a = *pA;
        b = *pB;

        a = a >> (32 - blkCnt);
        b = b >> (32 - blkCnt);

        while(blkCnt > 0)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;
#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            blkCnt --;
        }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}
#else
#if defined(ARM_MATH_NEON)
/*
 * Neon implementation.
 *
 * Processes 128 booleans (four 32-bit words) per vector iteration using a
 * per-byte bit count followed by pairwise widening adds, then the remaining
 * whole 32-bit words bit by bit, then the trailing partial word (if any).
 * Each enabled counter (TT / FF / TF / FT) is written to its output pointer
 * at the end.
 */
void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{
#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t nbBoolBlock;
    uint32_t a,b,ba,bb;
    int shift;
    uint32x4_t aV, bV;
#ifdef TT
    uint32x4_t cttV;
#endif
#ifdef FF
    uint32x4_t cffV;
#endif
#ifdef TF
    uint32x4_t ctfV;
#endif
#ifdef FT
    uint32x4_t cftV;
#endif
    uint8x16_t tmp;
    uint16x8_t tmp2;
    uint32x4_t tmp3;
    uint64x2_t tmp4;

    /* Per-combination vector accumulators (two 64-bit partial sums each). */
#ifdef TT
    uint64x2_t tmp4tt = vdupq_n_u64(0);
#endif
#ifdef FF
    uint64x2_t tmp4ff = vdupq_n_u64(0);
#endif
#ifdef TF
    uint64x2_t tmp4tf = vdupq_n_u64(0);
#endif
#ifdef FT
    uint64x2_t tmp4ft = vdupq_n_u64(0);
#endif

    /* Number of 128-boolean blocks handled by the vector loop. */
    nbBoolBlock = numberOfBools >> 7;
    while(nbBoolBlock > 0)
    {
        aV = vld1q_u32(pA);
        bV = vld1q_u32(pB);
        pA += 4;
        pB += 4;

        /* Bit masks of the positions belonging to each combination. */
#ifdef TT
        cttV = vandq_u32(aV,bV);
#endif
#ifdef FF
        cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
#endif
#ifdef TF
        ctfV = vandq_u32(aV,vmvnq_u32(bV));
#endif
#ifdef FT
        cftV = vandq_u32(vmvnq_u32(aV),bV);
#endif

        /*
         * Count set bits per byte, then widen pairwise 8 -> 16 -> 32 -> 64
         * bits and accumulate the two 64-bit partial sums.
         */
#ifdef TT
        tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
        tmp2 = vpaddlq_u8(tmp);
        tmp3 = vpaddlq_u16(tmp2);
        tmp4 = vpaddlq_u32(tmp3);
        tmp4tt = vaddq_u64(tmp4tt, tmp4);
#endif
#ifdef FF
        tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
        tmp2 = vpaddlq_u8(tmp);
        tmp3 = vpaddlq_u16(tmp2);
        tmp4 = vpaddlq_u32(tmp3);
        tmp4ff = vaddq_u64(tmp4ff, tmp4);
#endif
#ifdef TF
        tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
        tmp2 = vpaddlq_u8(tmp);
        tmp3 = vpaddlq_u16(tmp2);
        tmp4 = vpaddlq_u32(tmp3);
        tmp4tf = vaddq_u64(tmp4tf, tmp4);
#endif
#ifdef FT
        tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
        tmp2 = vpaddlq_u8(tmp);
        tmp3 = vpaddlq_u16(tmp2);
        tmp4 = vpaddlq_u32(tmp3);
        tmp4ft = vaddq_u64(tmp4ft, tmp4);
#endif

        nbBoolBlock --;
    }

    /* Fold the two 64-bit lanes into the 32-bit scalar counters. */
#ifdef TT
    _ctt += (uint32_t)(vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1));
#endif
#ifdef FF
    _cff += (uint32_t)(vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1));
#endif
#ifdef TF
    _ctf += (uint32_t)(vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1));
#endif
#ifdef FT
    _cft += (uint32_t)(vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1));
#endif

    /* Booleans left after the 128-bit blocks (0..127). */
    nbBoolBlock = numberOfBools & 0x7F;

    /* Remaining whole words: all 32 bits are valid booleans. */
    while(nbBoolBlock >= 32)
    {
        a = *pA++;
        b = *pB++;
        shift = 0;
        while(shift < 32)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;
#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            shift ++;
        }

        nbBoolBlock -= 32;
    }

    /*
     * Trailing partial word (1..31 booleans stored in the most significant
     * bits). It is only read when a remainder exists: otherwise the load
     * would access one word past the packed data and the shift amount would
     * be 32, which is undefined for a 32-bit operand.
     */
    if (nbBoolBlock > 0)
    {
        a = *pA;
        b = *pB;

        a = a >> (32 - nbBoolBlock);
        b = b >> (32 - nbBoolBlock);

        while(nbBoolBlock > 0)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;
#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            nbBoolBlock --;
        }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}
#else
/*
 * Portable scalar implementation.
 *
 * Walks the packed vectors one 32-bit word at a time and classifies each bit
 * pair, then handles the trailing partial word (if any). Each enabled counter
 * (TT / FF / TF / FT) is written to its output pointer at the end.
 */
void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{
#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t a,b,ba,bb;
    int shift;

    /* Whole words: all 32 bits are valid booleans. */
    while(numberOfBools >= 32)
    {
        a = *pA++;
        b = *pB++;
        shift = 0;
        while(shift < 32)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;

#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            shift ++;
        }

        numberOfBools -= 32;
    }

    /*
     * Trailing partial word (1..31 booleans stored in the most significant
     * bits). It is only read when a remainder exists: otherwise the load
     * would access one word past the packed data and the shift amount would
     * be 32, which is undefined for a 32-bit operand.
     */
    if (numberOfBools > 0)
    {
        a = *pA;
        b = *pB;

        /* Bring the valid high bits down so they can be consumed from bit 0. */
        a = a >> (32 - numberOfBools);
        b = b >> (32 - numberOfBools);

        while(numberOfBools > 0)
        {
            ba = a & 1;
            bb = b & 1;
            a = a >> 1;
            b = b >> 1;

#ifdef TT
            _ctt += (ba && bb);
#endif
#ifdef FF
            _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
            _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
            _cft += ((1 ^ ba) && bb);
#endif
            numberOfBools --;
        }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}
#endif
#endif /* defined(ARM_MATH_MVEI) */
/**
* @} end of DISTANCEF group
*/