/* arm_neon.h
*
* Copyright 2005 ARM Limited. All rights reserved.
*
* RCS $Revision: 185729 $
* Checkin $Date: 2014-06-12 17:04:12 +0100 (Thu, 12 Jun 2014) $
* Revising $Author: ransin01 $
* $Id: arm_neon.h 185729 2014-06-12 16:04:12Z ransin01 $
* $URL: $
*
* Note: this file is auto-generated from neon_intrinsics_spec.xml
*/
#ifndef __NEON_H_
#define __NEON_H_
#ifdef __cplusplus
#define __STDC_LIMIT_MACROS 1
#define __STDC_FORMAT_MACROS 1
#define __STDC_CONSTANT_MACROS 1
#endif
#include <stdint.h>
#ifdef __TARGET_ARCH_AARCH64
#define SIMD128_ALIGN __attribute__((aligned(16)))
#else
#define SIMD128_ALIGN
#endif
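/* On AArch64, give the 128-bit (Q register) vector types their natural
 * 16-byte alignment; on AArch32 the two-double layout used below already
 * provides 8-byte alignment, so no extra attribute is applied.
 */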
#define __MKVECTORSTRUCT64(T,N,T2) struct __simd64_##T { double __private; }; \
struct __library_simd64_##T { T2 a[N]; }
#define __MKVECTORSTRUCT128(T,N,T2) struct __simd128_##T { SIMD128_ALIGN double __private1; \
double __private2; }; \
struct __library_simd128_##T { T2 a[N]; }
#ifdef __BUILDING_NEON_LIBRARY
#define __VECTORSTRUCT64(T,N) struct __library_simd64_##T
#define __VECTORSTRUCT128(T,N) struct __library_simd128_##T
#else
#define __VECTORSTRUCT64(T,N) struct __simd64_##T
#define __VECTORSTRUCT128(T,N) struct __simd128_##T
#endif
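/* Each vector type has two views. User code sees an opaque struct whose only
 * members are doubles, so values can live in NEON/VFP registers and elements
 * cannot be accessed directly; the library build (__BUILDING_NEON_LIBRARY)
 * sees a plain array of elements instead. Both views have the same size.
 */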
/* argument annotations (they expand to nothing): __constrange marks an
   argument that must be a compile-time constant in [min,max], and
   __transfersize gives the number of elements a pointer argument transfers */
#define __constrange(min,max)
#define __transfersize(size)
#ifdef __CC_ARM
#define __VALUE_IN_REGS __value_in_regs
#define __PURE __pure
#else
#define __VALUE_IN_REGS
#define __PURE
#endif
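/* __value_in_regs and __pure are armcc extensions: the former returns a
 * structure in registers rather than through memory, the latter marks a
 * function as having no side effects. Both expand to nothing elsewhere.
 */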
#ifdef __cplusplus
#define __EXTERNC extern "C"
#else
#define __EXTERNC
#endif
/* generate the vector type definitions */
typedef uint8_t poly8_t;
typedef uint16_t poly16_t;
typedef uint64_t poly64_t;
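/* the poly*_t types carry polynomials over GF(2); they share the
   representation of the equally sized unsigned integer types */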
typedef uint32_t __encoding_t;
typedef float float32_t;
typedef double float64_t;
#define __VECNAME(vs, t, l) t##x##l##_t
#define __VECTOR64int(typ, l, typ2) __MKVECTORSTRUCT64(typ##_t, l, typ2); \
typedef __VECTORSTRUCT64(typ##_t, l) __VECNAME(64,typ,l)
#define __VECTOR128int(typ, l, typ2) __MKVECTORSTRUCT128(typ##_t, l, typ2); \
typedef __VECTORSTRUCT128(typ##_t, l) __VECNAME(128,typ,l)
#define __VECTOR64(T,L) __VECTOR64int(T,L,T ## _t)
#define __VECTOR128(T,L) __VECTOR128int(T,L,T ## _t)
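/* For illustration, outside the library build __VECTOR64(int8, 8) expands to
 * roughly:
 *
 *   struct __simd64_int8_t { double __private; };
 *   struct __library_simd64_int8_t { int8_t a[8]; };
 *   typedef struct __simd64_int8_t int8x8_t;
 *
 * which yields the familiar vector type name int8x8_t.
 */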
#if (!defined(__TARGET_FEATURE_NEON_FP16) || !defined(__ARM_FP16))
#define __ARM_NEON_FP16_INTRINSICS 0
#else
#define __ARM_NEON_FP16_INTRINSICS 1
#endif
__VECTOR64(int8, 8);
__VECTOR64(int16, 4);
__VECTOR64(int32, 2);
__VECTOR64(int64, 1);
__VECTOR64(uint8, 8);
__VECTOR64(uint16, 4);
__VECTOR64(uint32, 2);
__VECTOR64(uint64, 1);
__VECTOR64(float32, 2);
__VECTOR64(float64, 1);
__VECTOR64(poly8, 8);
__VECTOR64(poly16, 4);
__VECTOR64(poly64, 1);
__VECTOR128(int8, 16);
__VECTOR128(int16, 8);
__VECTOR128(int32, 4);
__VECTOR128(int64, 2);
__VECTOR128(uint8, 16);
__VECTOR128(uint16, 8);
__VECTOR128(uint32, 4);
__VECTOR128(uint64, 2);
__VECTOR128(float32, 4);
__VECTOR128(float64, 2);
__VECTOR128(poly8, 16);
__VECTOR128(poly16, 8);
__VECTOR128(poly64, 2);
#if defined(__ARM_FP16)
__VECTOR64int(float16, 4, __fp16);
__VECTOR128int(float16, 8, __fp16);
#endif
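/* Usage sketch: because the types are opaque, values are created and consumed
 * only through intrinsics (here vld1q_s32/vst1q_s32, provided elsewhere in
 * this header), never by member access:
 *
 *   int32_t in[4] = {1, 2, 3, 4}, out[4];
 *   int32x4_t v = vld1q_s32(in);
 *   vst1q_s32(out, v);
 */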
#define __ARRAYNAME(N, V, T, L) T##x##L##x##N##_t
#define __MKARRAYSTRUCT(N, V, T, L) \
struct __simd_array##N##_##T##x##L##_t { struct __simd##V##_##T##_t val[N]; }; \
struct __library_simd_array##N##_##T##x##L##_t { struct __library_simd##V##_##T##_t val[N]; }
#ifdef __BUILDING_NEON_LIBRARY
#define __ARRAYSTRUCT(N, V, T, L) struct __library_simd_array##N##_##T##x##L##_t
#else
#define __ARRAYSTRUCT(N, V, T, L) struct __simd_array##N##_##T##x##L##_t
#endif
#define __ARRAY(N, V, T, L) __MKARRAYSTRUCT(N,V,T,L) typedef __ARRAYSTRUCT(N,V,T,L) __ARRAYNAME(N,V,T,L)
#define __ALLARRAYS(V,T,L) \
__ARRAY(2,V,T,L); \
__ARRAY(3,V,T,L); \
__ARRAY(4,V,T,L);
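/* Each __ALLARRAYS invocation below creates the 2-, 3- and 4-vector aggregate
 * types used by the de-interleaving loads and stores (vld2/vst2 and friends).
 * For example, int32x2x2_t behaves like:
 *
 *   typedef struct { int32x2_t val[2]; } int32x2x2_t;
 */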
__ALLARRAYS(64, poly8, 8)
__ALLARRAYS(128, poly8, 16)
__ALLARRAYS(64, poly16, 4)
__ALLARRAYS(128, poly16, 8)
__ALLARRAYS(64, poly64, 1)
__ALLARRAYS(128, poly64, 2)
__ALLARRAYS(64, float32, 2)
__ALLARRAYS(128, float32, 4)
__ALLARRAYS(64, float64, 1)
__ALLARRAYS(128, float64, 2)
#if defined(__ARM_FP16)
__ALLARRAYS(64, float16, 4)
__ALLARRAYS(128, float16, 8)
#endif
__ALLARRAYS(64, int8, 8)
__ALLARRAYS(128, int8, 16)
__ALLARRAYS(64, int16, 4)
__ALLARRAYS(128, int16, 8)
__ALLARRAYS(64, int32, 2)
__ALLARRAYS(128, int32, 4)
__ALLARRAYS(64, int64, 1)
__ALLARRAYS(128, int64, 2)
__ALLARRAYS(64, uint8, 8)
__ALLARRAYS(128, uint8, 16)
__ALLARRAYS(64, uint16, 4)
__ALLARRAYS(128, uint16, 8)
__ALLARRAYS(64, uint32, 2)
__ALLARRAYS(128, uint32, 4)
__ALLARRAYS(64, uint64, 1)
__ALLARRAYS(128, uint64, 2)
#ifdef __TARGET_FEATURE_NEON
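/* Each intrinsic below expands to a compiler builtin whose final argument is
 * the 32-bit instruction encoding; the trailing comment shows that encoding
 * disassembled with all registers set to 0. The builtin suffixes appear to
 * describe the operand shapes (e.g. _ddd: three D registers, _qdd: Q
 * destination with two D sources) and the element types involved.
 */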
#define vadd_s8(a,b) __ndp1_wrr_ddd_s8((a),(b),0xf2000800) /* VADD.I8 d0,d0,d0 */
#define vadd_s16(a,b) __ndp1_wrr_ddd_s16((a),(b),0xf2100800) /* VADD.I16 d0,d0,d0 */
#define vadd_s32(a,b) __ndp1_wrr_ddd_s32((a),(b),0xf2200800) /* VADD.I32 d0,d0,d0 */
#define vadd_s64(a,b) __ndp1_wrr_ddd_s64((a),(b),0xf2300800) /* VADD.I64 d0,d0,d0 */
#define vadd_f32(a,b) __ndp1_wrr_ddd_f32((a),(b),0xf2000d00) /* VADD.F32 d0,d0,d0 */
#define vadd_u8(a,b) __ndp1_wrr_ddd_u8((a),(b),0xf2000800) /* VADD.I8 d0,d0,d0 */
#define vadd_u16(a,b) __ndp1_wrr_ddd_u16((a),(b),0xf2100800) /* VADD.I16 d0,d0,d0 */
#define vadd_u32(a,b) __ndp1_wrr_ddd_u32((a),(b),0xf2200800) /* VADD.I32 d0,d0,d0 */
#define vadd_u64(a,b) __ndp1_wrr_ddd_u64((a),(b),0xf2300800) /* VADD.I64 d0,d0,d0 */
#define vaddq_s8(a,b) __ndp1_wrr_qqq_s8((a),(b),0xf2000840) /* VADD.I8 q0,q0,q0 */
#define vaddq_s16(a,b) __ndp1_wrr_qqq_s16((a),(b),0xf2100840) /* VADD.I16 q0,q0,q0 */
#define vaddq_s32(a,b) __ndp1_wrr_qqq_s32((a),(b),0xf2200840) /* VADD.I32 q0,q0,q0 */
#define vaddq_s64(a,b) __ndp1_wrr_qqq_s64((a),(b),0xf2300840) /* VADD.I64 q0,q0,q0 */
#define vaddq_f32(a,b) __ndp1_wrr_qqq_f32((a),(b),0xf2000d40) /* VADD.F32 q0,q0,q0 */
#define vaddq_u8(a,b) __ndp1_wrr_qqq_u8((a),(b),0xf2000840) /* VADD.I8 q0,q0,q0 */
#define vaddq_u16(a,b) __ndp1_wrr_qqq_u16((a),(b),0xf2100840) /* VADD.I16 q0,q0,q0 */
#define vaddq_u32(a,b) __ndp1_wrr_qqq_u32((a),(b),0xf2200840) /* VADD.I32 q0,q0,q0 */
#define vaddq_u64(a,b) __ndp1_wrr_qqq_u64((a),(b),0xf2300840) /* VADD.I64 q0,q0,q0 */
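/* Usage sketch (a minimal example, assuming vld1q_s32/vst1q_s32 from
 * elsewhere in this header): element-wise addition of four int32 lanes,
 * compiling to a single VADD.I32 q,q,q.
 *
 *   void add4_s32(const int32_t *a, const int32_t *b, int32_t *out)
 *   {
 *       int32x4_t va = vld1q_s32(a);
 *       int32x4_t vb = vld1q_s32(b);
 *       vst1q_s32(out, vaddq_s32(va, vb));
 *   }
 */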
#define vaddl_s8(a,b) __ndp4_wrr_qdd_s16_s8((a),(b),0xf2800000) /* VADDL.S8 q0,d0,d0 */
#define vaddl_s16(a,b) __ndp4_wrr_qdd_s32_s16((a),(b),0xf2900000) /* VADDL.S16 q0,d0,d0 */
#define vaddl_s32(a,b) __ndp4_wrr_qdd_s64_s32((a),(b),0xf2a00000) /* VADDL.S32 q0,d0,d0 */
#define vaddl_u8(a,b) __ndp4_wrr_qdd_u16_u8((a),(b),0xf3800000) /* VADDL.U8 q0,d0,d0 */
#define vaddl_u16(a,b) __ndp4_wrr_qdd_u32_u16((a),(b),0xf3900000) /* VADDL.U16 q0,d0,d0 */
#define vaddl_u32(a,b) __ndp4_wrr_qdd_u64_u32((a),(b),0xf3a00000) /* VADDL.U32 q0,d0,d0 */
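/* The vaddl_* forms widen both operands before adding, so the sums cannot
 * wrap: vaddl_u8 takes two uint8x8_t vectors and yields a uint16x8_t, e.g.
 *
 *   uint16x8_t sums = vaddl_u8(row0, row1);   (255 + 255 = 510 still fits)
 */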
#define vaddw_s8(a,b) __ndp4_wrr_qqd_s16_s16_s8((a),(b),0xf2800100) /* VADDW.S8 q0,q0,d0 */
#define vaddw_s16(a,b) __ndp4_wrr_qqd_s32_s32_s16((a),(b),0xf2900100) /* VADDW.S16 q0,q0,d0 */
#define vaddw_s32(a,b) __ndp4_wrr_qqd_s64_s64_s32((a),(b),0xf2a00100) /* VADDW.S32 q0,q0,d0 */
#define vaddw_u8(a,b) __ndp4_wrr_qqd_u16_u16_u8((a),(b),0xf3800100) /* VADDW.U8 q0,q0,d0 */
#define vaddw_u16(a,b) __ndp4_wrr_qqd_u32_u32_u16((a),(b),0xf3900100) /* VADDW.U16 q0,q0,d0 */
#define vaddw_u32(a,b) __ndp4_wrr_qqd_u64_u64_u32((a),(b),0xf3a00100) /* VADDW.U32 q0,q0,d0 */
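/* The vaddw_* forms add a narrow vector into an already-wide value, the
 * natural shape for accumulation loops. A sketch (vdupq_n_u16 is provided
 * elsewhere in this header):
 *
 *   uint16x8_t acc = vdupq_n_u16(0);
 *   acc = vaddw_u8(acc, bytes);      (eight more bytes per iteration)
 */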
#define vhadd_s8(a,b) __ndp1_wrr_ddd_s8((a),(b),0xf2000000) /* VHADD.S8 d0,d0,d0 */
#define vhadd_s16(a,b) __ndp1_wrr_ddd_s16((a),(b),0xf2100000) /* VHADD.S16 d0,d0,d0 */
#define vhadd_s32(a,b) __ndp1_wrr_ddd_s32((a),(b),0xf2200000) /* VHADD.S32 d0,d0,d0 */
#define vhadd_u8(a,b) __ndp1_wrr_ddd_u8((a),(b),0xf3000000) /* VHADD.U8 d0,d0,d0 */
#define vhadd_u16(a,b) __ndp1_wrr_ddd_u16((a),(b),0xf3100000) /* VHADD.U16 d0,d0,d0 */
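/* The vhadd_* forms compute the truncating halving add (a + b) >> 1 using a
 * wide internal sum, so the addition cannot overflow; the vrhadd_* variants
 * round instead of truncating. A sketch:
 *
 *   uint8x8_t avg = vhadd_u8(px0, px1);    (e.g. lanes 200 and 100 -> 150)
 */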