use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
mem::transmute,
};
#[cfg(test)]
use stdarch_test::assert_instr;
/// Broadcasts the value of mask `k` into every 32-bit lane of the 512-bit result.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask16` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i32;
    _mm512_set1_epi32(lane)
}
/// Broadcasts the value of mask `k` into every 32-bit lane of the 256-bit result.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask16` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i32;
    _mm256_set1_epi32(lane)
}
/// Broadcasts the value of mask `k` into every 32-bit lane of the 128-bit result.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask16` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i32;
    _mm_set1_epi32(lane)
}
/// Broadcasts the value of mask `k` into every 64-bit lane of the 512-bit result.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask8` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i64;
    _mm512_set1_epi64(lane)
}
/// Broadcasts the value of mask `k` into every 64-bit lane of the 256-bit result.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask8` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i64;
    _mm256_set1_epi64x(lane)
}
/// Broadcasts the value of mask `k` into every 64-bit lane of the 128-bit result.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
    // NOTE(review): plain `as` cast; this zero-extends provided `__mmask8` is an
    // unsigned alias - confirm against its definition.
    let lane = k as i64;
    _mm_set1_epi64x(lane)
}
/// Runs conflict detection over the sixteen 32-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.d.512` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
    let lanes = a.as_i32x16();
    let conflicts = vpconflictd(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm512_conflict_epi32`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let fallback = src.as_i32x16();
    let computed = _mm512_conflict_epi32(a).as_i32x16();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm512_conflict_epi32`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let computed = _mm512_conflict_epi32(a).as_i32x16();
    let zeroed = _mm512_setzero_si512().as_i32x16();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Runs conflict detection over the eight 32-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.d.256` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
    let lanes = a.as_i32x8();
    let conflicts = vpconflictd256(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm256_conflict_epi32`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let fallback = src.as_i32x8();
    let computed = _mm256_conflict_epi32(a).as_i32x8();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm256_conflict_epi32`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let computed = _mm256_conflict_epi32(a).as_i32x8();
    let zeroed = _mm256_setzero_si256().as_i32x8();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Runs conflict detection over the four 32-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.d.128` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
    let lanes = a.as_i32x4();
    let conflicts = vpconflictd128(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm_conflict_epi32`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let fallback = src.as_i32x4();
    let computed = _mm_conflict_epi32(a).as_i32x4();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm_conflict_epi32`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let computed = _mm_conflict_epi32(a).as_i32x4();
    let zeroed = _mm_setzero_si128().as_i32x4();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Runs conflict detection over the eight 64-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.q.512` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
    let lanes = a.as_i64x8();
    let conflicts = vpconflictq(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm512_conflict_epi64`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let fallback = src.as_i64x8();
    let computed = _mm512_conflict_epi64(a).as_i64x8();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm512_conflict_epi64`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let computed = _mm512_conflict_epi64(a).as_i64x8();
    let zeroed = _mm512_setzero_si512().as_i64x8();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Runs conflict detection over the four 64-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.q.256` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
    let lanes = a.as_i64x4();
    let conflicts = vpconflictq256(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm256_conflict_epi64`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let fallback = src.as_i64x4();
    let computed = _mm256_conflict_epi64(a).as_i64x4();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm256_conflict_epi64`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let computed = _mm256_conflict_epi64(a).as_i64x4();
    let zeroed = _mm256_setzero_si256().as_i64x4();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Runs conflict detection over the two 64-bit lanes of `a` by forwarding to the
/// `llvm.x86.avx512.conflict.q.128` binding and returns the per-lane result vector.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
    let lanes = a.as_i64x2();
    let conflicts = vpconflictq128(lanes);
    transmute(conflicts)
}
/// Merge-masked variant of `_mm_conflict_epi64`: the conflict result for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let fallback = src.as_i64x2();
    let computed = _mm_conflict_epi64(a).as_i64x2();
    let blended = simd_select_bitmask(k, computed, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm_conflict_epi64`: the conflict result for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let computed = _mm_conflict_epi64(a).as_i64x2();
    let zeroed = _mm_setzero_si128().as_i64x2();
    let blended = simd_select_bitmask(k, computed, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the sixteen 32-bit lanes of `a` via the
/// `llvm.ctlz.v16i32` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
    let lanes = a.as_i32x16();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntd(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm512_lzcnt_epi32`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let fallback = src.as_i32x16();
    let counts = _mm512_lzcnt_epi32(a).as_i32x16();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm512_lzcnt_epi32`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let counts = _mm512_lzcnt_epi32(a).as_i32x16();
    let zeroed = _mm512_setzero_si512().as_i32x16();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the eight 32-bit lanes of `a` via the
/// `llvm.ctlz.v8i32` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
    let lanes = a.as_i32x8();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntd256(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm256_lzcnt_epi32`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let fallback = src.as_i32x8();
    let counts = _mm256_lzcnt_epi32(a).as_i32x8();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm256_lzcnt_epi32`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let counts = _mm256_lzcnt_epi32(a).as_i32x8();
    let zeroed = _mm256_setzero_si256().as_i32x8();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the four 32-bit lanes of `a` via the
/// `llvm.ctlz.v4i32` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
    let lanes = a.as_i32x4();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntd128(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm_lzcnt_epi32`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let fallback = src.as_i32x4();
    let counts = _mm_lzcnt_epi32(a).as_i32x4();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm_lzcnt_epi32`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let counts = _mm_lzcnt_epi32(a).as_i32x4();
    let zeroed = _mm_setzero_si128().as_i32x4();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the eight 64-bit lanes of `a` via the
/// `llvm.ctlz.v8i64` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
    let lanes = a.as_i64x8();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntq(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm512_lzcnt_epi64`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let fallback = src.as_i64x8();
    let counts = _mm512_lzcnt_epi64(a).as_i64x8();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm512_lzcnt_epi64`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let counts = _mm512_lzcnt_epi64(a).as_i64x8();
    let zeroed = _mm512_setzero_si512().as_i64x8();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the four 64-bit lanes of `a` via the
/// `llvm.ctlz.v4i64` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
    let lanes = a.as_i64x4();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntq256(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm256_lzcnt_epi64`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let fallback = src.as_i64x4();
    let counts = _mm256_lzcnt_epi64(a).as_i64x4();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm256_lzcnt_epi64`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let counts = _mm256_lzcnt_epi64(a).as_i64x4();
    let zeroed = _mm256_setzero_si256().as_i64x4();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
/// Counts leading zero bits in each of the two 64-bit lanes of `a` via the
/// `llvm.ctlz.v2i64` binding and returns the per-lane counts.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
    let lanes = a.as_i64x2();
    // The binding's second argument (declared as `nonzero`) is left unset.
    let counts = vplzcntq128(lanes, false);
    transmute(counts)
}
/// Merge-masked variant of `_mm_lzcnt_epi64`: the leading-zero counts for `a` and the
/// lanes of `src` are combined by `simd_select_bitmask` under writemask `k`, with `src`
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let fallback = src.as_i64x2();
    let counts = _mm_lzcnt_epi64(a).as_i64x2();
    let blended = simd_select_bitmask(k, counts, fallback);
    transmute(blended)
}
/// Zero-masked variant of `_mm_lzcnt_epi64`: the leading-zero counts for `a` and an
/// all-zero vector are combined by `simd_select_bitmask` under mask `k`, with zero
/// supplying the alternative operand.
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let counts = _mm_lzcnt_epi64(a).as_i64x2();
    let zeroed = _mm_setzero_si128().as_i64x2();
    let blended = simd_select_bitmask(k, counts, zeroed);
    transmute(blended)
}
// Bindings to the LLVM intrinsics that back the public functions in this file.
// Each declaration is tied to its intrinsic purely through `link_name`; the Rust-side
// names are local to this file.
// NOTE(review): the lint is presumably silenced because the SIMD vector types used in
// these signatures are not considered FFI-safe - confirm.
#[allow(improper_ctypes)]
extern "C" {
// Conflict detection on 32-bit lanes, for 512-, 256- and 128-bit vectors.
#[link_name = "llvm.x86.avx512.conflict.d.512"]
fn vpconflictd(a: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.conflict.d.256"]
fn vpconflictd256(a: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx512.conflict.d.128"]
fn vpconflictd128(a: i32x4) -> i32x4;
// Conflict detection on 64-bit lanes, for 512-, 256- and 128-bit vectors.
#[link_name = "llvm.x86.avx512.conflict.q.512"]
fn vpconflictq(a: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.conflict.q.256"]
fn vpconflictq256(a: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.conflict.q.128"]
fn vpconflictq128(a: i64x2) -> i64x2;
// Leading-zero counts use the generic `llvm.ctlz` vector intrinsics rather than an
// x86-specific one. Every caller in this file passes `false` for `nonzero`.
// NOTE(review): `nonzero` presumably maps to the intrinsic's "input is never zero"
// flag - confirm against the LLVM language reference.
#[link_name = "llvm.ctlz.v16i32"]
fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
#[link_name = "llvm.ctlz.v8i32"]
fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
#[link_name = "llvm.ctlz.v4i32"]
fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;
#[link_name = "llvm.ctlz.v8i64"]
fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
#[link_name = "llvm.ctlz.v4i64"]
fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
#[link_name = "llvm.ctlz.v2i64"]
fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
}
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // Conventions used throughout this module:
    //
    // * Conflict tests fill every lane with the same value. The expected vectors are
    //   written as hex literals, each a run of low set bits (0x7FFF = bits 0..=14,
    //   0x3FFF = bits 0..=13, ... 0x1 = bit 0, 0x0 = none), one bit shorter per argument.
    // * Masked tests first use an empty mask, which must return `src` (merge variants) or
    //   all zeros (`maskz` variants), and then a mask with the low bits set, which must
    //   return the same vector as the unmasked intrinsic.

    // ---- mask broadcasts ----

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmw_epi32() {
        let mask: __mmask16 = 2;
        let expected = _mm512_set1_epi32(2);
        assert_eq_m512i(_mm512_broadcastmw_epi32(mask), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmw_epi32() {
        let mask: __mmask16 = 2;
        let expected = _mm256_set1_epi32(2);
        assert_eq_m256i(_mm256_broadcastmw_epi32(mask), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmw_epi32() {
        let mask: __mmask16 = 2;
        let expected = _mm_set1_epi32(2);
        assert_eq_m128i(_mm_broadcastmw_epi32(mask), expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmb_epi64() {
        let mask: __mmask8 = 2;
        let expected = _mm512_set1_epi64(2);
        assert_eq_m512i(_mm512_broadcastmb_epi64(mask), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmb_epi64() {
        let mask: __mmask8 = 2;
        let expected = _mm256_set1_epi64x(2);
        assert_eq_m256i(_mm256_broadcastmb_epi64(mask), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmb_epi64() {
        let mask: __mmask8 = 2;
        let expected = _mm_set1_epi64x(2);
        assert_eq_m128i(_mm_broadcastmb_epi64(mask), expected);
    }

    // ---- conflict detection, 32-bit lanes ----

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_conflict_epi32() {
        let ones = _mm512_set1_epi32(1);
        let expected = _mm512_set_epi32(
            0x7FFF, 0x3FFF, 0x1FFF, 0x0FFF, 0x07FF, 0x03FF, 0x01FF, 0x00FF, 0x007F, 0x003F,
            0x001F, 0x000F, 0x0007, 0x0003, 0x0001, 0x0000,
        );
        assert_eq_m512i(_mm512_conflict_epi32(ones), expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_conflict_epi32() {
        let ones = _mm512_set1_epi32(1);
        let untouched = _mm512_mask_conflict_epi32(ones, 0, ones);
        assert_eq_m512i(untouched, ones);
        let computed = _mm512_mask_conflict_epi32(ones, 0xFFFF, ones);
        let expected = _mm512_set_epi32(
            0x7FFF, 0x3FFF, 0x1FFF, 0x0FFF, 0x07FF, 0x03FF, 0x01FF, 0x00FF, 0x007F, 0x003F,
            0x001F, 0x000F, 0x0007, 0x0003, 0x0001, 0x0000,
        );
        assert_eq_m512i(computed, expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_conflict_epi32() {
        let ones = _mm512_set1_epi32(1);
        let cleared = _mm512_maskz_conflict_epi32(0, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let computed = _mm512_maskz_conflict_epi32(0xFFFF, ones);
        let expected = _mm512_set_epi32(
            0x7FFF, 0x3FFF, 0x1FFF, 0x0FFF, 0x07FF, 0x03FF, 0x01FF, 0x00FF, 0x007F, 0x003F,
            0x001F, 0x000F, 0x0007, 0x0003, 0x0001, 0x0000,
        );
        assert_eq_m512i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_conflict_epi32() {
        let ones = _mm256_set1_epi32(1);
        let expected = _mm256_set_epi32(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m256i(_mm256_conflict_epi32(ones), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_conflict_epi32() {
        let ones = _mm256_set1_epi32(1);
        let untouched = _mm256_mask_conflict_epi32(ones, 0, ones);
        assert_eq_m256i(untouched, ones);
        let computed = _mm256_mask_conflict_epi32(ones, 0xFF, ones);
        let expected = _mm256_set_epi32(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m256i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_conflict_epi32() {
        let ones = _mm256_set1_epi32(1);
        let cleared = _mm256_maskz_conflict_epi32(0, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let computed = _mm256_maskz_conflict_epi32(0xFF, ones);
        let expected = _mm256_set_epi32(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m256i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_conflict_epi32() {
        let ones = _mm_set1_epi32(1);
        let expected = _mm_set_epi32(0x7, 0x3, 0x1, 0x0);
        assert_eq_m128i(_mm_conflict_epi32(ones), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_conflict_epi32() {
        let ones = _mm_set1_epi32(1);
        let untouched = _mm_mask_conflict_epi32(ones, 0, ones);
        assert_eq_m128i(untouched, ones);
        let computed = _mm_mask_conflict_epi32(ones, 0x0F, ones);
        let expected = _mm_set_epi32(0x7, 0x3, 0x1, 0x0);
        assert_eq_m128i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_conflict_epi32() {
        let ones = _mm_set1_epi32(1);
        let cleared = _mm_maskz_conflict_epi32(0, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let computed = _mm_maskz_conflict_epi32(0x0F, ones);
        let expected = _mm_set_epi32(0x7, 0x3, 0x1, 0x0);
        assert_eq_m128i(computed, expected);
    }

    // ---- conflict detection, 64-bit lanes ----

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_conflict_epi64() {
        let ones = _mm512_set1_epi64(1);
        let expected = _mm512_set_epi64(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m512i(_mm512_conflict_epi64(ones), expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_conflict_epi64() {
        let ones = _mm512_set1_epi64(1);
        let untouched = _mm512_mask_conflict_epi64(ones, 0, ones);
        assert_eq_m512i(untouched, ones);
        let computed = _mm512_mask_conflict_epi64(ones, 0xFF, ones);
        let expected = _mm512_set_epi64(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m512i(computed, expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_conflict_epi64() {
        let ones = _mm512_set1_epi64(1);
        let cleared = _mm512_maskz_conflict_epi64(0, ones);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let computed = _mm512_maskz_conflict_epi64(0xFF, ones);
        let expected = _mm512_set_epi64(0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00);
        assert_eq_m512i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_conflict_epi64() {
        let ones = _mm256_set1_epi64x(1);
        let expected = _mm256_set_epi64x(0x7, 0x3, 0x1, 0x0);
        assert_eq_m256i(_mm256_conflict_epi64(ones), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_conflict_epi64() {
        let ones = _mm256_set1_epi64x(1);
        let untouched = _mm256_mask_conflict_epi64(ones, 0, ones);
        assert_eq_m256i(untouched, ones);
        let computed = _mm256_mask_conflict_epi64(ones, 0x0F, ones);
        let expected = _mm256_set_epi64x(0x7, 0x3, 0x1, 0x0);
        assert_eq_m256i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_conflict_epi64() {
        let ones = _mm256_set1_epi64x(1);
        let cleared = _mm256_maskz_conflict_epi64(0, ones);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let computed = _mm256_maskz_conflict_epi64(0x0F, ones);
        let expected = _mm256_set_epi64x(0x7, 0x3, 0x1, 0x0);
        assert_eq_m256i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_conflict_epi64() {
        let ones = _mm_set1_epi64x(1);
        let expected = _mm_set_epi64x(0x1, 0x0);
        assert_eq_m128i(_mm_conflict_epi64(ones), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_conflict_epi64() {
        let ones = _mm_set1_epi64x(1);
        let untouched = _mm_mask_conflict_epi64(ones, 0, ones);
        assert_eq_m128i(untouched, ones);
        let computed = _mm_mask_conflict_epi64(ones, 0x03, ones);
        let expected = _mm_set_epi64x(0x1, 0x0);
        assert_eq_m128i(computed, expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_conflict_epi64() {
        let ones = _mm_set1_epi64x(1);
        let cleared = _mm_maskz_conflict_epi64(0, ones);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let computed = _mm_maskz_conflict_epi64(0x03, ones);
        let expected = _mm_set_epi64x(0x1, 0x0);
        assert_eq_m128i(computed, expected);
    }

    // ---- leading-zero count, 32-bit lanes ----

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_lzcnt_epi32() {
        let input = _mm512_set1_epi32(1);
        let expected = _mm512_set1_epi32(31);
        assert_eq_m512i(_mm512_lzcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_lzcnt_epi32() {
        let input = _mm512_set1_epi32(1);
        let untouched = _mm512_mask_lzcnt_epi32(input, 0, input);
        assert_eq_m512i(untouched, input);
        let computed = _mm512_mask_lzcnt_epi32(input, 0xFFFF, input);
        assert_eq_m512i(computed, _mm512_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_lzcnt_epi32() {
        // This test deliberately uses 2 rather than 1, so the expected count is 30.
        let input = _mm512_set1_epi32(2);
        let cleared = _mm512_maskz_lzcnt_epi32(0, input);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let computed = _mm512_maskz_lzcnt_epi32(0xFFFF, input);
        assert_eq_m512i(computed, _mm512_set1_epi32(30));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_lzcnt_epi32() {
        let input = _mm256_set1_epi32(1);
        let expected = _mm256_set1_epi32(31);
        assert_eq_m256i(_mm256_lzcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_lzcnt_epi32() {
        let input = _mm256_set1_epi32(1);
        let untouched = _mm256_mask_lzcnt_epi32(input, 0, input);
        assert_eq_m256i(untouched, input);
        let computed = _mm256_mask_lzcnt_epi32(input, 0xFF, input);
        assert_eq_m256i(computed, _mm256_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_lzcnt_epi32() {
        let input = _mm256_set1_epi32(1);
        let cleared = _mm256_maskz_lzcnt_epi32(0, input);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let computed = _mm256_maskz_lzcnt_epi32(0xFF, input);
        assert_eq_m256i(computed, _mm256_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_lzcnt_epi32() {
        let input = _mm_set1_epi32(1);
        let expected = _mm_set1_epi32(31);
        assert_eq_m128i(_mm_lzcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_lzcnt_epi32() {
        let input = _mm_set1_epi32(1);
        let untouched = _mm_mask_lzcnt_epi32(input, 0, input);
        assert_eq_m128i(untouched, input);
        let computed = _mm_mask_lzcnt_epi32(input, 0x0F, input);
        assert_eq_m128i(computed, _mm_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_lzcnt_epi32() {
        let input = _mm_set1_epi32(1);
        let cleared = _mm_maskz_lzcnt_epi32(0, input);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let computed = _mm_maskz_lzcnt_epi32(0x0F, input);
        assert_eq_m128i(computed, _mm_set1_epi32(31));
    }

    // ---- leading-zero count, 64-bit lanes ----

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_lzcnt_epi64() {
        let input = _mm512_set1_epi64(1);
        let expected = _mm512_set1_epi64(63);
        assert_eq_m512i(_mm512_lzcnt_epi64(input), expected);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_lzcnt_epi64() {
        let input = _mm512_set1_epi64(1);
        let untouched = _mm512_mask_lzcnt_epi64(input, 0, input);
        assert_eq_m512i(untouched, input);
        let computed = _mm512_mask_lzcnt_epi64(input, 0xFF, input);
        assert_eq_m512i(computed, _mm512_set1_epi64(63));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_lzcnt_epi64() {
        // This test deliberately uses 2 rather than 1, so the expected count is 62.
        let input = _mm512_set1_epi64(2);
        let cleared = _mm512_maskz_lzcnt_epi64(0, input);
        assert_eq_m512i(cleared, _mm512_setzero_si512());
        let computed = _mm512_maskz_lzcnt_epi64(0xFF, input);
        assert_eq_m512i(computed, _mm512_set1_epi64(62));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_lzcnt_epi64() {
        let input = _mm256_set1_epi64x(1);
        let expected = _mm256_set1_epi64x(63);
        assert_eq_m256i(_mm256_lzcnt_epi64(input), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_lzcnt_epi64() {
        let input = _mm256_set1_epi64x(1);
        let untouched = _mm256_mask_lzcnt_epi64(input, 0, input);
        assert_eq_m256i(untouched, input);
        let computed = _mm256_mask_lzcnt_epi64(input, 0x0F, input);
        assert_eq_m256i(computed, _mm256_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_lzcnt_epi64() {
        let input = _mm256_set1_epi64x(1);
        let cleared = _mm256_maskz_lzcnt_epi64(0, input);
        assert_eq_m256i(cleared, _mm256_setzero_si256());
        let computed = _mm256_maskz_lzcnt_epi64(0x0F, input);
        assert_eq_m256i(computed, _mm256_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_lzcnt_epi64() {
        let input = _mm_set1_epi64x(1);
        let expected = _mm_set1_epi64x(63);
        assert_eq_m128i(_mm_lzcnt_epi64(input), expected);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_lzcnt_epi64() {
        let input = _mm_set1_epi64x(1);
        let untouched = _mm_mask_lzcnt_epi64(input, 0, input);
        assert_eq_m128i(untouched, input);
        let computed = _mm_mask_lzcnt_epi64(input, 0x0F, input);
        assert_eq_m128i(computed, _mm_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_lzcnt_epi64() {
        let input = _mm_set1_epi64x(1);
        let cleared = _mm_maskz_lzcnt_epi64(0, input);
        assert_eq_m128i(cleared, _mm_setzero_si128());
        let computed = _mm_maskz_lzcnt_epi64(0x0F, input);
        assert_eq_m128i(computed, _mm_set1_epi64x(63));
    }
}