//! Criterion benchmarks for converting `f16` and `bf16` to and from `f32`/`f64`.
use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion};
|
|
use half::prelude::*;
|
|
use std::{f32, f64, iter};
|
|
|
|
// Element count of the "large" input vectors and output buffers used by the
// slice-conversion benchmarks in this file.
const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024;
|
|
|
|
fn bench_f32_to_f16(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert f16 From f32");
|
|
for val in &[
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f32::MIN,
|
|
f32::MAX,
|
|
f32::MIN_POSITIVE,
|
|
f32::NEG_INFINITY,
|
|
f32::INFINITY,
|
|
f32::NAN,
|
|
f32::consts::E,
|
|
f32::consts::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("f16::from_f32", val), val, |b, i| {
|
|
b.iter(|| f16::from_f32(*i))
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_f64_to_f16(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert f16 From f64");
|
|
for val in &[
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f64::MIN,
|
|
f64::MAX,
|
|
f64::MIN_POSITIVE,
|
|
f64::NEG_INFINITY,
|
|
f64::INFINITY,
|
|
f64::NAN,
|
|
f64::consts::E,
|
|
f64::consts::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("f16::from_f64", val), val, |b, i| {
|
|
b.iter(|| f16::from_f64(*i))
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_f16_to_f32(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert f16 to f32");
|
|
for val in &[
|
|
f16::ZERO,
|
|
f16::NEG_ZERO,
|
|
f16::ONE,
|
|
f16::MIN,
|
|
f16::MAX,
|
|
f16::MIN_POSITIVE,
|
|
f16::NEG_INFINITY,
|
|
f16::INFINITY,
|
|
f16::NAN,
|
|
f16::E,
|
|
f16::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("f16::to_f32", val), val, |b, i| {
|
|
b.iter(|| i.to_f32())
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_f16_to_f64(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert f16 to f64");
|
|
for val in &[
|
|
f16::ZERO,
|
|
f16::NEG_ZERO,
|
|
f16::ONE,
|
|
f16::MIN,
|
|
f16::MAX,
|
|
f16::MIN_POSITIVE,
|
|
f16::NEG_INFINITY,
|
|
f16::INFINITY,
|
|
f16::NAN,
|
|
f16::E,
|
|
f16::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("f16::to_f64", val), val, |b, i| {
|
|
b.iter(|| i.to_f64())
|
|
});
|
|
}
|
|
}
|
|
|
|
// Scalar (one value per call) `f16` conversion benchmarks, bundled as `f16_sisd`.
criterion_group!(
    f16_sisd,
    bench_f32_to_f16,
    bench_f64_to_f16,
    bench_f16_to_f32,
    bench_f16_to_f64
);
|
|
|
|
fn bench_slice_f32_to_f16(c: &mut Criterion) {
|
|
let mut constant_buffer = [f16::ZERO; 11];
|
|
let constants = [
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f32::MIN,
|
|
f32::MAX,
|
|
f32::MIN_POSITIVE,
|
|
f32::NEG_INFINITY,
|
|
f32::INFINITY,
|
|
f32::NAN,
|
|
f32::consts::E,
|
|
f32::consts::PI,
|
|
];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_from_f32_slice/constants",
|
|
|b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f32_slice(&constants)),
|
|
);
|
|
|
|
let large: Vec<_> = iter::repeat(0)
|
|
.enumerate()
|
|
.map(|(i, _)| i as f32)
|
|
.take(SIMD_LARGE_BENCH_SLICE_LEN)
|
|
.collect();
|
|
let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_from_f32_slice/large",
|
|
|b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f32_slice(&large)),
|
|
);
|
|
}
|
|
|
|
fn bench_slice_f64_to_f16(c: &mut Criterion) {
|
|
let mut constant_buffer = [f16::ZERO; 11];
|
|
let constants = [
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f64::MIN,
|
|
f64::MAX,
|
|
f64::MIN_POSITIVE,
|
|
f64::NEG_INFINITY,
|
|
f64::INFINITY,
|
|
f64::NAN,
|
|
f64::consts::E,
|
|
f64::consts::PI,
|
|
];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_from_f64_slice/constants",
|
|
|b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f64_slice(&constants)),
|
|
);
|
|
|
|
let large: Vec<_> = iter::repeat(0)
|
|
.enumerate()
|
|
.map(|(i, _)| i as f64)
|
|
.take(SIMD_LARGE_BENCH_SLICE_LEN)
|
|
.collect();
|
|
let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_from_f64_slice/large",
|
|
|b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f64_slice(&large)),
|
|
);
|
|
}
|
|
|
|
fn bench_slice_f16_to_f32(c: &mut Criterion) {
|
|
let mut constant_buffer = [0f32; 11];
|
|
let constants = [
|
|
f16::ZERO,
|
|
f16::NEG_ZERO,
|
|
f16::ONE,
|
|
f16::MIN,
|
|
f16::MAX,
|
|
f16::MIN_POSITIVE,
|
|
f16::NEG_INFINITY,
|
|
f16::INFINITY,
|
|
f16::NAN,
|
|
f16::E,
|
|
f16::PI,
|
|
];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_to_f32_slice/constants",
|
|
|b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f32_slice(&mut constant_buffer)),
|
|
);
|
|
|
|
let large: Vec<_> = iter::repeat(0)
|
|
.enumerate()
|
|
.map(|(i, _)| f16::from_f32(i as f32))
|
|
.take(SIMD_LARGE_BENCH_SLICE_LEN)
|
|
.collect();
|
|
let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_to_f32_slice/large",
|
|
|b: &mut Bencher<'_>| b.iter(|| large.convert_to_f32_slice(&mut large_buffer)),
|
|
);
|
|
}
|
|
|
|
fn bench_slice_f16_to_f64(c: &mut Criterion) {
|
|
let mut constant_buffer = [0f64; 11];
|
|
let constants = [
|
|
f16::ZERO,
|
|
f16::NEG_ZERO,
|
|
f16::ONE,
|
|
f16::MIN,
|
|
f16::MAX,
|
|
f16::MIN_POSITIVE,
|
|
f16::NEG_INFINITY,
|
|
f16::INFINITY,
|
|
f16::NAN,
|
|
f16::E,
|
|
f16::PI,
|
|
];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_to_f64_slice/constants",
|
|
|b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f64_slice(&mut constant_buffer)),
|
|
);
|
|
|
|
let large: Vec<_> = iter::repeat(0)
|
|
.enumerate()
|
|
.map(|(i, _)| f16::from_f64(i as f64))
|
|
.take(SIMD_LARGE_BENCH_SLICE_LEN)
|
|
.collect();
|
|
let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN];
|
|
c.bench_function(
|
|
"HalfFloatSliceExt::convert_to_f64_slice/large",
|
|
|b: &mut Bencher<'_>| b.iter(|| large.convert_to_f64_slice(&mut large_buffer)),
|
|
);
|
|
}
|
|
|
|
// Slice-based `f16` conversion benchmarks, bundled as `f16_simd`.
criterion_group!(
    f16_simd,
    bench_slice_f32_to_f16,
    bench_slice_f64_to_f16,
    bench_slice_f16_to_f32,
    bench_slice_f16_to_f64
);
|
|
|
|
fn bench_f32_to_bf16(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert bf16 From f32");
|
|
for val in &[
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f32::MIN,
|
|
f32::MAX,
|
|
f32::MIN_POSITIVE,
|
|
f32::NEG_INFINITY,
|
|
f32::INFINITY,
|
|
f32::NAN,
|
|
f32::consts::E,
|
|
f32::consts::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("bf16::from_f32", val), val, |b, i| {
|
|
b.iter(|| bf16::from_f32(*i))
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_f64_to_bf16(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert bf16 From f64");
|
|
for val in &[
|
|
0.,
|
|
-0.,
|
|
1.,
|
|
f64::MIN,
|
|
f64::MAX,
|
|
f64::MIN_POSITIVE,
|
|
f64::NEG_INFINITY,
|
|
f64::INFINITY,
|
|
f64::NAN,
|
|
f64::consts::E,
|
|
f64::consts::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("bf16::from_f64", val), val, |b, i| {
|
|
b.iter(|| bf16::from_f64(*i))
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_bf16_to_f32(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert bf16 to f32");
|
|
for val in &[
|
|
bf16::ZERO,
|
|
bf16::NEG_ZERO,
|
|
bf16::ONE,
|
|
bf16::MIN,
|
|
bf16::MAX,
|
|
bf16::MIN_POSITIVE,
|
|
bf16::NEG_INFINITY,
|
|
bf16::INFINITY,
|
|
bf16::NAN,
|
|
bf16::E,
|
|
bf16::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("bf16::to_f32", val), val, |b, i| {
|
|
b.iter(|| i.to_f32())
|
|
});
|
|
}
|
|
}
|
|
|
|
fn bench_bf16_to_f64(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("Convert bf16 to f64");
|
|
for val in &[
|
|
bf16::ZERO,
|
|
bf16::NEG_ZERO,
|
|
bf16::ONE,
|
|
bf16::MIN,
|
|
bf16::MAX,
|
|
bf16::MIN_POSITIVE,
|
|
bf16::NEG_INFINITY,
|
|
bf16::INFINITY,
|
|
bf16::NAN,
|
|
bf16::E,
|
|
bf16::PI,
|
|
] {
|
|
group.bench_with_input(BenchmarkId::new("bf16::to_f64", val), val, |b, i| {
|
|
b.iter(|| i.to_f64())
|
|
});
|
|
}
|
|
}
|
|
|
|
// Scalar (one value per call) `bf16` conversion benchmarks, bundled as `bf16_sisd`.
criterion_group!(
    bf16_sisd,
    bench_f32_to_bf16,
    bench_f64_to_bf16,
    bench_bf16_to_f32,
    bench_bf16_to_f64
);
|
|
|
|
// Benchmark entry point: passes the three groups declared with `criterion_group!` in
// this file to `criterion_main!`.
criterion_main!(f16_sisd, bf16_sisd, f16_simd);
|