29 lines
862 B
C++
29 lines
862 B
C++
#pragma once
|
|
#include <cstdint>
|
|
|
|
#include <c10/macros/Macros.h>
|
|
|
|
/// Defines the Float4_e2m1fn_x2 type (4-bit floating-point, two elements packed
|
|
/// into one byte). This is the FP4 dtype from the OCP MX format spec
|
|
/// (https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf,
|
|
/// Section 5.3.3)
|
|
///
|
|
/// Given two high precision values val0 and val1, here is the
|
|
/// binary configuration of their packed representation, from MSB to LSB:
|
|
///
|
|
/// original value | val1 : val0
|
|
/// ========================================
|
|
/// bit index (MSB==7, LSB==0) | 7654 : 3210
|
|
/// sign/exponent/mantissa | seem : seem
|
|
///
|
|
|
|
namespace c10 {
|
|
|
|
struct alignas(1) Float4_e2m1fn_x2 {
|
|
uint8_t val_;
|
|
Float4_e2m1fn_x2() = default;
|
|
C10_HOST_DEVICE explicit Float4_e2m1fn_x2(uint8_t val) : val_(val) {}
|
|
};
|
|
|
|
} // namespace c10
|