fp/simd/
generic.rs

1use crate::{
2    blas::block::{MatrixBlock, MatrixBlockSlice},
3    limb::Limb,
4};
5
6pub(super) fn add_simd(target: &mut [Limb], source: &[Limb], min_limb: usize) {
7    for (target_limb, source_limb) in target.iter_mut().zip(source.iter()).skip(min_limb) {
8        *target_limb ^= source_limb
9    }
10}
11
12pub(super) fn gather_block_simd(slice: MatrixBlockSlice) -> MatrixBlock {
13    let mut limbs = [0; 64];
14    for (i, limb) in slice.iter().enumerate() {
15        limbs[i] = *limb;
16    }
17    MatrixBlock::new(limbs)
18}
19
20/// Scalar (non-SIMD) implementation of 64 x 64 block GEMM over F_2.
21///
22/// Computes `C = A * B + C` where all arithmetic is in F_2 (XOR for addition, AND for
23/// multiplication).
24///
25/// # Algorithm
26///
27/// ```text
28/// For each row i of A:
29///   For each bit position k in A[i]:
30///     If A[i][k] == 1:
31///       C[i] ^= B[k]  (XOR row k of B into row i of C)
32/// ```
33///
34/// Note that this is not quite the standard algorithm for matrix multiplication. The standard
35/// algorithm would require us to iterate over a column of A for every output bit. The three loops
36/// in the algorithm are independent, so we can instead iterate over outputs and *then* move down
37/// the columns of A. This way, we only need to consider one limb of A at a time, and we don't need
38/// to do bit extractions (except for iterating over the bits of a limb).
39pub fn gemm_block_simd(a: MatrixBlock, b: MatrixBlock, c: &mut MatrixBlock) {
40    // For each row of A
41    for (result_limb, a_limb) in c.iter_mut().zip(a.iter()) {
42        let a_limb_iter = BitIterator::new(*a_limb);
43        // For each bit in this row of A, XOR the corresponding row of B into C
44        for (b_limb, a_bit) in b.iter().zip(a_limb_iter) {
45            *result_limb ^= *b_limb * (a_bit as Limb);
46        }
47    }
48}
49
50struct BitIterator {
51    limb: Limb,
52    bit_index: usize,
53}
54
55impl BitIterator {
56    fn new(limb: Limb) -> Self {
57        Self { limb, bit_index: 0 }
58    }
59}
60
61impl Iterator for BitIterator {
62    type Item = bool;
63
64    fn next(&mut self) -> Option<Self::Item> {
65        if self.bit_index >= crate::constants::BITS_PER_LIMB {
66            return None;
67        }
68        let result = self.limb & 1 == 1;
69        self.limb >>= 1;
70        self.bit_index += 1;
71        Some(result)
72    }
73}