[linalg] - C++23 → Trunk

Files changed (1) hide show

tmp/tmp2jy82_1w/{from.md → to.md} +3998 -0

tmp/tmp2jy82_1w/{from.md → to.md} RENAMED Viewed

	@@ -0,0 +1,3998 @@

+## Basic linear algebra algorithms <a id="linalg">[[linalg]]</a>
+### Overview <a id="linalg.overview">[[linalg.overview]]</a>
+Subclause [[linalg]] defines basic linear algebra algorithms. The
+algorithms that access the elements of arrays view those elements
+through `mdspan` [[views.multidim]].
+### Header `<linalg>` synopsis <a id="linalg.syn">[[linalg.syn]]</a>
+``` cpp
+namespace std::linalg {
+  // [linalg.tags.order], storage order tags
+  struct column_major_t;
+  inline constexpr column_major_t column_major;
+  struct row_major_t;
+  inline constexpr row_major_t row_major;
+  // [linalg.tags.triangle], triangle tags
+  struct upper_triangle_t;
+  inline constexpr upper_triangle_t upper_triangle;
+  struct lower_triangle_t;
+  inline constexpr lower_triangle_t lower_triangle;
+  // [linalg.tags.diagonal], diagonal tags
+  struct implicit_unit_diagonal_t;
+  inline constexpr implicit_unit_diagonal_t implicit_unit_diagonal;
+  struct explicit_diagonal_t;
+  inline constexpr explicit_diagonal_t explicit_diagonal;
+  // [linalg.layout.packed], class template layout_blas_packed
+  template<class Triangle, class StorageOrder>
+    class layout_blas_packed;
+  // [linalg.helpers], exposition-only helpers
+  // [linalg.helpers.concepts], linear algebra argument concepts
+  template<class T>
+    constexpr bool is-mdspan = see below;               // exposition only
+  template<class T>
+    concept in-vector = see below;                      // exposition only
+  template<class T>
+    concept out-vector = see below;                     // exposition only
+  template<class T>
+    concept inout-vector = see below;                   // exposition only
+  template<class T>
+    concept in-matrix = see below;                      // exposition only
+  template<class T>
+    concept out-matrix = see below;                     // exposition only
+  template<class T>
+    concept inout-matrix = see below;                   // exposition only
+  template<class T>
+    concept possibly-packed-inout-matrix = see below;   // exposition only
+  template<class T>
+    concept in-object = see below;                      // exposition only
+  template<class T>
+    concept out-object = see below;                     // exposition only
+  template<class T>
+    concept inout-object = see below;                   // exposition only
+  // [linalg.scaled], scaled in-place transformation
+  // [linalg.scaled.scaledaccessor], class template scaled_accessor
+  template<class ScalingFactor, class NestedAccessor>
+    class scaled_accessor;
+  // [linalg.scaled.scaled], function template scaled
+  template<class ScalingFactor,
+           class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto scaled(ScalingFactor alpha, mdspan<ElementType, Extents, Layout, Accessor> x);
+  // [linalg.conj], conjugated in-place transformation
+  // [linalg.conj.conjugatedaccessor], class template conjugated_accessor
+  template<class NestedAccessor>
+    class conjugated_accessor;
+  // [linalg.conj.conjugated], function template conjugated
+  template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto conjugated(mdspan<ElementType, Extents, Layout, Accessor> a);
+  // [linalg.transp], transpose in-place transformation
+  // [linalg.transp.layout.transpose], class template layout_transpose
+  template<class Layout>
+    class layout_transpose;
+  // [linalg.transp.transposed], function template transposed
+  template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto transposed(mdspan<ElementType, Extents, Layout, Accessor> a);
+  // [linalg.conjtransposed], conjugated transpose in-place transformation
+  template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto conjugate_transposed(mdspan<ElementType, Extents, Layout, Accessor> a);
+  // [linalg.algs.blas1], BLAS 1 algorithms
+  // [linalg.algs.blas1.givens], Givens rotations
+  // [linalg.algs.blas1.givens.lartg], compute Givens rotation
+  template<class Real>
+    struct setup_givens_rotation_result {
+      Real c;
+      Real s;
+      Real r;
+    };
+  template<class Real>
+    struct setup_givens_rotation_result<complex<Real>> {
+      Real c;
+      complex<Real> s;
+      complex<Real> r;
+    };
+  template<class Real>
+    setup_givens_rotation_result<Real> setup_givens_rotation(Real a, Real b) noexcept;
+  template<class Real>
+    setup_givens_rotation_result<complex<Real>>
+      setup_givens_rotation(complex<Real> a, complex<Real> b) noexcept;
+  // [linalg.algs.blas1.givens.rot], apply computed Givens rotation
+  template<inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+    void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, Real s);
+  template<class ExecutionPolicy, inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+    void apply_givens_rotation(ExecutionPolicy&& exec,
+                               InOutVec1 x, InOutVec2 y, Real c, Real s);
+  template<inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+    void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, complex<Real> s);
+  template<class ExecutionPolicy, inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+    void apply_givens_rotation(ExecutionPolicy&& exec,
+                               InOutVec1 x, InOutVec2 y, Real c, complex<Real> s);
+  // [linalg.algs.blas1.swap], swap elements
+  template<inout-object InOutObj1, inout-object InOutObj2>
+    void swap_elements(InOutObj1 x, InOutObj2 y);
+  template<class ExecutionPolicy, inout-object InOutObj1, inout-object InOutObj2>
+    void swap_elements(ExecutionPolicy&& exec, InOutObj1 x, InOutObj2 y);
+  // [linalg.algs.blas1.scal], multiply elements by scalar
+  template<class Scalar, inout-object InOutObj>
+    void scale(Scalar alpha, InOutObj x);
+  template<class ExecutionPolicy, class Scalar, inout-object InOutObj>
+    void scale(ExecutionPolicy&& exec, Scalar alpha, InOutObj x);
+  // [linalg.algs.blas1.copy], copy elements
+  template<in-object InObj, out-object OutObj>
+    void copy(InObj x, OutObj y);
+  template<class ExecutionPolicy, in-object InObj, out-object OutObj>
+    void copy(ExecutionPolicy&& exec, InObj x, OutObj y);
+  // [linalg.algs.blas1.add], add elementwise
+  template<in-object InObj1, in-object InObj2, out-object OutObj>
+    void add(InObj1 x, InObj2 y, OutObj z);
+  template<class ExecutionPolicy, in-object InObj1, in-object InObj2, out-object OutObj>
+    void add(ExecutionPolicy&& exec, InObj1 x, InObj2 y, OutObj z);
+  // [linalg.algs.blas1.dot], dot product of two vectors
+  template<in-vector InVec1, in-vector InVec2, class Scalar>
+    Scalar dot(InVec1 v1, InVec2 v2, Scalar init);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, class Scalar>
+    Scalar dot(ExecutionPolicy&& exec, InVec1 v1, InVec2 v2, Scalar init);
+  template<in-vector InVec1, in-vector InVec2>
+    auto dot(InVec1 v1, InVec2 v2);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2>
+    auto dot(ExecutionPolicy&& exec, InVec1 v1, InVec2 v2);
+  template<in-vector InVec1, in-vector InVec2, class Scalar>
+    Scalar dotc(InVec1 v1, InVec2 v2, Scalar init);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, class Scalar>
+    Scalar dotc(ExecutionPolicy&& exec, InVec1 v1, InVec2 v2, Scalar init);
+  template<in-vector InVec1, in-vector InVec2>
+    auto dotc(InVec1 v1, InVec2 v2);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2>
+    auto dotc(ExecutionPolicy&& exec, InVec1 v1, InVec2 v2);
+  // [linalg.algs.blas1.ssq], scaled sum of squares of a vector's elements
+  template<class Scalar>
+    struct sum_of_squares_result {
+      Scalar scaling_factor;
+      Scalar scaled_sum_of_squares;
+    };
+  template<in-vector InVec, class Scalar>
+    sum_of_squares_result<Scalar>
+      vector_sum_of_squares(InVec v, sum_of_squares_result<Scalar> init);
+  template<class ExecutionPolicy, in-vector InVec, class Scalar>
+    sum_of_squares_result<Scalar>
+      vector_sum_of_squares(ExecutionPolicy&& exec,
+                            InVec v, sum_of_squares_result<Scalar> init);
+  // [linalg.algs.blas1.nrm2], Euclidean norm of a vector
+  template<in-vector InVec, class Scalar>
+    Scalar vector_two_norm(InVec v, Scalar init);
+  template<class ExecutionPolicy, in-vector InVec, class Scalar>
+    Scalar vector_two_norm(ExecutionPolicy&& exec, InVec v, Scalar init);
+  template<in-vector InVec>
+    auto vector_two_norm(InVec v);
+  template<class ExecutionPolicy, in-vector InVec>
+    auto vector_two_norm(ExecutionPolicy&& exec, InVec v);
+  // [linalg.algs.blas1.asum], sum of absolute values of vector elements
+  template<in-vector InVec, class Scalar>
+    Scalar vector_abs_sum(InVec v, Scalar init);
+  template<class ExecutionPolicy, in-vector InVec, class Scalar>
+    Scalar vector_abs_sum(ExecutionPolicy&& exec, InVec v, Scalar init);
+  template<in-vector InVec>
+    auto vector_abs_sum(InVec v);
+  template<class ExecutionPolicy, in-vector InVec>
+    auto vector_abs_sum(ExecutionPolicy&& exec, InVec v);
+  // [linalg.algs.blas1.iamax], index of maximum absolute value of vector elements
+  template<in-vector InVec>
+    typename InVec::extents_type vector_idx_abs_max(InVec v);
+  template<class ExecutionPolicy, in-vector InVec>
+    typename InVec::extents_type vector_idx_abs_max(ExecutionPolicy&& exec, InVec v);
+  // [linalg.algs.blas1.matfrobnorm], Frobenius norm of a matrix
+  template<in-matrix InMat, class Scalar>
+    Scalar matrix_frob_norm(InMat A, Scalar init);
+  template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+    Scalar matrix_frob_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+  template<in-matrix InMat>
+    auto matrix_frob_norm(InMat A);
+  template<class ExecutionPolicy, in-matrix InMat>
+    auto matrix_frob_norm(ExecutionPolicy&& exec, InMat A);
+  // [linalg.algs.blas1.matonenorm], one norm of a matrix
+  template<in-matrix InMat, class Scalar>
+    Scalar matrix_one_norm(InMat A, Scalar init);
+  template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+    Scalar matrix_one_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+  template<in-matrix InMat>
+    auto matrix_one_norm(InMat A);
+  template<class ExecutionPolicy, in-matrix InMat>
+    auto matrix_one_norm(ExecutionPolicy&& exec, InMat A);
+  // [linalg.algs.blas1.matinfnorm], infinity norm of a matrix
+  template<in-matrix InMat, class Scalar>
+    Scalar matrix_inf_norm(InMat A, Scalar init);
+  template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+    Scalar matrix_inf_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+  template<in-matrix InMat>
+    auto matrix_inf_norm(InMat A);
+  template<class ExecutionPolicy, in-matrix InMat>
+    auto matrix_inf_norm(ExecutionPolicy&& exec, InMat A);
+  // [linalg.algs.blas2], BLAS 2 algorithms
+  // [linalg.algs.blas2.gemv], general matrix-vector product
+  template<in-matrix InMat, in-vector InVec, out-vector OutVec>
+    void matrix_vector_product(InMat A, InVec x, OutVec y);
+  template<class ExecutionPolicy, in-matrix InMat, in-vector InVec, out-vector OutVec>
+    void matrix_vector_product(ExecutionPolicy&& exec, InMat A, InVec x, OutVec y);
+  template<in-matrix InMat, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void matrix_vector_product(InMat A, InVec1 x, InVec2 y, OutVec z);
+  template<class ExecutionPolicy,
+           in-matrix InMat, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void matrix_vector_product(ExecutionPolicy&& exec, InMat A, InVec1 x, InVec2 y, OutVec z);
+  // [linalg.algs.blas2.symv], symmetric matrix-vector product
+  template<in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+    void symmetric_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+    void symmetric_matrix_vector_product(ExecutionPolicy&& exec,
+                                         InMat A, Triangle t, InVec x, OutVec y);
+  template<in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2,
+           out-vector OutVec>
+    void symmetric_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2,
+           out-vector OutVec>
+    void symmetric_matrix_vector_product(ExecutionPolicy&& exec,
+                                         InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+  // [linalg.algs.blas2.hemv], Hermitian matrix-vector product
+  template<in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+    void hermitian_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+    void hermitian_matrix_vector_product(ExecutionPolicy&& exec,
+                                         InMat A, Triangle t, InVec x, OutVec y);
+  template<in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2,
+           out-vector OutVec>
+    void hermitian_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2,
+           out-vector OutVec>
+    void hermitian_matrix_vector_product(ExecutionPolicy&& exec,
+                                         InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+  // [linalg.algs.blas2.trmv], triangular matrix-vector product
+  // Overwriting triangular matrix-vector product
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, in-vector InVec,
+           out-vector OutVec>
+    void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d,
+                                          InVec x, OutVec y);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, in-vector InVec,
+           out-vector OutVec>
+    void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                          InMat A, Triangle t, DiagonalStorage d,
+                                          InVec x, OutVec y);
+  // In-place triangular matrix-vector product
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+    void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InOutVec y);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+    void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                          InMat A, Triangle t, DiagonalStorage d, InOutVec y);
+  // Updating triangular matrix-vector product
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d,
+                                          InVec1 x, InVec2 y, OutVec z);
+  template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                          InMat A, Triangle t, DiagonalStorage d,
+                                          InVec1 x, InVec2 y, OutVec z);
+  // [linalg.algs.blas2.trsv], solve a triangular linear system
+  // Solve a triangular linear system, not in place
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x, BinaryDivideOp divide);
+  template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x, BinaryDivideOp divide);
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec>
+    void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x);
+  template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec>
+    void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x);
+  // Solve a triangular linear system, in place
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-vector InOutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d,
+                                        InOutVec b, BinaryDivideOp divide);
+  template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-vector InOutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d,
+                                        InOutVec b, BinaryDivideOp divide);
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+    void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InOutVec b);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+    void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d, InOutVec b);
+  // [linalg.algs.blas2.rank1], nonsymmetric rank-1 matrix update
+  template<in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+    void matrix_rank_1_update(InVec1 x, InVec2 y, InOutMat A);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+    void matrix_rank_1_update(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A);
+  template<in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+    void matrix_rank_1_update_c(InVec1 x, InVec2 y, InOutMat A);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+    void matrix_rank_1_update_c(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A);
+  // [linalg.algs.blas2.symherrank1], symmetric or Hermitian rank-1 matrix update
+  template<class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t);
+  template<class ExecutionPolicy,
+           class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec,
+                                        Scalar alpha, InVec x, InOutMat A, Triangle t);
+  template<in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_1_update(InVec x, InOutMat A, Triangle t);
+  template<class ExecutionPolicy,
+           in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t);
+  template<class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t);
+  template<class ExecutionPolicy,
+           class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec,
+                                        Scalar alpha, InVec x, InOutMat A, Triangle t);
+  template<in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_1_update(InVec x, InOutMat A, Triangle t);
+  template<class ExecutionPolicy,
+           in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t);
+  // [linalg.algs.blas2.rank2], symmetric and Hermitian rank-2 matrix updates
+  // symmetric rank-2 matrix update
+  template<in-vector InVec1, in-vector InVec2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_2_update(ExecutionPolicy&& exec,
+                                        InVec1 x, InVec2 y, InOutMat A, Triangle t);
+  // Hermitian rank-2 matrix update
+  template<in-vector InVec1, in-vector InVec2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_2_update(ExecutionPolicy&& exec,
+                                        InVec1 x, InVec2 y, InOutMat A, Triangle t);
+  // [linalg.algs.blas3], BLAS 3 algorithms
+  // [linalg.algs.blas3.gemm], general matrix-matrix product
+  template<in-matrix InMat1, in-matrix InMat2, out-matrix OutMat>
+    void matrix_product(InMat1 A, InMat2 B, OutMat C);
+  template<class ExecutionPolicy, in-matrix InMat1, in-matrix InMat2, out-matrix OutMat>
+    void matrix_product(ExecutionPolicy&& exec,
+                        InMat1 A, InMat2 B, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+    void matrix_product(InMat1 A, InMat2 B, InMat3 E, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+    void matrix_product(ExecutionPolicy&& exec,
+                        InMat1 A, InMat2 B, InMat3 E, OutMat C);
+  // [linalg.algs.blas3.xxmm], symmetric, Hermitian, and triangular matrix-matrix product
+  template<in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+    void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+    void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, Triangle t, InMat2 B, OutMat C);
+  template<in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+    void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+    void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, Triangle t, InMat2 B, OutMat C);
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_product(ExecutionPolicy&& exec,
+                                   InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           out-matrix OutMat>
+    void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           out-matrix OutMat>
+    void triangular_matrix_product(ExecutionPolicy&& exec,
+                                   InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C);
+  template<in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+           out-matrix OutMat>
+    void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+           out-matrix OutMat>
+    void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+  template<in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+           out-matrix OutMat>
+    void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+           out-matrix OutMat>
+    void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+    void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E,
+                                   OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+    void triangular_matrix_product(ExecutionPolicy&& exec,
+                                   InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E,
+                                   OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+           out-matrix OutMat>
+    void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+           out-matrix OutMat>
+    void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+           out-matrix OutMat>
+    void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+           out-matrix OutMat>
+    void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           in-matrix InMat3, out-matrix OutMat>
+    void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E,
+                                   OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           in-matrix InMat3, out-matrix OutMat>
+    void triangular_matrix_product(ExecutionPolicy&& exec,
+                                   InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E,
+                                   OutMat C);
+  // [linalg.algs.blas3.trmm], in-place triangular matrix-matrix product
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_left_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_left_product(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_right_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_right_product(ExecutionPolicy&& exec,
+                                         InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+  // [linalg.algs.blas3.rankk], rank-k update of a symmetric or Hermitian matrix
+  // rank-k symmetric matrix update
+  template<class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t);
+  template<class ExecutionPolicy, class Scalar,
+           in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                        Scalar alpha, InMat A, InOutMat C, Triangle t);
+  template<in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(InMat A, InOutMat C, Triangle t);
+  template<class ExecutionPolicy,
+           in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                        InMat A, InOutMat C, Triangle t);
+  // rank-k Hermitian matrix update
+  template<class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t);
+  template<class ExecutionPolicy,
+           class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                        Scalar alpha, InMat A, InOutMat C, Triangle t);
+  template<in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_k_update(InMat A, InOutMat C, Triangle t);
+  template<class ExecutionPolicy,
+           in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                        InMat A, InOutMat C, Triangle t);
+  // [linalg.algs.blas3.rank2k], rank-2k update of a symmetric or Hermitian matrix
+  // rank-2k symmetric matrix update
+  template<in-matrix InMat1, in-matrix InMat2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_2k_update(ExecutionPolicy&& exec,
+                                         InMat1 A, InMat2 B, InOutMat C, Triangle t);
+  // rank-2k Hermitian matrix update
+  template<in-matrix InMat1, in-matrix InMat2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2,
+           possibly-packed-inout-matrix InOutMat, class Triangle>
+    void hermitian_matrix_rank_2k_update(ExecutionPolicy&& exec,
+                                         InMat1 A, InMat2 B, InOutMat C, Triangle t);
+  // [linalg.algs.blas3.trsm], solve multiple triangular linear systems
+  // solve multiple triangular systems on the left, not-in-place
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                             InMat2 B, OutMat X, BinaryDivideOp divide);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                             InMat1 A, Triangle t, DiagonalStorage d,
+                                             InMat2 B, OutMat X, BinaryDivideOp divide);
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                             InMat2 B, OutMat X);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                             InMat1 A, Triangle t, DiagonalStorage d,
+                                             InMat2 B, OutMat X);
+  // solve multiple triangular systems on the right, not-in-place
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                              InMat2 B, OutMat X, BinaryDivideOp divide);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                              InMat1 A, Triangle t, DiagonalStorage d,
+                                              InMat2 B, OutMat X, BinaryDivideOp divide);
+  template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                              InMat2 B, OutMat X);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, class Triangle, class DiagonalStorage,
+           in-matrix InMat2, out-matrix OutMat>
+    void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                              InMat1 A, Triangle t, DiagonalStorage d,
+                                              InMat2 B, OutMat X);
+  // solve multiple triangular systems on the left, in-place
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-matrix InOutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d,
+                                             InOutMat B, BinaryDivideOp divide);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-matrix InOutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                             InMat A, Triangle t, DiagonalStorage d,
+                                             InOutMat B, BinaryDivideOp divide);
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d,
+                                             InOutMat B);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                             InMat A, Triangle t, DiagonalStorage d,
+                                             InOutMat B);
+  // solve multiple triangular systems on the right, in-place
+  template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-matrix InOutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d,
+                                              InOutMat B, BinaryDivideOp divide);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage,
+           inout-matrix InOutMat, class BinaryDivideOp>
+    void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                              InMat A, Triangle t, DiagonalStorage d,
+                                              InOutMat B, BinaryDivideOp divide);
+  template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d,
+                                              InOutMat B);
+  template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                              InMat A, Triangle t, DiagonalStorage d,
+                                              InOutMat B);
+}
+```
+### General <a id="linalg.general">[[linalg.general]]</a>
+For the effects of all functions in [[linalg]], when the effects are
+described as “computes R = E” or “compute R = E” (for some R and
+mathematical expression E), the following apply:
+- E has the conventional mathematical meaning as written.
+- The pattern $x^T$ should be read as “the transpose of x.”
+- The pattern $x^H$ should be read as “the conjugate transpose of x.”
+- When R is the same name as a function parameter whose type is a
+  template parameter with `Out` in its name, the intent is that the
+  result of the computation is written to the elements of the function
+  parameter `R`.
+Some of the functions and types in [[linalg]] distinguish between the
+“rows” and the “columns” of a matrix. For a matrix `A` and a
+multidimensional index `i, j` in `A.extents()`,
+- *row* `i` of `A` is the set of elements `A[i, k1]` for all `k1` such
+  that `i, k1` is in `A.extents()`; and
+- *column* `j` of `A` is the set of elements `A[k0, j]` for all `k0`
+  such that `k0, j` is in `A.extents()`.
+Some of the functions in [[linalg]] distinguish between the “upper
+triangle,” “lower triangle,” and “diagonal” of a matrix.
+- The *diagonal* is the set of all elements of `A` accessed by `A[i,i]`
+  for 0 ≤ `i` \< min(`A.extent(0)`, `A.extent(1)`).
+- The *upper triangle* of a matrix `A` is the set of all elements of `A`
+  accessed by `A[i,j]` with `i` ≤ `j`. It includes the diagonal.
+- The *lower triangle* of `A` is the set of all elements of `A` accessed
+  by `A[i,j]` with `i` ≥ `j`. It includes the diagonal.
+For any function `F` that takes a parameter named `t`, `t` applies to
+accesses done through the parameter preceding `t` in the parameter list
+of `F`. Let `m` be such an access-modified function parameter. `F` will
+only access the triangle of `m` specified by `t`. For accesses `m[i, j]`
+outside the triangle specified by `t`, `F` will use the value
+- `conj-if-needed(m[j, i])` if the name of `F` starts with `hermitian`,
+- `m[j, i]` if the name of `F` starts with `symmetric`, or
+- the additive identity if the name of `F` starts with `triangular`.
+[*Example 1*:
+Small vector product accessing only specified triangle. It would not be
+a precondition violation for the non-accessed matrix element to be
+non-zero.
+``` cpp
+template<class Triangle>
+void triangular_matrix_vector_2x2_product(
+       mdspan<const float, extents<int, 2, 2>> m,
+       Triangle t,
+       mdspan<const float, extents<int, 2>> x,
+       mdspan<float, extents<int, 2>> y) {
+  static_assert(is_same_v<Triangle, lower_triangle_t> ||
+                is_same_v<Triangle, upper_triangle_t>);
+  if constexpr (is_same_v<Triangle, lower_triangle_t>) {
+    y[0] = m[0,0] * x[0];       // + 0 * x[1]
+    y[1] = m[1,0] * x[0] + m[1,1] * x[1];
+  } else {                      // upper_triangle_t
+    y[0] = m[0,0] * x[0] + m[0,1] * x[1];
+    y[1] = /* 0 * x[0] + */ m[1,1] * x[1];
+  }
+}
+```
+— *end example*]
+For any function `F` that takes a parameter named `d`, `d` applies to
+accesses done through the previous-of-the-previous parameter of `d` in
+the parameter list of `F`. Let `m` be such an access-modified function
+parameter. If `d` specifies that an implicit unit diagonal is to be
+assumed, then
+- `F` will not access the diagonal of `m`; and
+- the algorithm will interpret `m` as if it has a unit diagonal, that
+  is, a diagonal each of whose elements behaves as a two-sided
+  multiplicative identity (even if `m`’s value type does not have a
+  two-sided multiplicative identity).
+Otherwise, if `d` specifies that an explicit diagonal is to be assumed,
+then `F` will access the diagonal of `m`.
+Within all the functions in [[linalg]], any calls to `abs`, `conj`,
+`imag`, and `real` are unqualified.
+Two `mdspan` objects `x` and `y` *alias* each other, if they have the
+same extents `e`, and for every pack of integers `i` which is a
+multidimensional index in `e`, `x[i...]` and `y[i...]` refer to the same
+element.
+[*Note 1*: This means that `x` and `y` view the same elements in the
+same order. — *end note*]
+Two `mdspan` objects `x` and `y` *overlap* each other, if for some pack
+of integers `i` that is a multidimensional index in `x.extents()`, there
+exists a pack of integers `j` that is a multidimensional index in
+`y.extents()`, such that `x[i...]` and `y[j...]` refer to the same
+element.
+[*Note 2*: Aliasing is a special case of overlapping. If `x` and `y` do
+not overlap, then they also do not alias each other. — *end note*]
+### Requirements <a id="linalg.reqs">[[linalg.reqs]]</a>
+#### Linear algebra value types <a id="linalg.reqs.val">[[linalg.reqs.val]]</a>
+Throughout [[linalg]], the following types are *linear algebra value
+types*:
+- the `value_type` type alias of any input or output `mdspan`
+  parameter(s) of any function in [[linalg]]; and
+- the `Scalar` template parameter (if any) of any function or class in
+  [[linalg]].
+Linear algebra value types shall model `semiregular`.
+A value-initialized object of linear algebra value type shall act as the
+additive identity.
+#### Algorithm and class requirements <a id="linalg.reqs.alg">[[linalg.reqs.alg]]</a>
+[[linalg.reqs.alg]] lists common requirements for all algorithms and
+classes in [[linalg]].
+All of the following statements presume that the algorithm’s asymptotic
+complexity requirements, if any, are satisfied.
+- The function may make arbitrarily many objects of any linear algebra
+  value type, value-initializing or direct-initializing them with any
+  existing object of that type.
+- The *triangular solve algorithms* in [[linalg.algs.blas2.trsv]],
+  [[linalg.algs.blas3.trmm]], [[linalg.algs.blas3.trsm]], and
+  [[linalg.algs.blas3.inplacetrsm]] either have a `BinaryDivideOp`
+  template parameter (see [[linalg.algs.reqs]]) and a binary function
+  object parameter `divide` of that type, or they have effects
+  equivalent to invoking such an algorithm. Triangular solve algorithms
+  interpret `divide(a, b)` as `a` times the multiplicative inverse of
+  `b`. Each triangular solve algorithm uses a sequence of evaluations of
+  `*`, `*=`, `divide`, unary `+`, binary `+`, `+=`, unary `-`, binary
+  `-`, `-=`, and `=` operators that would produce the result specified
+  by the algorithm’s *Effects* and *Remarks* when operating on elements
+  of a field with noncommutative multiplication. It is a precondition of
+  the algorithm that any addend, any subtrahend, any partial sum of
+  addends in any order (treating any difference as a sum with the second
+  term negated), any factor, any partial product of factors respecting
+  their order, any numerator (first argument of `divide`), any
+  denominator (second argument of `divide`), and any assignment is a
+  well-formed expression.
+- Each function in [[linalg.algs.blas1]], [[linalg.algs.blas2]], and
+  [[linalg.algs.blas3]] that is not a triangular solve algorithm will
+  use a sequence of evaluations of `*`, `*=`, `+`, `+=`, and `=`
+  operators that would produce the result specified by the algorithm’s
+  *Effects* and *Remarks* when operating on elements of a semiring with
+  noncommutative multiplication. It is a precondition of the algorithm
+  that any addend, any partial sum of addends in any order, any factor,
+  any partial product of factors respecting their order, and any
+  assignment is a well-formed expression.
+- If the function has an output `mdspan`, then all addends, subtrahends
+  (for the triangular solve algorithms), or results of the `divide`
+  parameter on intermediate terms (if the function takes a `divide`
+  parameter) are assignable and convertible to the output `mdspan`’s
+  `value_type`.
+- The function may reorder addends and partial sums arbitrarily.
+  \[*Note 1*: Factors in each product are not reordered; multiplication
+  is not necessarily commutative. — *end note*]
+[*Note 2*: The above requirements do not prohibit implementation
+approaches and optimization techniques which are not user-observable. In
+particular, if for all input and output arguments the `value_type` is a
+floating-point type, implementers are free to leverage approximations,
+use arithmetic operations not explicitly listed above, and compute
+floating-point sums in any way that improves their
+accuracy. — *end note*]
+[*Note 3*:
+For all functions in [[linalg]], suppose that all input and output
+`mdspan` have as `value_type` a floating-point type, and any `Scalar`
+template argument has a floating-point type. Then, functions can do all
+of the following:
+- compute floating-point sums in any way that improves their accuracy
+  for arbitrary input;
+- perform additional arithmetic operations (other than those specified
+  by the function’s wording and [[linalg.reqs.alg]]) in order to improve
+  performance or accuracy; and
+- use approximations (that might not be exact even if computing with
+  real numbers), instead of computations that would be exact if it were
+  possible to compute without rounding error;
+as long as
+- the function satisfies the complexity requirements; and
+- the function is logarithmically stable, as defined in Demmel 2007.
+  Strassen’s algorithm for matrix-matrix multiply is an example of a
+  logarithmically stable algorithm.
+— *end note*]
+### Tag classes <a id="linalg.tags">[[linalg.tags]]</a>
+#### Storage order tags <a id="linalg.tags.order">[[linalg.tags.order]]</a>
+The storage order tags describe the order of elements in an `mdspan`
+with `layout_blas_packed` [[linalg.layout.packed]] layout.
+``` cpp
+struct column_major_t {
+  explicit column_major_t() = default;
+};
+inline constexpr column_major_t column_major{};
+struct row_major_t {
+  explicit row_major_t() = default;
+};
+inline constexpr row_major_t row_major{};
+```
+`column_major_t` indicates a column-major order, and `row_major_t`
+indicates a row-major order.
+#### Triangle tags <a id="linalg.tags.triangle">[[linalg.tags.triangle]]</a>
+``` cpp
+struct upper_triangle_t {
+  explicit upper_triangle_t() = default;
+};
+inline constexpr upper_triangle_t upper_triangle{};
+struct lower_triangle_t {
+  explicit lower_triangle_t() = default;
+};
+inline constexpr lower_triangle_t lower_triangle{};
+```
+These tag classes specify whether algorithms and other users of a matrix
+(represented as an `mdspan`) access the upper triangle
+(`upper_triangle_t`) or lower triangle (`lower_triangle_t`) of the
+matrix (see also [[linalg.general]]). This is also subject to the
+restrictions of `implicit_unit_diagonal_t` if that tag is also used as a
+function argument; see below.
+#### Diagonal tags <a id="linalg.tags.diagonal">[[linalg.tags.diagonal]]</a>
+``` cpp
+struct implicit_unit_diagonal_t {
+  explicit implicit_unit_diagonal_t() = default;
+};
+inline constexpr implicit_unit_diagonal_t implicit_unit_diagonal{};
+struct explicit_diagonal_t {
+  explicit explicit_diagonal_t() = default;
+};
+inline constexpr explicit_diagonal_t explicit_diagonal{};
+```
+These tag classes specify whether algorithms access the matrix’s
+diagonal entries, and if not, then how algorithms interpret the matrix’s
+implicitly represented diagonal values.
+The `implicit_unit_diagonal_t` tag indicates that an implicit unit
+diagonal is to be assumed [[linalg.general]].
+The `explicit_diagonal_t` tag indicates that an explicit diagonal is
+used [[linalg.general]].
+### Layouts for packed matrix types <a id="linalg.layout.packed">[[linalg.layout.packed]]</a>
+#### Overview <a id="linalg.layout.packed.overview">[[linalg.layout.packed.overview]]</a>
+`layout_blas_packed` is an `mdspan` layout mapping policy that
+represents a square matrix that stores only the entries in one triangle,
+in a packed contiguous format. Its `Triangle` template parameter
+determines whether an `mdspan` with this layout stores the upper or
+lower triangle of the matrix. Its `StorageOrder` template parameter
+determines whether the layout packs the matrix’s elements in
+column-major or row-major order.
+A `StorageOrder` of `column_major_t` indicates column-major ordering.
+This packs matrix elements starting with the leftmost (least column
+index) column, and proceeding column by column, from the top entry
+(least row index).
+A `StorageOrder` of `row_major_t` indicates row-major ordering. This
+packs matrix elements starting with the topmost (least row index) row,
+and proceeding row by row, from the leftmost (least column index) entry.
+[*Note 1*: `layout_blas_packed` describes the data layout used by the
+BLAS’ Symmetric Packed (SP), Hermitian Packed (HP), and Triangular
+Packed (TP) matrix types. — *end note*]
+``` cpp
+namespace std::linalg {
+  template<class Triangle, class StorageOrder>
+  class layout_blas_packed {
+  public:
+    using triangle_type = Triangle;
+    using storage_order_type = StorageOrder;
+    template<class Extents>
+    struct mapping {
+    public:
+      using extents_type = Extents;
+      using index_type = extents_type::index_type;
+      using size_type = extents_type::size_type;
+      using rank_type = extents_type::rank_type;
+      using layout_type = layout_blas_packed;
+      // [linalg.layout.packed.cons], constructors
+      constexpr mapping() noexcept = default;
+      constexpr mapping(const mapping&) noexcept = default;
+      constexpr mapping(const extents_type&) noexcept;
+      template<class OtherExtents>
+        constexpr explicit(!is_convertible_v<OtherExtents, extents_type>)
+          mapping(const mapping<OtherExtents>& other) noexcept;
+      constexpr mapping& operator=(const mapping&) noexcept = default;
+      // [linalg.layout.packed.obs], observers
+      constexpr const extents_type& extents() const noexcept { return extents_; }
+      constexpr index_type required_span_size() const noexcept;
+      template<class Index0, class Index1>
+        constexpr index_type operator() (Index0 ind0, Index1 ind1) const noexcept;
+      static constexpr bool is_always_unique() noexcept {
+        return (extents_type::static_extent(0) != dynamic_extent &&
+                extents_type::static_extent(0) < 2) ||
+               (extents_type::static_extent(1) != dynamic_extent &&
+                extents_type::static_extent(1) < 2);
+      }
+      static constexpr bool is_always_exhaustive() noexcept { return true; }
+      static constexpr bool is_always_strided() noexcept
+        { return is_always_unique(); }
+      constexpr bool is_unique() const noexcept {
+        return extents_.extent(0) < 2;
+      }
+      constexpr bool is_exhaustive() const noexcept { return true; }
+      constexpr bool is_strided() const noexcept {
+        return extents_.extent(0) < 2;
+      }
+      constexpr index_type stride(rank_type) const noexcept;
+      template<class OtherExtents>
+        friend constexpr bool operator==(const mapping&, const mapping<OtherExtents>&) noexcept;
+    private:
+      extents_type extents_{};     // exposition only
+    };
+  };
+}
+```
+*Mandates:*
+- `Triangle` is either `upper_triangle_t` or `lower_triangle_t`,
+- `StorageOrder` is either `column_major_t` or `row_major_t`,
+- `Extents` is a specialization of `std::extents`,
+- `Extents::rank()` equals 2,
+- one of
+  ``` cpp
+  extents_type::static_extent(0) == dynamic_extent,
+  extents_type::static_extent(1) == dynamic_extent, or
+  extents_type::static_extent(0) == extents_type::static_extent(1)
+  ```
+  is `true`, and
+- if `Extents::rank_dynamic() == 0` is `true`, let Nₛ be equal to
+  `Extents::static_extent(0)`; then, Nₛ × (Nₛ + 1) is representable as a
+  value of type `index_type`.
+`layout_blas_packed<T, SO>::mapping<E>` is a trivially copyable type
+that models `regular` for each `T`, `SO`, and `E`.
+#### Constructors <a id="linalg.layout.packed.cons">[[linalg.layout.packed.cons]]</a>
+``` cpp
+constexpr mapping(const extents_type& e) noexcept;
+```
+*Preconditions:*
+- Let N be equal to `e.extent(0)`. Then, N × (N+1) is representable as a
+  value of type `index_type` [[basic.fundamental]].
+- `e.extent(0)` equals `e.extent(1)`.
+*Effects:* Direct-non-list-initializes *extents\_* with `e`.
+``` cpp
+template<class OtherExtents>
+  explicit(!is_convertible_v<OtherExtents, extents_type>)
+    constexpr mapping(const mapping<OtherExtents>& other) noexcept;
+```
+*Constraints:* `is_constructible_v<extents_type, OtherExtents>` is
+`true`.
+*Preconditions:* Let N be `other.extents().extent(0)`. Then, N × (N+1)
+is representable as a value of type `index_type` [[basic.fundamental]].
+*Effects:* Direct-non-list-initializes *extents\_* with
+`other.extents()`.
+#### Observers <a id="linalg.layout.packed.obs">[[linalg.layout.packed.obs]]</a>
+``` cpp
+constexpr index_type required_span_size() const noexcept;
+```
+*Returns:* *`extents_`*`.extent(0) * (`*`extents_`*`.extent(0) + 1)/2`.
+[*Note 1*: For example, a 5 x 5 packed matrix only stores 15 matrix
+elements. — *end note*]
+``` cpp
+template<class Index0, class Index1>
+  constexpr index_type operator() (Index0 ind0, Index1 ind1) const noexcept;
+```
+*Constraints:*
+- `is_convertible_v<Index0, index_type>` is `true`,
+- `is_convertible_v<Index1, index_type>` is `true`,
+- `is_nothrow_constructible_v<index_type, Index0>` is `true`, and
+- `is_nothrow_constructible_v<index_type, Index1>` is `true`.
+Let `i` be `extents_type::`*`index-cast`*`(ind0)`, and let `j` be
+`extents_type::`*`index-cast`*`(ind1)`.
+*Preconditions:* `i, j` is a multidimensional index in
+*extents\_*[[mdspan.overview]].
+*Returns:* Let `N` be *`extents_`*`.extent(0)`. Then
+- `(*this)(j, i)` if `i > j` is `true`; otherwise
+- `i + j * (j + 1)/2` if
+  ``` cpp
+  is_same_v<StorageOrder, column_major_t> && is_same_v<Triangle, upper_triangle_t>
+  ```
+  is `true` or
+  ``` cpp
+  is_same_v<StorageOrder, row_major_t> && is_same_v<Triangle, lower_triangle_t>
+  ```
+  is `true`; otherwise
+- `j + N * i - i * (i + 1)/2`.
+``` cpp
+constexpr index_type stride(rank_type r) const noexcept;
+```
+*Preconditions:*
+- `is_strided()` is `true`, and
+- `r < extents_type::rank()` is `true`.
+*Returns:* `1`.
+``` cpp
+template<class OtherExtents>
+  friend constexpr bool operator==(const mapping& x, const mapping<OtherExtents>& y) noexcept;
+```
+*Effects:* Equivalent to: `return x.extents() == y.extents();`
+### Exposition-only helpers <a id="linalg.helpers">[[linalg.helpers]]</a>
+#### *`abs-if-needed`* <a id="linalg.helpers.abs">[[linalg.helpers.abs]]</a>
+The name *`abs-if-needed`* denotes an exposition-only function object.
+The expression `abs-if-needed(E)` for a subexpression `E` whose type is
+`T` is expression-equivalent to:
+- `E` if `T` is an unsigned integer;
+- otherwise, `std::abs(E)` if `T` is an arithmetic type,
+- otherwise, `abs(E)`, if that expression is valid, with overload
+  resolution performed in a context that includes the declaration
+  ``` cpp
+  template<class U> U abs(U) = delete;
+  ```
+  If the function selected by overload resolution does not return the
+  absolute value of its input, the program is ill-formed, no diagnostic
+  required.
+#### *`conj-if-needed`* <a id="linalg.helpers.conj">[[linalg.helpers.conj]]</a>
+The name *`conj-if-needed`* denotes an exposition-only function object.
+The expression `conj-if-needed(E)` for a subexpression `E` whose type is
+`T` is expression-equivalent to:
+- `conj(E)`, if `T` is not an arithmetic type and the expression
+  `conj(E)` is valid, with overload resolution performed in a context
+  that includes the declaration
+  ``` cpp
+  template<class U> U conj(const U&) = delete;
+  ```
+  If the function selected by overload resolution does not return the
+  complex conjugate of its input, the program is ill-formed, no
+  diagnostic required;
+- otherwise, `E`.
+#### *`real-if-needed`* <a id="linalg.helpers.real">[[linalg.helpers.real]]</a>
+The name *`real-if-needed`* denotes an exposition-only function object.
+The expression `real-if-needed(E)` for a subexpression `E` whose type is
+`T` is expression-equivalent to:
+- `real(E)`, if `T` is not an arithmetic type and the expression
+  `real(E)` is valid, with overload resolution performed in a context
+  that includes the declaration
+  ``` cpp
+  template<class U> U real(const U&) = delete;
+  ```
+  If the function selected by overload resolution does not return the
+  real part of its input, the program is ill-formed, no diagnostic
+  required;
+- otherwise, `E`.
+#### *`imag-if-needed`* <a id="linalg.helpers.imag">[[linalg.helpers.imag]]</a>
+The name *`imag-if-needed`* denotes an exposition-only function object.
+The expression `imag-if-needed(E)` for a subexpression `E` whose type is
+`T` is expression-equivalent to:
+- `imag(E)`, if `T` is not an arithmetic type and the expression
+  `imag(E)` is valid, with overload resolution performed in a context
+  that includes the declaration
+  ``` cpp
+  template<class U> U imag(const U&) = delete;
+  ```
+  If the function selected by overload resolution does not return the
+  imaginary part of its input, the program is ill-formed, no diagnostic
+  required;
+- otherwise, `((void)E, T{})`.
+#### Argument concepts <a id="linalg.helpers.concepts">[[linalg.helpers.concepts]]</a>
+The exposition-only concepts defined in this section constrain the
+algorithms in [[linalg]].
+``` cpp
+template<class T>
+  constexpr bool is-mdspan = false;
+template<class ElementType, class Extents, class Layout, class Accessor>
+  constexpr bool is-mdspan<mdspan<ElementType, Extents, Layout, Accessor>> = true;
+template<class T>
+  concept in-vector =
+    is-mdspan<T> && T::rank() == 1;
+template<class T>
+  concept out-vector =
+    is-mdspan<T> && T::rank() == 1 &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+template<class T>
+  concept inout-vector =
+    is-mdspan<T> && T::rank() == 1 &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+template<class T>
+  concept in-matrix =
+    is-mdspan<T> && T::rank() == 2;
+template<class T>
+  concept out-matrix =
+    is-mdspan<T> && T::rank() == 2 &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+template<class T>
+  concept inout-matrix =
+    is-mdspan<T> && T::rank() == 2 &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+template<class T>
+  constexpr bool is-layout-blas-packed = false;    // exposition only
+template<class Triangle, class StorageOrder>
+  constexpr bool is-layout-blas-packed<layout_blas_packed<Triangle, StorageOrder>> = true;
+template<class T>
+  concept possibly-packed-inout-matrix =
+    is-mdspan<T> && T::rank() == 2 &&
+    is_assignable_v<typename T::reference, typename T::element_type> &&
+    (T::is_always_unique() || is-layout-blas-packed<typename T::layout_type>);
+template<class T>
+  concept in-object =
+    is-mdspan<T> && (T::rank() == 1 || T::rank() == 2);
+template<class T>
+  concept out-object =
+    is-mdspan<T> && (T::rank() == 1 || T::rank() == 2) &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+template<class T>
+  concept inout-object =
+    is-mdspan<T> && (T::rank() == 1 || T::rank() == 2) &&
+    is_assignable_v<typename T::reference, typename T::element_type> && T::is_always_unique();
+```
+If a function in [[linalg]] accesses the elements of a parameter
+constrained by `in-vector`, `in-matrix`, or `in-object`, those accesses
+will not modify the elements.
+Unless explicitly permitted, any `inout-vector`, `inout-matrix`,
+`inout-object`, `out-vector`, `out-matrix`, `out-object`, or
+`possibly-packed-inout-matrix` parameter of a function in [[linalg]]
+shall not overlap any other `mdspan` parameter of the function.
+#### Mandates <a id="linalg.helpers.mandates">[[linalg.helpers.mandates]]</a>
+[*Note 1*: These exposition-only helper functions use the less
+constraining input concepts even for the output arguments, because the
+additional constraint for assignability of elements is not necessary,
+and they are sometimes used in a context where the third argument is an
+input type too. — *end note*]
+``` cpp
+template<class MDS1, class MDS2>
+  requires(is-mdspan<MDS1> && is-mdspan<MDS2>)
+  constexpr
+  bool compatible-static-extents(size_t r1, size_t r2) {         // exposition only
+    return MDS1::static_extent(r1) == dynamic_extent ||
+           MDS2::static_extent(r2) == dynamic_extent ||
+           MDS1::static_extent(r1) == MDS2::static_extent(r2);
+  }
+template<in-vector In1, in-vector In2, in-vector Out>
+  constexpr bool possibly-addable() {                            // exposition only
+    return compatible-static-extents<Out, In1>(0, 0) &&
+           compatible-static-extents<Out, In2>(0, 0) &&
+           compatible-static-extents<In1, In2>(0, 0);
+  }
+template<in-matrix In1, in-matrix In2, in-matrix Out>
+  constexpr bool possibly-addable() {                            // exposition only
+    return compatible-static-extents<Out, In1>(0, 0) &&
+           compatible-static-extents<Out, In1>(1, 1) &&
+           compatible-static-extents<Out, In2>(0, 0) &&
+           compatible-static-extents<Out, In2>(1, 1) &&
+           compatible-static-extents<In1, In2>(0, 0) &&
+           compatible-static-extents<In1, In2>(1, 1);
+  }
+template<in-matrix InMat, in-vector InVec, in-vector OutVec>
+  constexpr bool possibly-multipliable() {                       // exposition only
+    return compatible-static-extents<OutVec, InMat>(0, 0) &&
+           compatible-static-extents<InMat, InVec>(1, 0);
+  }
+template<in-vector InVec, in-matrix InMat, in-vector OutVec>
+  constexpr bool possibly-multipliable() {                       // exposition only
+    return compatible-static-extents<OutVec, InMat>(0, 1) &&
+           compatible-static-extents<InMat, InVec>(0, 0);
+  }
+template<in-matrix InMat1, in-matrix InMat2, in-matrix OutMat>
+  constexpr bool possibly-multipliable() {                       // exposition only
+    return compatible-static-extents<OutMat, InMat1>(0, 0) &&
+           compatible-static-extents<OutMat, InMat2>(1, 1) &&
+           compatible-static-extents<InMat1, InMat2>(1, 0);
+  }
+```
+#### Preconditions <a id="linalg.helpers.precond">[[linalg.helpers.precond]]</a>
+[*Note 1*: These exposition-only helper functions use the less
+constraining input concepts even for the output arguments, because the
+additional constraint for assignability of elements is not necessary,
+and they are sometimes used in a context where the third argument is an
+input type too. — *end note*]
+``` cpp
+constexpr bool addable(                                          // exposition only
+  const in-vector auto& in1, const in-vector auto& in2, const in-vector auto& out) {
+  return out.extent(0) == in1.extent(0) && out.extent(0) == in2.extent(0);
+}
+constexpr bool addable(                                          // exposition only
+  const in-matrix auto& in1,  const in-matrix auto& in2, const in-matrix auto& out) {
+  return out.extent(0) == in1.extent(0) && out.extent(1) == in1.extent(1) &&
+         out.extent(0) == in2.extent(0) && out.extent(1) == in2.extent(1);
+}
+constexpr bool multipliable(                                     // exposition only
+  const in-matrix auto& in_mat, const in-vector auto& in_vec, const in-vector auto& out_vec) {
+  return out_vec.extent(0) == in_mat.extent(0) && in_mat.extent(1) == in_vec.extent(0);
+}
+constexpr bool multipliable( // exposition only
+  const in-vector auto& in_vec, const in-matrix auto& in_mat, const in-vector auto& out_vec) {
+  return out_vec.extent(0) == in_mat.extent(1) && in_mat.extent(0) == in_vec.extent(0);
+}
+constexpr bool multipliable(                                     // exposition only
+  const in-matrix auto& in_mat1, const in-matrix auto& in_mat2, const in-matrix auto& out_mat) {
+  return out_mat.extent(0) == in_mat1.extent(0) && out_mat.extent(1) == in_mat2.extent(1) &&
+         in_mat1.extent(1) == in_mat2.extent(0);
+}
+```
+### Scaled in-place transformation <a id="linalg.scaled">[[linalg.scaled]]</a>
+#### Introduction <a id="linalg.scaled.intro">[[linalg.scaled.intro]]</a>
+The `scaled` function takes a value `alpha` and an `mdspan` `x`, and
+returns a new read-only `mdspan` that represents the elementwise product
+of `alpha` with each element of `x`.
+[*Example 1*:
+``` cpp
+using Vec = mdspan<double, dextents<size_t, 1>>;
+// z = alpha * x + y
+void z_equals_alpha_times_x_plus_y(double alpha, Vec x, Vec y, Vec z) {
+  add(scaled(alpha, x), y, z);
+}
+// z = alpha * x + beta * y
+void z_equals_alpha_times_x_plus_beta_times_y(double alpha, Vec x, double beta, Vec y, Vec z) {
+  add(scaled(alpha, x), scaled(beta, y), z);
+}
+```
+— *end example*]
+#### Class template `scaled_accessor` <a id="linalg.scaled.scaledaccessor">[[linalg.scaled.scaledaccessor]]</a>
+The class template `scaled_accessor` is an `mdspan` accessor policy
+which upon access produces scaled elements. It is part of the
+implementation of `scaled` [[linalg.scaled.scaled]].
+``` cpp
+namespace std::linalg {
+  template<class ScalingFactor, class NestedAccessor>
+  class scaled_accessor {
+  public:
+    using element_type =
+      const decltype(declval<ScalingFactor>() * declval<NestedAccessor::element_type>());
+    using reference = remove_const_t<element_type>;
+    using data_handle_type = NestedAccessor::data_handle_type;
+    using offset_policy = scaled_accessor<ScalingFactor, NestedAccessor::offset_policy>;
+    constexpr scaled_accessor() = default;
+    template<class OtherNestedAccessor>
+      explicit(!is_convertible_v<OtherNestedAccessor, NestedAccessor>)
+        constexpr scaled_accessor(const scaled_accessor<ScalingFactor,
+                                                        OtherNestedAccessor>& other);
+    constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a);
+    constexpr reference access(data_handle_type p, size_t i) const;
+    constexpr offset_policy::data_handle_type offset(data_handle_type p, size_t i) const;
+    constexpr const ScalingFactor& scaling_factor() const noexcept { return scaling-factor; }
+    constexpr const NestedAccessor& nested_accessor() const noexcept { return nested-accessor; }
+  private:
+    ScalingFactor scaling-factor{};                              // exposition only
+    NestedAccessor nested-accessor{};                            // exposition only
+  };
+}
+```
+*Mandates:*
+- `element_type` is valid and denotes a type,
+- `is_copy_constructible_v<reference>` is `true`,
+- `is_reference_v<element_type>` is `false`,
+- `ScalingFactor` models `semiregular`, and
+- `NestedAccessor` meets the accessor policy requirements
+  [[mdspan.accessor.reqmts]].
+``` cpp
+template<class OtherNestedAccessor>
+  explicit(!is_convertible_v<OtherNestedAccessor, NestedAccessor>)
+    constexpr scaled_accessor(const scaled_accessor<ScalingFactor, OtherNestedAccessor>& other);
+```
+*Constraints:*
+`is_constructible_v<NestedAccessor, const OtherNestedAccessor&>` is
+`true`.
+*Effects:*
+- Direct-non-list-initializes *scaling-factor* with
+  `other.scaling_factor()`, and
+- direct-non-list-initializes *nested-accessor* with
+  `other.nested_accessor()`.
+``` cpp
+constexpr scaled_accessor(const ScalingFactor& s, const NestedAccessor& a);
+```
+*Effects:*
+- Direct-non-list-initializes *scaling-factor* with `s`, and
+- direct-non-list-initializes *nested-accessor* with `a`.
+``` cpp
+constexpr reference access(data_handle_type p, size_t i) const;
+```
+*Returns:*
+``` cpp
+scaling_factor() * NestedAccessor::element_type(nested-accessor.access(p, i))
+```
+``` cpp
+constexpr offset_policy::data_handle_type offset(data_handle_type p, size_t i) const;
+```
+*Returns:* *`nested-accessor`*`.offset(p, i)`
+#### Function template `scaled` <a id="linalg.scaled.scaled">[[linalg.scaled.scaled]]</a>
+The `scaled` function template takes a scaling factor `alpha` and an
+`mdspan` `x`, and returns a new read-only `mdspan` with the same domain
+as `x`, that represents the elementwise product of `alpha` with each
+element of `x`.
+``` cpp
+template<class ScalingFactor,
+           class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto scaled(ScalingFactor alpha, mdspan<ElementType, Extents, Layout, Accessor> x);
+```
+Let `SA` be `scaled_accessor<ScalingFactor, Accessor>`.
+*Returns:*
+``` cpp
+mdspan<typename SA::element_type, Extents, Layout, SA>(x.data_handle(), x.mapping(),
+                                                       SA(alpha, x.accessor()))
+```
+[*Example 1*:
+``` cpp
+void test_scaled(mdspan<double, extents<int, 10>> x)
+{
+  auto x_scaled = scaled(5.0, x);
+  for (int i = 0; i < x.extent(0); ++i) {
+    assert(x_scaled[i] == 5.0 * x[i]);
+  }
+}
+```
+— *end example*]
+### Conjugated in-place transformation <a id="linalg.conj">[[linalg.conj]]</a>
+#### Introduction <a id="linalg.conj.intro">[[linalg.conj.intro]]</a>
+The `conjugated` function takes an `mdspan` `x`, and returns a new
+read-only `mdspan` `y` with the same domain as `x`, whose elements are
+the complex conjugates of the corresponding elements of `x`.
+#### Class template `conjugated_accessor` <a id="linalg.conj.conjugatedaccessor">[[linalg.conj.conjugatedaccessor]]</a>
+The class template `conjugated_accessor` is an `mdspan` accessor policy
+which upon access produces conjugate elements. It is part of the
+implementation of `conjugated` [[linalg.conj.conjugated]].
+``` cpp
+namespace std::linalg {
+  template<class NestedAccessor>
+  class conjugated_accessor {
+  public:
+    using element_type =
+      const decltype(conj-if-needed(declval<NestedAccessor::element_type>()));
+    using reference = remove_const_t<element_type>;
+    using data_handle_type = NestedAccessor::data_handle_type;
+    using offset_policy = conjugated_accessor<NestedAccessor::offset_policy>;
+    constexpr conjugated_accessor() = default;
+    constexpr conjugated_accessor(const NestedAccessor& acc);
+    template<class OtherNestedAccessor>
+      explicit(!is_convertible_v<OtherNestedAccessor, NestedAccessor>)
+      constexpr conjugated_accessor(const conjugated_accessor<OtherNestedAccessor>& other);
+    constexpr reference access(data_handle_type p, size_t i) const;
+    constexpr typename offset_policy::data_handle_type
+      offset(data_handle_type p, size_t i) const;
+    constexpr const NestedAccessor& nested_accessor() const noexcept { return nested-accessor_; }
+  private:
+    NestedAccessor nested-accessor_{};                           // exposition only
+  };
+}
+```
+*Mandates:*
+- `element_type` is valid and denotes a type,
+- `is_copy_constructible_v<reference>` is `true`,
+- `is_reference_v<element_type>` is `false`, and
+- `NestedAccessor` meets the accessor policy requirements
+  [[mdspan.accessor.reqmts]].
+``` cpp
+constexpr conjugated_accessor(const NestedAccessor& acc);
+```
+*Effects:* Direct-non-list-initializes *nested-accessor\_* with `acc`.
+``` cpp
+template<class OtherNestedAccessor>
+  explicit(!is_convertible_v<OtherNestedAccessor, NestedAccessor>)
+    constexpr conjugated_accessor(const conjugated_accessor<OtherNestedAccessor>& other);
+```
+*Constraints:*
+`is_constructible_v<NestedAccessor, const OtherNestedAccessor&>` is
+`true`.
+*Effects:* Direct-non-list-initializes *nested-accessor\_* with
+`other.nested_accessor()`.
+``` cpp
+constexpr reference access(data_handle_type p, size_t i) const;
+```
+*Returns:*
+*`conj-if-needed`*`(NestedAccessor::element_type(`*`nested-accessor_`*`.access(p, i)))`
+``` cpp
+constexpr typename offset_policy::data_handle_type offset(data_handle_type p, size_t i) const;
+```
+*Returns:* *`nested-accessor_`*`.offset(p, i)`
+#### Function template `conjugated` <a id="linalg.conj.conjugated">[[linalg.conj.conjugated]]</a>
+``` cpp
+template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto conjugated(mdspan<ElementType, Extents, Layout, Accessor> a);
+```
+Let `A` be
+- `remove_cvref_t<decltype(a.accessor().nested_accessor())>` if
+  `Accessor` is a specialization of `conjugated_accessor`;
+- otherwise, `Accessor` if `remove_cvref_t<ElementType>` is an
+  arithmetic type;
+- otherwise, `conjugated_accessor<Accessor>` if the expression `conj(E)`
+  is valid for any subexpression `E` whose type is
+  `remove_cvref_t<ElementType>` with overload resolution performed in a
+  context that includes the declaration
+  `template<class U> U conj(const U&) = delete;`;
+- otherwise, `Accessor`.
+*Returns:* Let `MD` be
+`mdspan<typename A::element_type, Extents, Layout, A>`.
+- `MD(a.data_handle(), a.mapping(), a.accessor().nested_accessor())` if
+  `Accessor` is a specialization of `conjugated_accessor`;
+- otherwise, `a`, if `is_same_v<A, Accessor>` is `true`;
+- otherwise,
+  `MD(a.data_handle(), a.mapping(), conjugated_accessor(a.accessor()))`.
+[*Example 1*:
+``` cpp
+void test_conjugated_complex(mdspan<complex<double>, extents<int, 10>> a) {
+  auto a_conj = conjugated(a);
+  for (int i = 0; i < a.extent(0); ++i) {
+    assert(a_conj[i] == conj(a[i]);
+  }
+  auto a_conj_conj = conjugated(a_conj);
+  for (int i = 0; i < a.extent(0); ++i) {
+    assert(a_conj_conj[i] == a[i]);
+  }
+}
+void test_conjugated_real(mdspan<double, extents<int, 10>> a) {
+  auto a_conj = conjugated(a);
+  for (int i = 0; i < a.extent(0); ++i) {
+    assert(a_conj[i] == a[i]);
+  }
+  auto a_conj_conj = conjugated(a_conj);
+  for (int i = 0; i < a.extent(0); ++i) {
+    assert(a_conj_conj[i] == a[i]);
+  }
+}
+```
+— *end example*]
+### Transpose in-place transformation <a id="linalg.transp">[[linalg.transp]]</a>
+#### Introduction <a id="linalg.transp.intro">[[linalg.transp.intro]]</a>
+`layout_transpose` is an `mdspan` layout mapping policy that swaps the
+two indices, extents, and strides of any unique `mdspan` layout mapping
+policy.
+The `transposed` function takes an `mdspan` representing a matrix, and
+returns a new `mdspan` representing the transpose of the input matrix.
+#### Exposition-only helpers for `layout_transpose` and `transposed` <a id="linalg.transp.helpers">[[linalg.transp.helpers]]</a>
+The exposition-only *`transpose-extents`* function takes an `extents`
+object representing the extents of a matrix, and returns a new `extents`
+object representing the extents of the transpose of the matrix.
+The exposition-only alias template `transpose-extents-t<InputExtents>`
+gives the type of `transpose-extents(e)` for a given `extents` object
+`e` of type `InputExtents`.
+``` cpp
+template<class IndexType, size_t InputExtent0, size_t InputExtent1>
+  constexpr extents<IndexType, InputExtent1, InputExtent0>
+    transpose-extents(const extents<IndexType, InputExtent0, InputExtent1>& in);   // exposition only
+```
+*Returns:*
+`extents<IndexType, InputExtent1, InputExtent0>(in.extent(1), in.extent(0))`
+``` cpp
+template<class InputExtents>
+  using transpose-extents-t =
+    decltype(transpose-extents(declval<InputExtents>()));        // exposition only
+```
+#### Class template `layout_transpose` <a id="linalg.transp.layout.transpose">[[linalg.transp.layout.transpose]]</a>
+`layout_transpose` is an `mdspan` layout mapping policy that swaps the
+two indices, extents, and strides of any `mdspan` layout mapping policy.
+``` cpp
+namespace std::linalg {
+  template<class Layout>
+  class layout_transpose {
+  public:
+    using nested_layout_type = Layout;
+    template<class Extents>
+    struct mapping {
+    private:
+      using nested-mapping-type =
+        Layout::template mapping<transpose-extents-t<Extents>>;  // exposition only
+    public:
+      using extents_type = Extents;
+      using index_type = extents_type::index_type;
+      using size_type = extents_type::size_type;
+      using rank_type = extents_type::rank_type;
+      using layout_type = layout_transpose;
+      constexpr explicit mapping(const nested-mapping-type&);
+      constexpr const extents_type& extents() const noexcept { return extents_; }
+      constexpr index_type required_span_size() const
+        { return nested-mapping_.required_span_size(); }
+      template<class Index0, class Index1>
+        constexpr index_type operator()(Index0 ind0, Index1 ind1) const
+        { return nested-mapping_(ind1, ind0); }
+      constexpr const nested-mapping-type& nested_mapping() const noexcept
+        { return nested-mapping_; }
+      static constexpr bool is_always_unique() noexcept
+        { return nested-mapping-type::is_always_unique(); }
+      static constexpr bool is_always_exhaustive() noexcept
+        { return nested-mapping-type::is_always_exhaustive(); }
+      static constexpr bool is_always_strided() noexcept
+        { return nested-mapping-type::is_always_strided(); }
+      constexpr bool is_unique() const { return nested-mapping_.is_unique(); }
+      constexpr bool is_exhaustive() const { return nested-mapping_.is_exhaustive(); }
+      constexpr bool is_strided() const { return nested-mapping_.is_strided(); }
+      constexpr index_type stride(size_t r) const;
+      template<class OtherExtents>
+        friend constexpr bool operator==(const mapping& x, const mapping<OtherExtents>& y);
+    private:
+      nested-mapping-type nested-mapping_;                       // exposition only
+      extents_type extents_;                                     // exposition only
+    };
+  };
+}
+```
+`Layout` shall meet the layout mapping policy requirements
+[[mdspan.layout.policy.reqmts]].
+*Mandates:*
+- `Extents` is a specialization of `std::extents`, and
+- `Extents::rank()` equals 2.
+``` cpp
+constexpr explicit mapping(const nested-mapping-type& map);
+```
+*Effects:*
+- Initializes *nested-mapping\_* with `map`, and
+- initializes *extents\_* with *`transpose-extents`*`(map.extents())`.
+``` cpp
+constexpr index_type stride(size_t r) const;
+```
+*Preconditions:*
+- `is_strided()` is `true`, and
+- `r < 2` is `true`.
+*Returns:* *`nested-mapping_`*`.stride(r == 0 ? 1 : 0)`
+``` cpp
+template<class OtherExtents>
+  friend constexpr bool operator==(const mapping& x, const mapping<OtherExtents>& y);
+```
+*Constraints:* The expression
+`x.`*`nested-mapping_`*` == y.`*`nested-mapping_`* is well-formed and
+its result is convertible to `bool`.
+*Returns:* `x.`*`nested-mapping_`*` == y.`*`nested-mapping_`*.
+#### Function template `transposed` <a id="linalg.transp.transposed">[[linalg.transp.transposed]]</a>
+The `transposed` function takes a rank-2 `mdspan` representing a matrix,
+and returns a new `mdspan` representing the input matrix’s transpose.
+The input matrix’s data are not modified, and the returned `mdspan`
+accesses the input matrix’s data in place.
+``` cpp
+template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto transposed(mdspan<ElementType, Extents, Layout, Accessor> a);
+```
+*Mandates:* `Extents::rank() == 2` is `true`.
+Let `ReturnExtents` be *`transpose-extents-t`*`<Extents>`. Let `R` be
+`mdspan<ElementType, ReturnExtents, ReturnLayout, Accessor>`, where
+`ReturnLayout` is:
+- `layout_right` if `Layout` is `layout_left`;
+- otherwise, `layout_left` if `Layout` is `layout_right`;
+- otherwise, `layout_right_padded<PaddingValue>` if `Layout` is
+  `layout_left_padded<PaddingValue>` for some `size_t` value
+  `PaddingValue`;
+- otherwise, `layout_left_padded<PaddingValue>` if `Layout` is
+  `layout_right_padded<PaddingValue>` for some `size_t` value
+  `PaddingValue`;
+- otherwise, `layout_stride` if `Layout` is `layout_stride`;
+- otherwise,
+  `layout_blas_packed<OppositeTriangle, OppositeStorageOrder>`, if
+  `Layout` is `layout_blas_packed<Triangle, StorageOrder>` for some
+  `Triangle` and `StorageOrder`, where
+  - `OppositeTriangle` is
+    ``` cpp
+    conditional_t<is_same_v<Triangle, upper_triangle_t>,
+                  lower_triangle_t, upper_triangle_t>
+    ```
+    and
+  - `OppositeStorageOrder` is
+    ``` cpp
+    conditional_t<is_same_v<StorageOrder, column_major_t>, row_major_t, column_major_t>
+    ```
+- otherwise, `NestedLayout` if `Layout` is
+  `layout_transpose<NestedLayout>` for some `NestedLayout`;
+- otherwise, `layout_transpose<Layout>`.
+*Returns:* With `ReturnMapping` being the type
+`typename ReturnLayout::template mapping<ReturnExtents>`:
+- if `Layout` is `layout_left`, `layout_right`, or a specialization of
+  `layout_blas_packed`,
+  ``` cpp
+  R(a.data_handle(), ReturnMapping(transpose-extents(a.mapping().extents())),
+    a.accessor())
+  ```
+- otherwise,
+  ``` cpp
+  R(a.data_handle(), ReturnMapping(transpose-extents(a.mapping().extents()),
+    a.mapping().stride(1)), a.accessor())
+  ```
+  if `Layout` is `layout_left_padded<PaddingValue>` for some `size_t`
+  value `PaddingValue`;
+- otherwise,
+  ``` cpp
+  R(a.data_handle(), ReturnMapping(transpose-extents(a.mapping().extents()),
+    a.mapping().stride(0)), a.accessor())
+  ```
+  if `Layout` is `layout_right_padded<PaddingValue>` for some `size_t`
+  value `PaddingValue`;
+- otherwise, if `Layout` is `layout_stride`,
+  ``` cpp
+  R(a.data_handle(), ReturnMapping(transpose-extents(a.mapping().extents()),
+    array{a.mapping().stride(1), a.mapping().stride(0)}), a.accessor())
+  ```
+- otherwise, if `Layout` is a specialization of `layout_transpose`,
+  ``` cpp
+  R(a.data_handle(), a.mapping().nested_mapping(), a.accessor())
+  ```
+- otherwise,
+  ``` cpp
+  R(a.data_handle(), ReturnMapping(a.mapping()), a.accessor())
+  ```
+[*Example 1*:
+``` cpp
+void test_transposed(mdspan<double, extents<size_t, 3, 4>> a) {
+  const auto num_rows = a.extent(0);
+  const auto num_cols = a.extent(1);
+  auto a_t = transposed(a);
+  assert(num_rows == a_t.extent(1));
+  assert(num_cols == a_t.extent(0));
+  assert(a.stride(0) == a_t.stride(1));
+  assert(a.stride(1) == a_t.stride(0));
+  for (size_t row = 0; row < num_rows; ++row) {
+    for (size_t col = 0; col < num_rows; ++col) {
+      assert(a[row, col] == a_t[col, row]);
+    }
+  }
+  auto a_t_t = transposed(a_t);
+  assert(num_rows == a_t_t.extent(0));
+  assert(num_cols == a_t_t.extent(1));
+  assert(a.stride(0) == a_t_t.stride(0));
+  assert(a.stride(1) == a_t_t.stride(1));
+  for (size_t row = 0; row < num_rows; ++row) {
+    for (size_t col = 0; col < num_rows; ++col) {
+      assert(a[row, col] == a_t_t[row, col]);
+    }
+  }
+}
+```
+— *end example*]
+### Conjugate transpose in-place transform <a id="linalg.conjtransposed">[[linalg.conjtransposed]]</a>
+The `conjugate_transposed` function returns a conjugate transpose view
+of an object. This combines the effects of `transposed` and
+`conjugated`.
+``` cpp
+template<class ElementType, class Extents, class Layout, class Accessor>
+    constexpr auto conjugate_transposed(mdspan<ElementType, Extents, Layout, Accessor> a);
+```
+*Effects:* Equivalent to: `return conjugated(transposed(a));`
+[*Example 1*:
+``` cpp
+void test_conjugate_transposed(mdspan<complex<double>, extents<size_t, 3, 4>> a) {
+  const auto num_rows = a.extent(0);
+  const auto num_cols = a.extent(1);
+  auto a_ct = conjugate_transposed(a);
+  assert(num_rows == a_ct.extent(1));
+  assert(num_cols == a_ct.extent(0));
+  assert(a.stride(0) == a_ct.stride(1));
+  assert(a.stride(1) == a_ct.stride(0));
+  for (size_t row = 0; row < num_rows; ++row) {
+    for (size_t col = 0; col < num_rows; ++col) {
+      assert(a[row, col] == conj(a_ct[col, row]));
+    }
+  }
+  auto a_ct_ct = conjugate_transposed(a_ct);
+  assert(num_rows == a_ct_ct.extent(0));
+  assert(num_cols == a_ct_ct.extent(1));
+  assert(a.stride(0) == a_ct_ct.stride(0));
+  assert(a.stride(1) == a_ct_ct.stride(1));
+  for (size_t row = 0; row < num_rows; ++row) {
+    for (size_t col = 0; col < num_rows; ++col) {
+      assert(a[row, col] == a_ct_ct[row, col]);
+      assert(conj(a_ct[col, row]) == a_ct_ct[row, col]);
+    }
+  }
+}
+```
+— *end example*]
+### Algorithm requirements based on template parameter name <a id="linalg.algs.reqs">[[linalg.algs.reqs]]</a>
+Throughout [[linalg.algs.blas1]], [[linalg.algs.blas2]], and
+[[linalg.algs.blas3]], where the template parameters are not
+constrained, the names of template parameters are used to express the
+following constraints.
+- `is_execution_policy<ExecutionPolicy>::value` is `true`
+  [[execpol.type]].
+- `Real` is any type such that `complex<Real>` is specified
+  [[complex.numbers.general]].
+- `Triangle` is either `upper_triangle_t` or `lower_triangle_t`.
+- `DiagonalStorage` is either `implicit_unit_diagonal_t` or
+  `explicit_diagonal_t`.
+[*Note 1*: Function templates that have a template parameter named
+`ExecutionPolicy` are parallel algorithms
+[[algorithms.parallel.defns]]. — *end note*]
+### BLAS 1 algorithms <a id="linalg.algs.blas1">[[linalg.algs.blas1]]</a>
+#### Complexity <a id="linalg.algs.blas1.complexity">[[linalg.algs.blas1.complexity]]</a>
+*Complexity:* All algorithms in [[linalg.algs.blas1]] with `mdspan`
+parameters perform a count of `mdspan` array accesses and arithmetic
+operations that is linear in the maximum product of extents of any
+`mdspan` parameter.
+#### Givens rotations <a id="linalg.algs.blas1.givens">[[linalg.algs.blas1.givens]]</a>
+##### Compute Givens rotation <a id="linalg.algs.blas1.givens.lartg">[[linalg.algs.blas1.givens.lartg]]</a>
+``` cpp
+template<class Real>
+  setup_givens_rotation_result<Real> setup_givens_rotation(Real a, Real b) noexcept;
+template<class Real>
+  setup_givens_rotation_result<complex<Real>>
+    setup_givens_rotation(complex<Real> a, complex<Real> b) noexcept;
+```
+These functions compute the Givens plane rotation represented by the two
+values c and s such that the 2 x 2 system of equations
+$$\left[ \begin{matrix}
+c             & s \\
+-\overline{s} & c \\
+\end{matrix} \right]
+\cdot
+\left[ \begin{matrix}
+a \\
+b \\
+\end{matrix} \right]
+=
+\left[ \begin{matrix}
+r \\
+0 \\
+\end{matrix} \right]$$
+holds, where c is always a real scalar, and c² + |s|^2 = 1. That is, c
+and s represent a 2 x 2 matrix, that when multiplied by the right by the
+input vector whose components are a and b, produces a result vector
+whose first component r is the Euclidean norm of the input vector, and
+whose second component is zero.
+[*Note 1*: These functions correspond to the LAPACK function
+`xLARTG`. — *end note*]
+*Returns:* `c, s, r`, where `c` and `s` form the Givens plane rotation
+corresponding to the input `a` and `b`, and `r` is the Euclidean norm of
+the two-component vector formed by `a` and `b`.
+##### Apply a computed Givens rotation to vectors <a id="linalg.algs.blas1.givens.rot">[[linalg.algs.blas1.givens.rot]]</a>
+``` cpp
+template<inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+  void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, Real s);
+template<class ExecutionPolicy, inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+  void apply_givens_rotation(ExecutionPolicy&& exec,
+                             InOutVec1 x, InOutVec2 y, Real c, Real s);
+template<inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+  void apply_givens_rotation(InOutVec1 x, InOutVec2 y, Real c, complex<Real> s);
+template<class ExecutionPolicy, inout-vector InOutVec1, inout-vector InOutVec2, class Real>
+  void apply_givens_rotation(ExecutionPolicy&& exec,
+                             InOutVec1 x, InOutVec2 y, Real c, complex<Real> s);
+```
+[*Note 2*: These functions correspond to the BLAS function
+`xROT`. — *end note*]
+*Mandates:* *`compatible-static-extents`*`<InOutVec1, InOutVec2>(0, 0)`
+is `true`.
+*Preconditions:* `x.extent(0)` equals `y.extent(0)`.
+*Effects:* Applies the plane rotation specified by `c` and `s` to the
+input vectors `x` and `y`, as if the rotation were a 2 x 2 matrix and
+the input vectors were successive rows of a matrix with two rows.
+#### Swap matrix or vector elements <a id="linalg.algs.blas1.swap">[[linalg.algs.blas1.swap]]</a>
+``` cpp
+template<inout-object InOutObj1, inout-object InOutObj2>
+  void swap_elements(InOutObj1 x, InOutObj2 y);
+template<class ExecutionPolicy, inout-object InOutObj1, inout-object InOutObj2>
+  void swap_elements(ExecutionPolicy&& exec, InOutObj1 x, InOutObj2 y);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`xSWAP`. — *end note*]
+*Constraints:* `x.rank()` equals `y.rank()`.
+*Mandates:* For all `r` in the range [0, `x.rank()`),
+``` cpp
+compatible-static-extents<InOutObj1, InOutObj2>(r, r)
+```
+is `true`.
+*Preconditions:* `x.extents()` equals `y.extents()`.
+*Effects:* Swaps all corresponding elements of `x` and `y`.
+#### Multiply the elements of an object in place by a scalar <a id="linalg.algs.blas1.scal">[[linalg.algs.blas1.scal]]</a>
+``` cpp
+template<class Scalar, inout-object InOutObj>
+  void scale(Scalar alpha, InOutObj x);
+template<class ExecutionPolicy, class Scalar, inout-object InOutObj>
+  void scale(ExecutionPolicy&& exec, Scalar alpha, InOutObj x);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`xSCAL`. — *end note*]
+*Effects:* Overwrites x with the result of computing the elementwise
+multiplication α x, where the scalar α is `alpha`.
+#### Copy elements of one matrix or vector into another <a id="linalg.algs.blas1.copy">[[linalg.algs.blas1.copy]]</a>
+``` cpp
+template<in-object InObj, out-object OutObj>
+  void copy(InObj x, OutObj y);
+template<class ExecutionPolicy, in-object InObj, out-object OutObj>
+  void copy(ExecutionPolicy&& exec, InObj x, OutObj y);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`xCOPY`. — *end note*]
+*Constraints:* `x.rank()` equals `y.rank()`.
+*Mandates:* For all `r` in the range [ 0, `x.rank()`),
+``` cpp
+compatible-static-extents<InObj, OutObj>(r, r)
+```
+is `true`.
+*Preconditions:* `x.extents()` equals `y.extents()`.
+*Effects:* Assigns each element of x to the corresponding element of y.
+#### Add vectors or matrices elementwise <a id="linalg.algs.blas1.add">[[linalg.algs.blas1.add]]</a>
+``` cpp
+template<in-object InObj1, in-object InObj2, out-object OutObj>
+  void add(InObj1 x, InObj2 y, OutObj z);
+template<class ExecutionPolicy, in-object InObj1, in-object InObj2, out-object OutObj>
+  void add(ExecutionPolicy&& exec,
+           InObj1 x, InObj2 y, OutObj z);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`xAXPY`. — *end note*]
+*Constraints:* `x.rank()`, `y.rank()`, and `z.rank()` are all equal.
+*Mandates:* *`possibly-addable`*`<InObj1, InObj2, OutObj>()` is `true`.
+*Preconditions:* *`addable`*`(x,y,z)` is `true`.
+*Effects:* Computes z = x + y.
+*Remarks:* `z` may alias `x` or `y`.
+#### Dot product of two vectors <a id="linalg.algs.blas1.dot">[[linalg.algs.blas1.dot]]</a>
+[*Note 1*: The functions in this section correspond to the BLAS
+functions `xDOT`, `xDOTU`, and `xDOTC`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas1.dot]].
+*Mandates:* `compatible-static-extents<InVec1, InVec2>(0, 0)` is `true`.
+*Preconditions:* `v1.extent(0)` equals `v2.extent(0)`.
+``` cpp
+template<in-vector InVec1, in-vector InVec2, class Scalar>
+  Scalar dot(InVec1 v1, InVec2 v2, Scalar init);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, class Scalar>
+  Scalar dot(ExecutionPolicy&& exec,
+             InVec1 v1, InVec2 v2, Scalar init);
+```
+These functions compute a non-conjugated dot product with an explicitly
+specified result type.
+*Returns:* Let `N` be `v1.extent(0)`.
+- `init` if `N` is zero;
+- otherwise, *GENERALIZED_SUM*(plus\<\>(), init, v1\[0\]\*v2\[0\], …,
+  v1\[N-1\]\*v2\[N-1\]).
+*Remarks:* If `InVec1::value_type`, `InVec2::value_type`, and `Scalar`
+are all floating-point types or specializations of `complex`, and if
+`Scalar` has higher precision than `InVec1::value_type` or
+`InVec2::value_type`, then intermediate terms in the sum use `Scalar`’s
+precision or greater.
+``` cpp
+template<in-vector InVec1, in-vector InVec2>
+    auto dot(InVec1 v1, InVec2 v2);
+  template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2>
+    auto dot(ExecutionPolicy&& exec,
+             InVec1 v1, InVec2 v2);
+```
+These functions compute a non-conjugated dot product with a default
+result type.
+*Effects:* Let `T` be
+`decltype(declval<typename InVec1::value_type>() * declval<typename InVec2::value_type>())`.
+Then,
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return dot(v1, v2, T{});
+  ```
+  and
+- the three-parameter overload is equivalent to:
+  ``` cpp
+  return dot(std::forward<ExecutionPolicy>(exec), v1, v2, T{});
+  ```
+``` cpp
+template<in-vector InVec1, in-vector InVec2, class Scalar>
+  Scalar dotc(InVec1 v1, InVec2 v2, Scalar init);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, class Scalar>
+  Scalar dotc(ExecutionPolicy&& exec,
+              InVec1 v1, InVec2 v2, Scalar init);
+```
+These functions compute a conjugated dot product with an explicitly
+specified result type.
+*Effects:*
+- The three-parameter overload is equivalent to:
+  ``` cpp
+  return dot(conjugated(v1), v2, init);
+  ```
+  and
+- the four-parameter overload is equivalent to:
+  ``` cpp
+  return dot(std::forward<ExecutionPolicy>(exec), conjugated(v1), v2, init);
+  ```
+``` cpp
+template<in-vector InVec1, in-vector InVec2>
+  auto dotc(InVec1 v1, InVec2 v2);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2>
+  auto dotc(ExecutionPolicy&& exec,
+            InVec1 v1, InVec2 v2);
+```
+These functions compute a conjugated dot product with a default result
+type.
+*Effects:* Let `T` be
+`decltype(`*`conj-if-needed`*`(declval<typename InVec1::value_type>()) * declval<typename InVec2::value_type>())`.
+Then,
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return dotc(v1, v2, T{});
+  ```
+  and
+- the three-parameter overload is equivalent to
+  ``` cpp
+  return dotc(std::forward<ExecutionPolicy>(exec), v1, v2, T{});
+  ```
+#### Scaled sum of squares of a vector’s elements <a id="linalg.algs.blas1.ssq">[[linalg.algs.blas1.ssq]]</a>
+``` cpp
+template<in-vector InVec, class Scalar>
+  sum_of_squares_result<Scalar> vector_sum_of_squares(InVec v, sum_of_squares_result<Scalar> init);
+template<class ExecutionPolicy, in-vector InVec, class Scalar>
+  sum_of_squares_result<Scalar> vector_sum_of_squares(ExecutionPolicy&& exec,
+                                                      InVec v, sum_of_squares_result<Scalar> init);
+```
+[*Note 1*: These functions correspond to the LAPACK function
+`xLASSQ`. — *end note*]
+*Mandates:*
+`decltype(`*`abs-if-needed`*`(declval<typename InVec::value_type>()))`
+is convertible to `Scalar`.
+*Effects:* Returns a value `result` such that
+- `result.scaling_factor` is the maximum of `init.scaling_factor` and
+  *`abs-if-needed`*`(x[i])` for all `i` in the domain of `v`; and
+- let `s2init` be
+  ``` cpp
+  init.scaling_factor * init.scaling_factor * init.scaled_sum_of_squares
+  ```
+  then
+  `result.scaling_factor * result.scaling_factor * result.scaled_sum_of_squares`
+  equals the sum of `s2init` and the squares of
+  *`abs-if-needed`*`(x[i])` for all `i` in the domain of `v`.
+*Remarks:* If `InVec::value_type`, and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InVec::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+#### Euclidean norm of a vector <a id="linalg.algs.blas1.nrm2">[[linalg.algs.blas1.nrm2]]</a>
+``` cpp
+template<in-vector InVec, class Scalar>
+  Scalar vector_two_norm(InVec v, Scalar init);
+template<class ExecutionPolicy, in-vector InVec, class Scalar>
+  Scalar vector_two_norm(ExecutionPolicy&& exec, InVec v, Scalar init);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`xNRM2`. — *end note*]
+*Mandates:* Let `a` be
+*`abs-if-needed`*`(declval<typename InVec::value_type>())`. Then,
+`decltype(init + a * a` is convertible to `Scalar`.
+*Returns:* The square root of the sum of the square of `init` and the
+squares of the absolute values of the elements of `v`.
+[*Note 2*: For `init` equal to zero, this is the Euclidean norm (also
+called 2-norm) of the vector `v`. — *end note*]
+*Remarks:* If `InVec::value_type`, and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InVec::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+[*Note 3*: An implementation of this function for floating-point types
+`T` can use the `scaled_sum_of_squares` result from
+`vector_sum_of_squares(x, {.scaling_factor=1.0, .scaled_sum_of_squares=init})`. — *end note*]
+``` cpp
+template<in-vector InVec>
+  auto vector_two_norm(InVec v);
+template<class ExecutionPolicy, in-vector InVec>
+  auto vector_two_norm(ExecutionPolicy&& exec, InVec v);
+```
+*Effects:* Let `a` be
+*`abs-if-needed`*`(declval<typename InVec::value_type>())`. Let `T` be
+`decltype(a * a)`. Then,
+- the one-parameter overload is equivalent to:
+  ``` cpp
+  return vector_two_norm(v, T{});
+  ```
+  and
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return vector_two_norm(std::forward<ExecutionPolicy>(exec), v, T{});
+  ```
+#### Sum of absolute values of vector elements <a id="linalg.algs.blas1.asum">[[linalg.algs.blas1.asum]]</a>
+``` cpp
+template<in-vector InVec, class Scalar>
+  Scalar vector_abs_sum(InVec v, Scalar init);
+template<class ExecutionPolicy, in-vector InVec, class Scalar>
+  Scalar vector_abs_sum(ExecutionPolicy&& exec, InVec v, Scalar init);
+```
+[*Note 1*: These functions correspond to the BLAS functions `SASUM`,
+`DASUM`, `SCASUM`, and `DZASUM`. — *end note*]
+*Mandates:*
+``` cpp
+decltype(init + abs-if-needed(real-if-needed(declval<typename InVec::value_type>())) +
+                abs-if-needed(imag-if-needed(declval<typename InVec::value_type>())))
+```
+is convertible to `Scalar`.
+*Returns:* Let `N` be `v.extent(0)`.
+- `init` if `N` is zero;
+- otherwise, if `InVec::value_type` is an arithmetic type,
+  ``` cpp
+  GENERALIZED_SUM(plus<>(), init, abs-if-needed(v[0]), …, abs-if-needed(v[N-1]))
+  ```
+- otherwise,
+  ``` cpp
+  GENERALIZED_SUM(plus<>(), init,
+         abs-if-needed(real-if-needed(v[0])) + abs-if-needed(imag-if-needed(v[0])),
+         …,
+         abs-if-needed(real-if-needed(v[N-1])) + abs-if-needed(imag-if-needed(v[N-1])))
+  ```
+*Remarks:* If `InVec::value_type` and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InVec::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+``` cpp
+template<in-vector InVec>
+  auto vector_abs_sum(InVec v);
+template<class ExecutionPolicy, in-vector InVec>
+  auto vector_abs_sum(ExecutionPolicy&& exec, InVec v);
+```
+*Effects:* Let `T` be `typename InVec::value_type`. Then,
+- the one-parameter overload is equivalent to:
+  ``` cpp
+  return vector_abs_sum(v, T{});
+  ```
+  and
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return vector_abs_sum(std::forward<ExecutionPolicy>(exec), v, T{});
+  ```
+#### Index of maximum absolute value of vector elements <a id="linalg.algs.blas1.iamax">[[linalg.algs.blas1.iamax]]</a>
+``` cpp
+template<in-vector InVec>
+  typename InVec::extents_type vector_idx_abs_max(InVec v);
+template<class ExecutionPolicy, in-vector InVec>
+  typename InVec::extents_type vector_idx_abs_max(ExecutionPolicy&& exec, InVec v);
+```
+[*Note 1*: These functions correspond to the BLAS function
+`IxAMAX`. — *end note*]
+Let `T` be
+``` cpp
+decltype(abs-if-needed(real-if-needed(declval<typename InVec::value_type>())) +
+         abs-if-needed(imag-if-needed(declval<typename InVec::value_type>())))
+```
+*Mandates:* `declval<T>() < declval<T>()` is a valid expression.
+*Returns:*
+- `numeric_limits<typename InVec::size_type>::max()` if `v` has zero
+  elements;
+- otherwise, the index of the first element of `v` having largest
+  absolute value, if `InVec::value_type` is an arithmetic type;
+- otherwise, the index of the first element `vₑ` of `v` for which
+  ``` cpp
+  abs-if-needed(real-if-needed($v_e$)) + abs-if-needed(imag-if-needed($v_e$))
+  ```
+  has the largest value.
+#### Frobenius norm of a matrix <a id="linalg.algs.blas1.matfrobnorm">[[linalg.algs.blas1.matfrobnorm]]</a>
+[*Note 1*: These functions exist in the BLAS standard but are not part
+of the reference implementation. — *end note*]
+``` cpp
+template<in-matrix InMat, class Scalar>
+  Scalar matrix_frob_norm(InMat A, Scalar init);
+template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+  Scalar matrix_frob_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+```
+*Mandates:* Let `a` be
+*`abs-if-needed`*`(declval<typename InMat::value_type>())`. Then,
+`decltype(init + a * a)` is convertible to `Scalar`.
+*Returns:* The square root of the sum of squares of `init` and the
+absolute values of the elements of `A`.
+[*Note 1*: For `init` equal to zero, this is the Frobenius norm of the
+matrix `A`. — *end note*]
+*Remarks:* If `InMat::value_type` and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InMat::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+``` cpp
+template<in-matrix InMat>
+  auto matrix_frob_norm(InMat A);
+template<class ExecutionPolicy, in-matrix InMat>
+  auto matrix_frob_norm(ExecutionPolicy&& exec, InMat A);
+```
+*Effects:* Let `a` be
+*`abs-if-needed`*`(declval<typename InMat::value_type>())`. Let `T` be
+`decltype(a * a)`. Then,
+- the one-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_frob_norm(A, T{});
+  ```
+  and
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_frob_norm(std::forward<ExecutionPolicy>(exec), A, T{});
+  ```
+#### One norm of a matrix <a id="linalg.algs.blas1.matonenorm">[[linalg.algs.blas1.matonenorm]]</a>
+[*Note 1*: These functions exist in the BLAS standard but are not part
+of the reference implementation. — *end note*]
+``` cpp
+template<in-matrix InMat, class Scalar>
+  Scalar matrix_one_norm(InMat A, Scalar init);
+template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+  Scalar matrix_one_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+```
+*Mandates:*
+`decltype(`*`abs-if-needed`*`(declval<typename InMat::value_type>()))`
+is convertible to `Scalar`.
+*Returns:*
+- `init` if `A.extent(1)` is zero;
+- otherwise, the sum of `init` and the one norm of the matrix A.
+[*Note 1*: The one norm of the matrix `A` is the maximum over all
+columns of `A`, of the sum of the absolute values of the elements of the
+column. — *end note*]
+*Remarks:* If `InMat::value_type` and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InMat::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+``` cpp
+template<in-matrix InMat>
+  auto matrix_one_norm(InMat A);
+template<class ExecutionPolicy, in-matrix InMat>
+  auto matrix_one_norm(ExecutionPolicy&& exec, InMat A);
+```
+*Effects:* Let `T` be
+`decltype(`*`abs-if-needed`*`(declval<typename InMat::value_type>())`.
+Then,
+- the one-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_one_norm(A, T{});
+  ```
+  and
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_one_norm(std::forward<ExecutionPolicy>(exec), A, T{});
+  ```
+#### Infinity norm of a matrix <a id="linalg.algs.blas1.matinfnorm">[[linalg.algs.blas1.matinfnorm]]</a>
+[*Note 1*: These functions exist in the BLAS standard but are not part
+of the reference implementation. — *end note*]
+``` cpp
+template<in-matrix InMat, class Scalar>
+  Scalar matrix_inf_norm(InMat A, Scalar init);
+template<class ExecutionPolicy, in-matrix InMat, class Scalar>
+  Scalar matrix_inf_norm(ExecutionPolicy&& exec, InMat A, Scalar init);
+```
+*Mandates:*
+`decltype(`*`abs-if-needed`*`(declval<typename InMat::value_type>()))`
+is convertible to `Scalar`.
+*Returns:*
+- `init` if `A.extent(0)` is zero;
+- otherwise, the sum of `init` and the infinity norm of the matrix `A`.
+[*Note 1*: The infinity norm of the matrix `A` is the maximum over all
+rows of `A`, of the sum of the absolute values of the elements of the
+row. — *end note*]
+*Remarks:* If `InMat::value_type` and `Scalar` are all floating-point
+types or specializations of `complex`, and if `Scalar` has higher
+precision than `InMat::value_type`, then intermediate terms in the sum
+use `Scalar`’s precision or greater.
+``` cpp
+template<in-matrix InMat>
+  auto matrix_inf_norm(InMat A);
+template<class ExecutionPolicy, in-matrix InMat>
+  auto matrix_inf_norm(ExecutionPolicy&& exec, InMat A);
+```
+*Effects:* Let `T` be
+`decltype(`*`abs-if-needed`*`(declval<typename InMat::value_type>())`.
+Then,
+- the one-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_inf_norm(A, T{});
+  ```
+  and
+- the two-parameter overload is equivalent to:
+  ``` cpp
+  return matrix_inf_norm(std::forward<ExecutionPolicy>(exec), A, T{});
+  ```
+### BLAS 2 algorithms <a id="linalg.algs.blas2">[[linalg.algs.blas2]]</a>
+#### General matrix-vector product <a id="linalg.algs.blas2.gemv">[[linalg.algs.blas2.gemv]]</a>
+[*Note 1*: These functions correspond to the BLAS function
+`xGEMV`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.gemv]].
+*Mandates:*
+- `possibly-multipliable<decltype(A), decltype(x), decltype(y)>()` is
+  `true`, and
+- `possibly-addable<decltype(x), decltype(y), decltype(z)>()` is `true`
+  for those overloads that take a `z` parameter.
+*Preconditions:*
+- `multipliable(A,x,y)` is `true`, and
+- `addable(x,y,z)` is `true` for those overloads that take a `z`
+  parameter.
+*Complexity:* 𝑂(`x.extent(0)` × `A.extent(1)`).
+``` cpp
+template<in-matrix InMat, in-vector InVec, out-vector OutVec>
+  void matrix_vector_product(InMat A, InVec x, OutVec y);
+template<class ExecutionPolicy, in-matrix InMat, in-vector InVec, out-vector OutVec>
+  void matrix_vector_product(ExecutionPolicy&& exec, InMat A, InVec x, OutVec y);
+```
+These functions perform an overwriting matrix-vector product.
+*Effects:* Computes y = A x.
+[*Example 1*:
+``` cpp
+constexpr size_t num_rows = 5;
+constexpr size_t num_cols = 6;
+// y = 3.0 * A * x
+void scaled_matvec_1(mdspan<double, extents<size_t, num_rows, num_cols>> A,
+  mdspan<double, extents<size_t, num_cols>> x, mdspan<double, extents<size_t, num_rows>> y) {
+  matrix_vector_product(scaled(3.0, A), x, y);
+}
+// z = 7.0 times the transpose of A, times y
+void scaled_transposed_matvec(mdspan<double, extents<size_t, num_rows, num_cols>> A,
+  mdspan<double, extents<size_t, num_rows>> y, mdspan<double, extents<size_t, num_cols>> z) {
+  matrix_vector_product(scaled(7.0, transposed(A)), y, z);
+}
+```
+— *end example*]
+``` cpp
+template<in-matrix InMat, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void matrix_vector_product(InMat A, InVec1 x, InVec2 y, OutVec z);
+  template<class ExecutionPolicy,
+           in-matrix InMat, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+    void matrix_vector_product(ExecutionPolicy&& exec,
+                               InMat A, InVec1 x, InVec2 y, OutVec z);
+```
+These functions perform an updating matrix-vector product.
+*Effects:* Computes z = y + A x.
+*Remarks:* `z` may alias `y`.
+[*Example 2*:
+``` cpp
+// y = 3.0 * A * x + 2.0 * y
+void scaled_matvec_2(mdspan<double, extents<size_t, num_rows, num_cols>> A,
+  mdspan<double, extents<size_t, num_cols>> x, mdspan<double, extents<size_t, num_rows>> y) {
+  matrix_vector_product(scaled(3.0, A), x, scaled(2.0, y), y);
+}
+```
+— *end example*]
+#### Symmetric matrix-vector product <a id="linalg.algs.blas2.symv">[[linalg.algs.blas2.symv]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xSYMV` and
+`xSPMV`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.symv]].
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+- `possibly-multipliable<decltype(A), decltype(x), decltype(y)>()` is
+  `true`; and
+- `possibly-addable<decltype(x), decltype(y), decltype(z)>()` is `true`
+  for those overloads that take a `z` parameter.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`,
+- `multipliable(A,x,y)` is `true`, and
+- `addable(x,y,z)` is `true` for those overloads that take a `z`
+  parameter.
+*Complexity:* 𝑂(`x.extent(0)` × `A.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+  void symmetric_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+  void symmetric_matrix_vector_product(ExecutionPolicy&& exec,
+                                       InMat A, Triangle t, InVec x, OutVec y);
+```
+These functions perform an overwriting symmetric matrix-vector product,
+taking into account the `Triangle` parameter that applies to the
+symmetric matrix `A` [[linalg.general]].
+*Effects:* Computes y = A x.
+``` cpp
+template<in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void symmetric_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void symmetric_matrix_vector_product(ExecutionPolicy&& exec,
+                                       InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+```
+These functions perform an updating symmetric matrix-vector product,
+taking into account the `Triangle` parameter that applies to the
+symmetric matrix `A` [[linalg.general]].
+*Effects:* Computes z = y + A x.
+*Remarks:* `z` may alias `y`.
+#### Hermitian matrix-vector product <a id="linalg.algs.blas2.hemv">[[linalg.algs.blas2.hemv]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xHEMV` and
+`xHPMV`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.hemv]].
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+- `possibly-multipliable<decltype(A), decltype(x), decltype(y)>()` is
+  `true`; and
+- `possibly-addable<decltype(x), decltype(y), decltype(z)>()` is `true`
+  for those overloads that take a `z` parameter.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`,
+- `multipliable(A, x, y)` is `true`, and
+- `addable(x, y, z)` is `true` for those overloads that take a `z`
+  parameter.
+*Complexity:* 𝑂(`x.extent(0)` × `A.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+  void hermitian_matrix_vector_product(InMat A, Triangle t, InVec x, OutVec y);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, in-vector InVec, out-vector OutVec>
+  void hermitian_matrix_vector_product(ExecutionPolicy&& exec,
+                                       InMat A, Triangle t, InVec x, OutVec y);
+```
+These functions perform an overwriting Hermitian matrix-vector product,
+taking into account the `Triangle` parameter that applies to the
+Hermitian matrix `A` [[linalg.general]].
+*Effects:* Computes y = A x.
+``` cpp
+template<in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void hermitian_matrix_vector_product(InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void hermitian_matrix_vector_product(ExecutionPolicy&& exec,
+                                       InMat A, Triangle t, InVec1 x, InVec2 y, OutVec z);
+```
+These functions perform an updating Hermitian matrix-vector product,
+taking into account the `Triangle` parameter that applies to the
+Hermitian matrix `A` [[linalg.general]].
+*Effects:* Computes z = y + A x.
+*Remarks:* `z` may alias `y`.
+#### Triangular matrix-vector product <a id="linalg.algs.blas2.trmv">[[linalg.algs.blas2.trmv]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xTRMV` and
+`xTPMV`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.trmv]].
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+- `compatible-static-extents<decltype(A), decltype(y)>(0, 0)` is `true`;
+- `compatible-static-extents<decltype(A), decltype(x)>(0, 0)` is `true`
+  for those overloads that take an `x` parameter; and
+- `compatible-static-extents<decltype(A), decltype(z)>(0, 0)` is `true`
+  for those overloads that take a `z` parameter.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`,
+- `A.extent(0)` equals `y.extent(0)`,
+- `A.extent(0)` equals `x.extent(0)` for those overloads that take an
+  `x` parameter, and
+- `A.extent(0)` equals `z.extent(0)` for those overloads that take a `z`
+  parameter.
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, in-vector InVec,
+         out-vector OutVec>
+  void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, in-vector InVec,
+         out-vector OutVec>
+  void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y);
+```
+These functions perform an overwriting triangular matrix-vector product,
+taking into account the `Triangle` and `DiagonalStorage` parameters that
+apply to the triangular matrix `A` [[linalg.general]].
+*Effects:* Computes y = A x.
+*Complexity:* 𝑂(`x.extent(0)` × `A.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+  void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InOutVec y);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+  void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d, InOutVec y);
+```
+These functions perform an in-place triangular matrix-vector product,
+taking into account the `Triangle` and `DiagonalStorage` parameters that
+apply to the triangular matrix `A` [[linalg.general]].
+[*Note 1*: Performing this operation in place hinders parallelization.
+However, other `ExecutionPolicy` specific optimizations, such as
+vectorization, are still possible. — *end note*]
+*Effects:* Computes a vector y' such that y' = A y, and assigns each
+element of y' to the corresponding element of y.
+*Complexity:* 𝑂(`y.extent(0)` × `A.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+         in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d,
+                                        InVec1 x, InVec2 y, OutVec z);
+template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+         in-vector InVec1, in-vector InVec2, out-vector OutVec>
+  void triangular_matrix_vector_product(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d,
+                                        InVec1 x, InVec2 y, OutVec z);
+```
+These functions perform an updating triangular matrix-vector product,
+taking into account the `Triangle` and `DiagonalStorage` parameters that
+apply to the triangular matrix `A` [[linalg.general]].
+*Effects:* Computes z = y + A x.
+*Complexity:* 𝑂(`x.extent(0)` × `A.extent(1)`).
+*Remarks:* `z` may alias `y`.
+#### Solve a triangular linear system <a id="linalg.algs.blas2.trsv">[[linalg.algs.blas2.trsv]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xTRSV` and
+`xTPSV`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.trsv]].
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+- `compatible-static-extents<decltype(A), decltype(b)>(0, 0)` is `true`;
+  and
+- `compatible-static-extents<decltype(A), decltype(x)>(0, 0)` is `true`
+  for those overloads that take an `x` parameter.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`,
+- `A.extent(0)` equals `b.extent(0)`, and
+- `A.extent(0)` equals `x.extent(0)` for those overloads that take an
+  `x` parameter.
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x, BinaryDivideOp divide);
+  template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+           in-vector InVec, out-vector OutVec, class BinaryDivideOp>
+    void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                        InMat A, Triangle t, DiagonalStorage d,
+                                        InVec b, OutVec x, BinaryDivideOp divide);
+```
+These functions perform a triangular solve, taking into account the
+`Triangle` and `DiagonalStorage` parameters that apply to the triangular
+matrix `A` [[linalg.general]].
+*Effects:* Computes a vector x' such that b = A x', and assigns each
+element of x' to the corresponding element of x. If no such x' exists,
+then the elements of `x` are valid but unspecified.
+*Complexity:* 𝑂(`A.extent(1)` × `b.extent(0)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+         in-vector InVec, out-vector OutVec>
+  void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InVec b, OutVec x);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_vector_solve(A, t, d, b, x, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+         in-vector InVec, out-vector OutVec>
+  void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                      InMat A, Triangle t, DiagonalStorage d, InVec b, OutVec x);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_vector_solve(std::forward<ExecutionPolicy>(exec),
+                               A, t, d, b, x, divides<void>{});
+```
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-vector InOutVec, class BinaryDivideOp>
+  void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d,
+                                      InOutVec b, BinaryDivideOp divide);
+template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-vector InOutVec, class BinaryDivideOp>
+  void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                      InMat A, Triangle t, DiagonalStorage d,
+                                      InOutVec b, BinaryDivideOp divide);
+```
+These functions perform an in-place triangular solve, taking into
+account the `Triangle` and `DiagonalStorage` parameters that apply to
+the triangular matrix `A` [[linalg.general]].
+[*Note 1*: Performing triangular solve in place hinders
+parallelization. However, other `ExecutionPolicy` specific
+optimizations, such as vectorization, are still possible. — *end note*]
+*Effects:* Computes a vector x' such that b = A x', and assigns each
+element of x' to the corresponding element of b. If no such x' exists,
+then the elements of `b` are valid but unspecified.
+*Complexity:* 𝑂(`A.extent(1)` × `b.extent(0)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+  void triangular_matrix_vector_solve(InMat A, Triangle t, DiagonalStorage d, InOutVec b);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_vector_solve(A, t, d, b, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, inout-vector InOutVec>
+  void triangular_matrix_vector_solve(ExecutionPolicy&& exec,
+                                      InMat A, Triangle t, DiagonalStorage d, InOutVec b);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_vector_solve(std::forward<ExecutionPolicy>(exec),
+                               A, t, d, b, divides<void>{});
+```
+#### Rank-1 (outer product) update of a matrix <a id="linalg.algs.blas2.rank1">[[linalg.algs.blas2.rank1]]</a>
+``` cpp
+template<in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+  void matrix_rank_1_update(InVec1 x, InVec2 y, InOutMat A);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+  void matrix_rank_1_update(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A);
+```
+These functions perform a nonsymmetric nonconjugated rank-1 update.
+[*Note 1*: These functions correspond to the BLAS functions `xGER` (for
+real element types) and `xGERU` (for complex element
+types). — *end note*]
+*Mandates:* *`possibly-multipliable`*`<InOutMat, InVec2, InVec1>()` is
+`true`.
+*Preconditions:* *`multipliable`*`(A, y, x)` is `true`.
+*Effects:* Computes a matrix A' such that $A' = A + x y^T$, and assigns
+each element of A' to the corresponding element of A.
+*Complexity:* 𝑂(`x.extent(0)` × `y.extent(0)`).
+``` cpp
+template<in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+  void matrix_rank_1_update_c(InVec1 x, InVec2 y, InOutMat A);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2, inout-matrix InOutMat>
+  void matrix_rank_1_update_c(ExecutionPolicy&& exec, InVec1 x, InVec2 y, InOutMat A);
+```
+These functions perform a nonsymmetric conjugated rank-1 update.
+[*Note 2*: These functions correspond to the BLAS functions `xGER` (for
+real element types) and `xGERC` (for complex element
+types). — *end note*]
+*Effects:*
+- For the overloads without an `ExecutionPolicy` argument, equivalent
+  to:
+  ``` cpp
+  matrix_rank_1_update(x, conjugated(y), A);
+  ```
+- otherwise, equivalent to:
+  ``` cpp
+  matrix_rank_1_update(std::forward<ExecutionPolicy>(exec), x, conjugated(y), A);
+  ```
+#### Symmetric or Hermitian Rank-1 (outer product) update of a matrix <a id="linalg.algs.blas2.symherrank1">[[linalg.algs.blas2.symherrank1]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xSYR`,
+`xSPR`, `xHER`, and `xHPR`. They have overloads taking a scaling factor
+`alpha`, because it would be impossible to express the update
+$A = A - x x^T$ otherwise. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.symherrank1]].
+*Mandates:*
+- If `InOutMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+  and
+- `compatible-static-extents<decltype(A), decltype(x)>(0, 0)` is `true`.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`, and
+- `A.extent(0)` equals `x.extent(0)`.
+*Complexity:* 𝑂(`x.extent(0)` × `x.extent(0)`).
+``` cpp
+template<class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t);
+template<class ExecutionPolicy,
+         class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec,
+                                      Scalar alpha, InVec x, InOutMat A, Triangle t);
+```
+These functions perform a symmetric rank-1 update of the symmetric
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes a matrix A' such that $A' = A + \alpha x x^T$, where
+the scalar α is `alpha`, and assigns each element of A' to the
+corresponding element of A.
+``` cpp
+template<in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_1_update(InVec x, InOutMat A, Triangle t);
+template<class ExecutionPolicy,
+         in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t);
+```
+These functions perform a symmetric rank-1 update of the symmetric
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes a matrix A' such that $A' = A + x x^T$ and assigns
+each element of A' to the corresponding element of A.
+``` cpp
+template<class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_1_update(Scalar alpha, InVec x, InOutMat A, Triangle t);
+template<class ExecutionPolicy,
+         class Scalar, in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec,
+                                      Scalar alpha, InVec x, InOutMat A, Triangle t);
+```
+These functions perform a Hermitian rank-1 update of the Hermitian
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes A' such that $A' = A + \alpha x x^H$, where the
+scalar α is `alpha`, and assigns each element of A' to the corresponding
+element of A.
+``` cpp
+template<in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_1_update(InVec x, InOutMat A, Triangle t);
+template<class ExecutionPolicy,
+         in-vector InVec, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_1_update(ExecutionPolicy&& exec, InVec x, InOutMat A, Triangle t);
+```
+These functions perform a Hermitian rank-1 update of the Hermitian
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes a matrix A' such that $A' = A + x x^H$ and assigns
+each element of A' to the corresponding element of A.
+#### Symmetric and Hermitian rank-2 matrix updates <a id="linalg.algs.blas2.rank2">[[linalg.algs.blas2.rank2]]</a>
+[*Note 1*: These functions correspond to the BLAS functions
+`xSYR2`,`xSPR2`, `xHER2` and `xHPR2`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas2.rank2]].
+*Mandates:*
+- If `InOutMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+  and
+- `possibly-multipliable<decltype(A), decltype(x), decltype(y)>()` is
+  `true`.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`, and
+- `multipliable(A, x, y)` is `true`.
+*Complexity:* 𝑂(`x.extent(0)` × `y.extent(0)`).
+``` cpp
+template<in-vector InVec1, in-vector InVec2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_2_update(ExecutionPolicy&& exec,
+                                      InVec1 x, InVec2 y, InOutMat A, Triangle t);
+```
+These functions perform a symmetric rank-2 update of the symmetric
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes A' such that $A' = A + x y^T + y x^T$ and assigns
+each element of A' to the corresponding element of A.
+``` cpp
+template<in-vector InVec1, in-vector InVec2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_2_update(InVec1 x, InVec2 y, InOutMat A, Triangle t);
+template<class ExecutionPolicy, in-vector InVec1, in-vector InVec2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_2_update(ExecutionPolicy&& exec,
+                                      InVec1 x, InVec2 y, InOutMat A, Triangle t);
+```
+These functions perform a Hermitian rank-2 update of the Hermitian
+matrix `A`, taking into account the `Triangle` parameter that applies to
+`A` [[linalg.general]].
+*Effects:* Computes A' such that $A' = A + x y^H + y x^H$ and assigns
+each element of A' to the corresponding element of A.
+### BLAS 3 algorithms <a id="linalg.algs.blas3">[[linalg.algs.blas3]]</a>
+#### General matrix-matrix product <a id="linalg.algs.blas3.gemm">[[linalg.algs.blas3.gemm]]</a>
+[*Note 1*: These functions correspond to the BLAS function
+`xGEMM`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas3.gemm]] in addition to function-specific elements.
+*Mandates:*
+`possibly-multipliable<decltype(A), decltype(B), decltype(C)>()` is
+`true`.
+*Preconditions:* `multipliable(A, B, C)` is `true`.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `B.extent(1)`).
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2, out-matrix OutMat>
+    void matrix_product(InMat1 A, InMat2 B, OutMat C);
+  template<class ExecutionPolicy, in-matrix InMat1, in-matrix InMat2, out-matrix OutMat>
+    void matrix_product(ExecutionPolicy&& exec, InMat1 A, InMat2 B, OutMat C);
+```
+*Effects:* Computes C = A B.
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+  void matrix_product(InMat1 A, InMat2 B, InMat3 E, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+  void matrix_product(ExecutionPolicy&& exec, InMat1 A, InMat2 B, InMat3 E, OutMat C);
+```
+*Mandates:* *`possibly-addable`*`<InMat3, InMat3, OutMat>()` is `true`.
+*Preconditions:* *`addable`*`(E, E, C)` is `true`.
+*Effects:* Computes C = E + A B.
+*Remarks:* `C` may alias `E`.
+#### Symmetric, Hermitian, and triangular matrix-matrix product <a id="linalg.algs.blas3.xxmm">[[linalg.algs.blas3.xxmm]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xSYMM`,
+`xHEMM`, and `xTRMM`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas3.xxmm]] in addition to function-specific elements.
+*Mandates:*
+- `possibly-multipliable<decltype(A), decltype(B), decltype(C)>()` is
+  `true`, and
+- `possibly-addable<decltype(E), decltype(E), decltype(C)>()` is `true`
+  for those overloads that take an `E` parameter.
+*Preconditions:*
+- `multipliable(A, B, C)` is `true`, and
+- `addable(E, E, C)` is `true` for those overloads that take an `E`
+  parameter.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `B.extent(1)`).
+``` cpp
+template<in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+  void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+  void symmetric_matrix_product(ExecutionPolicy&& exec, InMat1 A, Triangle t, InMat2 B, OutMat C);
+template<in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+  void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, in-matrix InMat2, out-matrix OutMat>
+  void hermitian_matrix_product(ExecutionPolicy&& exec, InMat1 A, Triangle t, InMat2 B, OutMat C);
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C);
+template<class ExecutionPolicy, in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_product(ExecutionPolicy&& exec,
+                                 InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, OutMat C);
+```
+These functions perform a matrix-matrix multiply, taking into account
+the `Triangle` and `DiagonalStorage` (if applicable) parameters that
+apply to the symmetric, Hermitian, or triangular (respectively) matrix
+`A` [[linalg.general]].
+*Mandates:*
+- If `InMat1` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument; and
+- *`compatible-static-extents`*`<InMat1, InMat1>(0, 1)` is `true`.
+*Preconditions:* `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes C = A B.
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, out-matrix OutMat>
+    void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                  InMat1 A, InMat2 B, Triangle t, OutMat C);
+  template<in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           out-matrix OutMat>
+    void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C);
+  template<class ExecutionPolicy,
+           in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+           out-matrix OutMat>
+    void triangular_matrix_product(ExecutionPolicy&& exec,
+                                   InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, OutMat C);
+```
+These functions perform a matrix-matrix multiply, taking into account
+the `Triangle` and `DiagonalStorage` (if applicable) parameters that
+apply to the symmetric, Hermitian, or triangular (respectively) matrix
+`B` [[linalg.general]].
+*Mandates:*
+- If `InMat2` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument; and
+- *`compatible-static-extents`*`<InMat2, InMat2>(0, 1)` is `true`.
+*Preconditions:* `B.extent(0) == B.extent(1)` is `true`.
+*Effects:* Computes C = A B.
+``` cpp
+template<in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+         out-matrix OutMat>
+  void symmetric_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+         out-matrix OutMat>
+  void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+template<in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+         out-matrix OutMat>
+  void hermitian_matrix_product(InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, in-matrix InMat2, in-matrix InMat3,
+         out-matrix OutMat>
+  void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                InMat1 A, Triangle t, InMat2 B, InMat3 E, OutMat C);
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+  void triangular_matrix_product(InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E,
+                                 OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, in-matrix InMat3, out-matrix OutMat>
+  void triangular_matrix_product(ExecutionPolicy&& exec,
+                                 InMat1 A, Triangle t, DiagonalStorage d, InMat2 B, InMat3 E,
+                                 OutMat C);
+```
+These functions perform a potentially overwriting matrix-matrix
+multiply-add, taking into account the `Triangle` and `DiagonalStorage`
+(if applicable) parameters that apply to the symmetric, Hermitian, or
+triangular (respectively) matrix `A` [[linalg.general]].
+*Mandates:*
+- If `InMat1` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument; and
+- *`compatible-static-extents`*`<InMat1, InMat1>(0, 1)` is `true`.
+*Preconditions:* `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes C = E + A B.
+*Remarks:* `C` may alias `E`.
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+         out-matrix OutMat>
+  void symmetric_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+         out-matrix OutMat>
+  void symmetric_matrix_product(ExecutionPolicy&& exec,
+                                InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+template<in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+         out-matrix OutMat>
+  void hermitian_matrix_product(InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, in-matrix InMat2, class Triangle, in-matrix InMat3,
+         out-matrix OutMat>
+  void hermitian_matrix_product(ExecutionPolicy&& exec,
+                                InMat1 A, InMat2 B, Triangle t, InMat3 E, OutMat C);
+template<in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+         in-matrix InMat3, out-matrix OutMat>
+  void triangular_matrix_product(InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E,
+                                 OutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat1, in-matrix InMat2, class Triangle, class DiagonalStorage,
+         in-matrix InMat3, out-matrix OutMat>
+  void triangular_matrix_product(ExecutionPolicy&& exec,
+                                 InMat1 A, InMat2 B, Triangle t, DiagonalStorage d, InMat3 E,
+                                 OutMat C);
+```
+These functions perform a potentially overwriting matrix-matrix
+multiply-add, taking into account the `Triangle` and `DiagonalStorage`
+(if applicable) parameters that apply to the symmetric, Hermitian, or
+triangular (respectively) matrix `B` [[linalg.general]].
+*Mandates:*
+- If `InMat2` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument; and
+- *`compatible-static-extents`*`<InMat2, InMat2>(0, 1)` is `true`.
+*Preconditions:* `B.extent(0) == B.extent(1)` is `true`.
+*Effects:* Computes C = E + A B.
+*Remarks:* `C` may alias `E`.
+#### In-place triangular matrix-matrix product <a id="linalg.algs.blas3.trmm">[[linalg.algs.blas3.trmm]]</a>
+These functions perform an in-place matrix-matrix multiply, taking into
+account the `Triangle` and `DiagonalStorage` parameters that apply to
+the triangular matrix `A` [[linalg.general]].
+[*Note 1*: These functions correspond to the BLAS function
+`xTRMM`. — *end note*]
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_left_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_left_product(ExecutionPolicy&& exec,
+                                      InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+```
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<InMat, InOutMat, InOutMat>()` is `true`;
+  and
+- *`compatible-static-extents`*`<InMat, InMat>(0, 1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(A, C, C)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes a matrix C' such that C' = A C and assigns each
+element of C' to the corresponding element of C.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `C.extent(0)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_right_product(InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_right_product(ExecutionPolicy&& exec,
+                                       InMat A, Triangle t, DiagonalStorage d, InOutMat C);
+```
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<InOutMat, InMat, InOutMat>()` is `true`;
+  and
+- *`compatible-static-extents`*`<InMat, InMat>(0, 1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(C, A, C)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes a matrix C' such that C' = C A and assigns each
+element of C' to the corresponding element of C.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `C.extent(0)`).
+#### Rank-k update of a symmetric or Hermitian matrix <a id="linalg.algs.blas3.rankk">[[linalg.algs.blas3.rankk]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xSYRK` and
+`xHERK`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas3.rankk]].
+*Mandates:*
+- If `InOutMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`;
+- `compatible-static-extents<decltype(C), decltype(C)>(0, 1)` is `true`;
+  and
+- `compatible-static-extents<decltype(A), decltype(C)>(0, 0)` is `true`.
+*Preconditions:*
+- `A.extent(0)` equals `A.extent(1)`,
+- `C.extent(0)` equals `C.extent(1)`, and
+- `A.extent(0)` equals `C.extent(0)`.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `C.extent(0)`).
+``` cpp
+template<class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t);
+  template<class ExecutionPolicy, class Scalar,
+           in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+    void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                        Scalar alpha, InMat A, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + \alpha A A^T$, where
+the scalar α is `alpha`, and assigns each element of C' to the
+corresponding element of C.
+``` cpp
+template<in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_k_update(InMat A, InOutMat C, Triangle t);
+template<class ExecutionPolicy,
+         in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                      InMat A, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + A A^T$, and assigns
+each element of C' to the corresponding element of C.
+``` cpp
+template<class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_k_update(Scalar alpha, InMat A, InOutMat C, Triangle t);
+template<class ExecutionPolicy,
+         class Scalar, in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                      Scalar alpha, InMat A, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + \alpha A A^H$, where
+the scalar α is `alpha`, and assigns each element of C' to the
+corresponding element of C.
+``` cpp
+template<in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_k_update(InMat A, InOutMat C, Triangle t);
+template<class ExecutionPolicy,
+         in-matrix InMat, possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_k_update(ExecutionPolicy&& exec,
+                                      InMat A, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + A A^H$, and assigns
+each element of C' to the corresponding element of C.
+#### Rank-2k update of a symmetric or Hermitian matrix <a id="linalg.algs.blas3.rank2k">[[linalg.algs.blas3.rank2k]]</a>
+[*Note 1*: These functions correspond to the BLAS functions `xSYR2K`
+and `xHER2K`. — *end note*]
+The following elements apply to all functions in
+[[linalg.algs.blas3.rank2k]].
+*Mandates:*
+- If `InOutMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- `possibly-addable<decltype(A), decltype(B), decltype(C)>()` is `true`;
+  and
+- `compatible-static-extents<decltype(A), decltype(A)>(0, 1)` is `true`.
+*Preconditions:*
+- `addable(A, B, C)` is `true`, and
+- `A.extent(0)` equals `A.extent(1)`.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `C.extent(0)`).
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t);
+template<class ExecutionPolicy, in-matrix InMat1, in-matrix InMat2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void symmetric_matrix_rank_2k_update(ExecutionPolicy&& exec,
+                                       InMat1 A, InMat2 B, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + A B^T + B A^T$, and
+assigns each element of C' to the corresponding element of C.
+``` cpp
+template<in-matrix InMat1, in-matrix InMat2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_2k_update(InMat1 A, InMat2 B, InOutMat C, Triangle t);
+template<class ExecutionPolicy,
+         in-matrix InMat1, in-matrix InMat2,
+         possibly-packed-inout-matrix InOutMat, class Triangle>
+  void hermitian_matrix_rank_2k_update(ExecutionPolicy&& exec,
+                                       InMat1 A, InMat2 B, InOutMat C, Triangle t);
+```
+*Effects:* Computes a matrix C' such that $C' = C + A B^H + B A^H$, and
+assigns each element of C' to the corresponding element of C.
+#### Solve multiple triangular linear systems <a id="linalg.algs.blas3.trsm">[[linalg.algs.blas3.trsm]]</a>
+[*Note 1*: These functions correspond to the BLAS function
+`xTRSM`. — *end note*]
+``` cpp
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                           InMat2 B, OutMat X, BinaryDivideOp divide);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                           InMat1 A, Triangle t, DiagonalStorage d,
+                                           InMat2 B, OutMat X, BinaryDivideOp divide);
+```
+These functions perform multiple matrix solves, taking into account the
+`Triangle` and `DiagonalStorage` parameters that apply to the triangular
+matrix `A` [[linalg.general]].
+*Mandates:*
+- If `InMat1` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<InMat1, OutMat, InMat2>()` is `true`; and
+- *`compatible-static-extents`*`<InMat1, InMat1>(0, 1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(A, X, B)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes X' such that AX' = B, and assigns each element of X'
+to the corresponding element of X. If no such X' exists, then the
+elements of `X` are valid but unspecified.
+*Complexity:* 𝑂(`A.extent(0)` × `X.extent(1)` × `X.extent(1)`).
+[*Note 2*: Since the triangular matrix is on the left, the desired
+`divide` implementation in the case of noncommutative multiplication is
+mathematically equivalent to $y^{-1} x$, where x is the first argument
+and y is the second argument, and $y^{-1}$ denotes the multiplicative
+inverse of y. — *end note*]
+``` cpp
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                           InMat2 B, OutMat X);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_left_solve(A, t, d, B, X, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy, in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                           InMat1 A, Triangle t, DiagonalStorage d,
+                                           InMat2 B, OutMat X);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_left_solve(std::forward<ExecutionPolicy>(exec),
+                                    A, t, d, B, X, divides<void>{});
+```
+``` cpp
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                            InMat2 B, OutMat X, BinaryDivideOp divide);
+template<class ExecutionPolicy,
+         in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                            InMat1 A, Triangle t, DiagonalStorage d,
+                                            InMat2 B, OutMat X, BinaryDivideOp divide);
+```
+These functions perform multiple matrix solves, taking into account the
+`Triangle` and `DiagonalStorage` parameters that apply to the triangular
+matrix `A` [[linalg.general]].
+*Mandates:*
+- If `InMat1` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<OutMat, InMat1, InMat2>()` is `true`; and
+- *`compatible-static-extents`*`<InMat1, InMat1>(0,1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(X, A, B)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes X' such that X'A = B, and assigns each element of X'
+to the corresponding element of X. If no such X' exists, then the
+elements of `X` are valid but unspecified.
+*Complexity:* O( `B.extent(0)` ⋅ `B.extent(1)` ⋅ `A.extent(1)` )
+[*Note 1*: Since the triangular matrix is on the right, the desired
+`divide` implementation in the case of noncommutative multiplication is
+mathematically equivalent to $x y^{-1}$, where x is the first argument
+and y is the second argument, and $y^{-1}$ denotes the multiplicative
+inverse of y. — *end note*]
+``` cpp
+template<in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_matrix_right_solve(InMat1 A, Triangle t, DiagonalStorage d,
+                                            InMat2 B, OutMat X);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_right_solve(A, t, d, B, X, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy, in-matrix InMat1, class Triangle, class DiagonalStorage,
+         in-matrix InMat2, out-matrix OutMat>
+  void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                            InMat1 A, Triangle t, DiagonalStorage d,
+                                            InMat2 B, OutMat X);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_right_solve(std::forward<ExecutionPolicy>(exec),
+                                     A, t, d, B, X, divides<void>{});
+```
+#### Solve multiple triangular linear systems in-place <a id="linalg.algs.blas3.inplacetrsm">[[linalg.algs.blas3.inplacetrsm]]</a>
+[*Note 1*: These functions correspond to the BLAS function
+`xTRSM`. — *end note*]
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-matrix InOutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d,
+                                           InOutMat B, BinaryDivideOp divide);
+template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-matrix InOutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                           InMat A, Triangle t, DiagonalStorage d,
+                                           InOutMat B, BinaryDivideOp divide);
+```
+These functions perform multiple in-place matrix solves, taking into
+account the `Triangle` and `DiagonalStorage` parameters that apply to
+the triangular matrix `A` [[linalg.general]].
+[*Note 1*: This algorithm makes it possible to compute factorizations
+like Cholesky and LU in place. Performing triangular solve in place
+hinders parallelization. However, other `ExecutionPolicy` specific
+optimizations, such as vectorization, are still possible. — *end note*]
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<InMat, InOutMat, InOutMat>()` is `true`;
+  and
+- *`compatible-static-extents`*`<InMat, InMat>(0, 1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(A, B, B)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes X' such that AX' = B, and assigns each element of X'
+to the corresponding element of B. If so such X' exists, then the
+elements of `B` are valid but unspecified.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `B.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_matrix_left_solve(InMat A, Triangle t, DiagonalStorage d,
+                                           InOutMat B);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_left_solve(A, t, d, B, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy,
+           in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+    void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
+                                             InMat A, Triangle t, DiagonalStorage d,
+                                             InOutMat B);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_left_solve(std::forward<ExecutionPolicy>(exec),
+                                    A, t, d, B, divides<void>{});
+```
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-matrix InOutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d,
+                                            InOutMat B, BinaryDivideOp divide);
+template<class ExecutionPolicy, in-matrix InMat, class Triangle, class DiagonalStorage,
+         inout-matrix InOutMat, class BinaryDivideOp>
+  void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                            InMat A, Triangle t, DiagonalStorage d,
+                                            InOutMat B, BinaryDivideOp divide);
+```
+These functions perform multiple in-place matrix solves, taking into
+account the `Triangle` and `DiagonalStorage` parameters that apply to
+the triangular matrix `A` [[linalg.general]].
+[*Note 2*: This algorithm makes it possible to compute factorizations
+like Cholesky and LU in place. Performing triangular solve in place
+hinders parallelization. However, other `ExecutionPolicy` specific
+optimizations, such as vectorization, are still possible. — *end note*]
+*Mandates:*
+- If `InMat` has `layout_blas_packed` layout, then the layout’s
+  `Triangle` template argument has the same type as the function’s
+  `Triangle` template argument;
+- *`possibly-multipliable`*`<InOutMat, InMat, InOutMat>()` is `true`;
+  and
+- *`compatible-static-extents`*`<InMat, InMat>(0, 1)` is `true`.
+*Preconditions:*
+- *`multipliable`*`(B, A, B)` is `true`, and
+- `A.extent(0) == A.extent(1)` is `true`.
+*Effects:* Computes X' such that X'A = B, and assigns each element of X'
+to the corresponding element of B. If so such X' exists, then the
+elements of `B` are valid but unspecified.
+*Complexity:* 𝑂(`A.extent(0)` × `A.extent(1)` × `B.extent(1)`).
+``` cpp
+template<in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d, InOutMat B);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_right_solve(A, t, d, B, divides<void>{});
+```
+``` cpp
+template<class ExecutionPolicy,
+         in-matrix InMat, class Triangle, class DiagonalStorage, inout-matrix InOutMat>
+  void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
+                                            InMat A, Triangle t, DiagonalStorage d,
+                                            InOutMat B);
+```
+*Effects:* Equivalent to:
+``` cpp
+triangular_matrix_matrix_right_solve(std::forward<ExecutionPolicy>(exec),
+                                     A, t, d, B, divides<void>{});
+```

Diff to HTML by rtfpessoa