add lae2 (2x2 Hermitian eigvals)

icl-utk-edu · Jun 26, 2024 · 3b6efdc · 3b6efdc
1 parent 57a4c71
commit 3b6efdc
Show file tree

Hide file tree

Showing 10 changed files with 216 additions and 3 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -291,6 +291,7 @@ add_library(
     src/lacgv.cc
     src/lacp2.cc
     src/lacpy.cc
+    src/lae2.cc
     src/laed4.cc
     src/laev2.cc
     src/lag2c.cc

diff --git a/include/lapack/fortran.h b/include/lapack/fortran.h
@@ -20382,6 +20382,19 @@ void LAPACK_zunhr_col(
     lapack_complex_double* T, lapack_int const* ldt,
     lapack_complex_double* D, lapack_int* info );
 
+//--------------------
+#define LAPACK_slae2 LAPACK_GLOBAL( slae2, SLAE2 )
+void LAPACK_slae2(
+    float const* a, float const* b, float const* c,
+    float* rt1, float* rt2
+);
+
+#define LAPACK_dlae2 LAPACK_GLOBAL( dlae2, DLAE2 )
+void LAPACK_dlae2(
+    double const* a, double const* b, double const* c,
+    double* rt1, double* rt2
+);
+
 //--------------------
 #define LAPACK_slaev2 LAPACK_GLOBAL( slaev2, SLAEV2 )
 void LAPACK_slaev2(

diff --git a/include/lapack/wrappers.hh b/include/lapack/wrappers.hh
@@ -3637,6 +3637,17 @@ void lacpy(
     std::complex<double> const* A, int64_t lda,
     std::complex<double>* B, int64_t ldb );
 
+// -----------------------------------------------------------------------------
+void lae2(
+    float a, float b, float c,
+    float* rt1,
+    float* rt2 );
+
+void lae2(
+    double a, double b, double c,
+    double* rt1,
+    double* rt2 );
+
 // -----------------------------------------------------------------------------
 int64_t laed4(
     int64_t n, int64_t i,

diff --git a/src/lae2.cc b/src/lae2.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
+
+#include "lapack.hh"
+#include "lapack/fortran.h"
+
+#include <vector>
+
+namespace lapack {
+
+//------------------------------------------------------------------------------
+/// @ingroup heev_computational
+void lae2(
+    float a, float b, float c,
+    float* rt1,
+    float* rt2 )
+{
+    LAPACK_slae2(
+        &a, &b, &c, rt1, rt2 );
+}
+
+//------------------------------------------------------------------------------
+/// Computes the eigenvalues of a 2-by-2 symmetric matrix
+///     [ a b ]
+///     [ b c ].
+/// On return, rt1 is the eigenvalue of larger absolute value, and rt2
+/// is the eigenvalue of smaller absolute value.
+///
+/// Overloaded versions are available for
+/// `float`, `double`.
+///
+/// @param[in] a
+///     The (1, 1) element of the 2-by-2 matrix.
+///
+/// @param[in] b
+///     The (1, 2) and (2, 1) elements of the 2-by-2 matrix.
+///
+/// @param[in] c
+///     The (2, 2) element of the 2-by-2 matrix.
+///
+/// @param[out] rt1
+///     The eigenvalue of larger absolute value.
+///
+/// @param[out] rt2
+///     The eigenvalue of smaller absolute value.
+///
+//------------------------------------------------------------------------------
+/// @par Further Details
+///
+/// rt1 is accurate to a few ulps barring over/underflow.
+///
+/// rt2 may be inaccurate if there is massive cancellation in the
+/// determinant a*c-b*b; higher precision or correctly rounded or
+/// correctly truncated arithmetic would be needed to compute rt2
+/// accurately in all cases.
+///
+/// Overflow is possible only if rt1 is within a factor of 5 of overflow.
+/// Underflow is harmless if the input data is 0 or exceeds
+/// underflow_threshold / macheps.
+///
+/// @ingroup heev_computational
+void lae2(
+    double a, double b, double c,
+    double* rt1,
+    double* rt2 )
+{
+    LAPACK_dlae2(
+        &a, &b, &c, rt1, rt2 );
+}
+
+}  // namespace lapack
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -124,6 +124,7 @@ add_executable(
     test_hptri.cc
     test_hptrs.cc
     test_lacpy.cc
+    test_lae2.cc
     test_laed4.cc
     test_laev2.cc
     test_langb.cc

diff --git a/test/run_tests.py b/test/run_tests.py
@@ -596,6 +596,7 @@ def filter_csv( values, csv ):
     [ 'heevr', gen + dtype + align + n + jobz + uplo + vl + vu ],
     [ 'heevr', gen + dtype + align + n + jobz + uplo + il + iu ],
     [ 'hetrd', gen + dtype + align + n + uplo ],
+    [ 'lae2',  gen + dtype_real ],  # 2x2, eigvals only
     [ 'laev2', gen + dtype ],  # 2x2
     [ 'ungtr', gen + dtype + align + n + uplo ],
     [ 'unmtr', gen + dtype_real    + align + mn + uplo + side + trans    ],  # real does trans = N, T, C

diff --git a/test/test.cc b/test/test.cc
@@ -339,6 +339,7 @@ std::vector< testsweeper::routines_t > routines = {
     { "",                   nullptr,        Section::newline },
 
     { "heevr",              test_heevr,     Section::heev }, // backward error check
+    { "lae2",               test_lae2,      Section::heev }, // forward  error check, compared to laev2
     { "laev2",              test_laev2,     Section::heev }, // backward error check
     { "",                   nullptr,        Section::newline },
 

diff --git a/test/test.hh b/test/test.hh
@@ -343,6 +343,7 @@ void test_heevx ( Params& params, bool run );
 void test_heevd ( Params& params, bool run );
 void test_heevr ( Params& params, bool run );
 void test_hetrd ( Params& params, bool run );
+void test_lae2  ( Params& params, bool run );
 void test_laev2 ( Params& params, bool run );
 void test_sturm ( Params& params, bool run );
 void test_ungtr ( Params& params, bool run );

diff --git a/test/test_lae2.cc b/test/test_lae2.cc
@@ -0,0 +1,108 @@
+// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
+
+#include "test.hh"
+#include "lapack.hh"
+#include "lapack/flops.hh"
+#include "print_matrix.hh"
+#include "error.hh"
+#include "lapacke_wrappers.hh"
+
+#include <vector>
+
+//------------------------------------------------------------------------------
+template< typename scalar_t >
+void test_lae2_work( Params& params, bool run )
+{
+    using real_t = blas::real_type< scalar_t >;
+    using blas::conj;
+    using lapack::Job, lapack::Uplo;
+
+    // Constants
+    const real_t eps = std::numeric_limits< real_t >::epsilon();
+
+    // get & mark input values
+    params.dim.m() = 2;
+    params.dim.n() = 2;
+    real_t tol = params.tol() * eps;
+    int verbose = params.verbose();
+    params.matrix.mark();
+
+    // mark non-standard output values
+    params.error.name( "Lambda" );
+
+    if (! run)
+        return;
+
+    //---------- setup
+    int64_t n = 2;
+    int64_t lda = 2;
+    std::vector< scalar_t > A( lda*n );
+
+    lapack::generate_matrix( params.matrix, n, n, &A[0], lda );
+
+    // A = [ a        b ], stored column-wise.
+    //     [ conj(b)  c ]
+    scalar_t a = A[ 0 ];
+    scalar_t b = A[ 2 ];
+    scalar_t c = A[ 3 ];
+    A[ 1 ] = conj( b );
+
+    real_t rt1, rt2, rt1_ref, rt2_ref, cs1;
+    scalar_t sn1;
+
+    if (verbose >= 2) {
+        printf( "A = " ); print_matrix( n, n, &A[0], lda );
+    }
+
+    //---------- run test
+    testsweeper::flush_cache( params.cache() );
+    double time = testsweeper::get_wtime();
+    // no info returned
+    lapack::lae2( a, b, c, &rt1, &rt2 );
+    time = testsweeper::get_wtime() - time;
+
+    params.time() = time;
+
+    std::vector< real_t > Lambda{ rt1, rt2 };
+
+    if (verbose >= 2) {
+        printf( "Lambda = " ); print_vector( n, &Lambda[0], 1 );
+    }
+
+    if (params.check() == 'y') {
+        //---------- run reference, using laev2
+        testsweeper::flush_cache( params.cache() );
+        time = testsweeper::get_wtime();
+        lapack::laev2( a, b, c, &rt1_ref, &rt2_ref, &cs1, &sn1 );
+        time = testsweeper::get_wtime() - time;
+
+        params.ref_time() = time;
+
+        //---------- check error compared to reference
+        std::vector< real_t > Lambda_ref{ rt1_ref, rt2_ref };
+        real_t error = rel_error( Lambda, Lambda_ref );
+        params.error() = error;
+        params.okay() = (error < tol);
+    }
+}
+
+//------------------------------------------------------------------------------
+void test_lae2( Params& params, bool run )
+{
+    switch (params.datatype()) {
+        case testsweeper::DataType::Single:
+            test_lae2_work< float >( params, run );
+            break;
+
+        case testsweeper::DataType::Double:
+            test_lae2_work< double >( params, run );
+            break;
+
+        default:
+            throw std::runtime_error( "unknown datatype" );
+            break;
+    }
+}
diff --git a/tools/header_gen.py b/tools/header_gen.py
@@ -145,12 +145,15 @@
                 # end
             # end
         # end
-        print( '#define LAPACK_' + func + ' LAPACK_GLOBAL(' + func + ',' + func.upper() + ')', file=output )
-        print( retval + ' LAPACK_' + func + '(\n    ' + ', '.join( args ) + '\n);\n', file=output )
+        print( '#define LAPACK_' + func + ' LAPACK_GLOBAL( ' + func + ', '
+               + func.upper() + ' )', file=output )
+        print( retval + ' LAPACK_' + func + '(\n    ' + ', '.join( args )
+               + '\n);\n', file=output )
     else:
         print( '// skipping, file not found:', filename )
         print( '// skipping, file not found:', filename, file=output )
-        print( '// #define LAPACK_' + func + ' LAPACK_GLOBAL(' + func + ',' + func.upper() + ')', file=output )
+        print( '// #define LAPACK_' + func + ' LAPACK_GLOBAL(' + func + ','
+               + func.upper() + ')', file=output )
         print( '// void LAPACK_' + func + '( ... );\n', file=output )
     # end
 # end