% Master's thesis record. The citation key is kept as-is so that existing
% citations of this entry continue to resolve.
% Acronyms in the title are brace-protected against style-driven recasing,
% and the month uses the predefined three-letter macro (unquoted).
@mastersthesis{1737031,
  author   = {Jahn, Lando},
  title    = {Optimization of an {ADER-DG}-Solver for Hyperbolic Equations Using the {BLIS} Linear Algebra Framework},
  school   = {Technical University of Munich},
  year     = {2023},
  month    = aug,
  language = {en},
  abstract = {Many simulation problems can be described using hyperbolic partial differential equations.
Solving these with performance in mind while not sacrificing too much in terms
of accuracy can be achieved using refined numerical solvers, such as ADER-DG.
ExaHyPE 2 can generate sophisticated C++ code that uses ADER-DG to solve such
numerical problems. It often requires many matrix multiplications, so providing an
optimized implementation for them can be very beneficial to performance. This is
currently done using an older version of the libxsmm code generator driver, which is
deprecated, so we explore the use of another implementation: BLIS. During testing,
we find that BLIS is not as performant for the smaller matrix multiplications used
throughout most problems. On the grounds of this, we also take into consideration
Eigen and a newer version of libxsmm that generates code during runtime to determine
whether they can achieve performance that is comparable to or better than the existing
option.
We explore the performance of all four implementations in exhaustive isolated performance
tests with various matrix sizes and environments. During these tests we observe
that Eigen and the newer libxsmm version can outperform the existing implementation
in certain realistically occurring situations, mainly when the existing implementation
does not provide optimizations for the specific microarchitecture. As a result of that
and the fact that BLIS may become more performant for smaller matrix multiplications
in the future, we incorporate all three of the newly explored implementations in the
ExaHyPE 2 generator as an addition to the one that is already included. Consequentially,
users of ExaHyPE 2 can easily have such optimizations included in their generated
ADER-DG programs by invoking an option.
In performance tests on the entire program, we find that Eigen may sometimes outperform
the previously used libxsmm code generator driver, even in cases where it is
optimized for the underlying microarchitecture.},
}