diff --git a/NEWS.md b/NEWS.md
index 8c6265679ef..aeb6898fea5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,6 +13,8 @@ for human readability.
   and [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) ([#2008])
 - `LobattoLegendreBasis` and related datastructures made fully floating-type general,
   enabling calculations with higher than double (`Float64`) precision ([#2128])
+- New time integrator `PairedExplicitRK4`, implementing the fourth-order paired explicit Runge-Kutta
+  method with [Convex.jl](https://github.com/jump-dev/Convex.jl), [ECOS.jl](https://github.com/jump-dev/ECOS.jl) ([#2147])
 
 #### Changed
 
diff --git a/docs/src/time_integration.md b/docs/src/time_integration.md
index 3cdb7fa75ae..9afbd5f65e4 100644
--- a/docs/src/time_integration.md
+++ b/docs/src/time_integration.md
@@ -212,4 +212,5 @@ Then, the stable CFL number can be computed as described above.
 ##### Single/Standalone methods
 
 - [`Trixi.PairedExplicitRK2`](@ref): Second-order PERK method with at least two stages.
-- [`Trixi.PairedExplicitRK3`](@ref): Third-order PERK method with at least three stages.
\ No newline at end of file
+- [`Trixi.PairedExplicitRK3`](@ref): Third-order PERK method with at least three stages.
+- [`Trixi.PairedExplicitRK4`](@ref): Fourth-order PERK method with at least five stages.
\ No newline at end of file
diff --git a/examples/structured_2d_dgsem/elixir_euler_vortex_perk4.jl b/examples/structured_2d_dgsem/elixir_euler_vortex_perk4.jl
new file mode 100644
index 00000000000..e35dec5854a
--- /dev/null
+++ b/examples/structured_2d_dgsem/elixir_euler_vortex_perk4.jl
@@ -0,0 +1,114 @@
+# We use time integration methods implemented in Trixi.jl, but we need the `CallbackSet`
+using OrdinaryDiffEq: CallbackSet
+using Trixi
+
+# Ratio of specific heats
+gamma = 1.4
+equations = CompressibleEulerEquations2D(gamma)
+
+solver = DGSEM(polydeg = 3, surface_flux = flux_hllc)
+
+"""
+    initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D)
+
+The classical isentropic vortex test case as presented in Section 5.1 of
+
+- Brian Vermeire (2019).
+  Paired Explicit Runge-Kutta Schemes for Stiff Systems of Equations
+  [DOI:10.1016/j.jcp.2019.05.014](https://doi.org/10.1016/j.jcp.2019.05.014)
+  https://spectrum.library.concordia.ca/id/eprint/985444/1/Paired-explicit-Runge-Kutta-schemes-for-stiff-sy_2019_Journal-of-Computation.pdf
+"""
+function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D)
+    # Evaluate error after full domain traversion
+    if t == t_end()
+        t = zero(t)
+    end
+
+    RealT = eltype(x)
+    # Initial center of the vortex
+    inicenter = SVector(0, 0)
+    # Strength of the vortex
+    S = convert(RealT, 13.5)
+    # Radius of vortex
+    R = convert(RealT, 1.5)
+    # Free-stream Mach 
+    M = convert(RealT, 0.4)
+    # Base flow
+    v1 = 1
+    v2 = 1
+    vel = SVector(v1, v2)
+
+    center = inicenter + vel * t # Advection of center
+    center = x - center          # Distance to centerpoint
+    center = SVector(center[2], -center[1])
+    r2 = center[1]^2 + center[2]^2
+
+    f = (1 - r2) / (2 * R^2)
+
+    rho = (1 - (S * M / convert(RealT, pi))^2 * (gamma - 1) * exp(2 * f) / 8)^(1 /
+                                                                               (gamma - 1))
+
+    du = S / (2 * convert(RealT, pi) * R) * exp(f) # Vel. perturbation
+    vel = vel + du * center
+    v1, v2 = vel
+
+    p = rho^gamma / (gamma * M^2)
+
+    prim = SVector(rho, v1, v2, p)
+    return prim2cons(prim, equations)
+end
+initial_condition = initial_condition_isentropic_vortex
+
+edge_length() = 20.0
+
+N_passes() = 1
+t_end() = edge_length() * N_passes()
+tspan = (0.0, t_end())
+
+coordinates_min = (-edge_length() / 2, -edge_length() / 2)
+coordinates_max = (edge_length() / 2, edge_length() / 2)
+
+cells_per_dimension = (32, 32)
+mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+ode = semidiscretize(semi, tspan)
+
+###############################################################################
+# Callbacks
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
+                                     analysis_integrals = (entropy,))
+
+# Note quite large CFL number
+stepsize_callback = StepsizeCallback(cfl = 9.1)
+
+callbacks = CallbackSet(summary_callback,
+                        stepsize_callback,
+                        analysis_callback)
+
+###############################################################################
+# set up time integration algorithm
+
+num_stages = 19
+coefficient_file = "a_" * string(num_stages) * ".txt"
+
+# Download the optimized PERK4 coefficients
+path_coeff_file = mktempdir()
+Trixi.download("https://gist.githubusercontent.com/DanielDoehring/84f266ff61f0a69a0127cec64056275e/raw/1a66adbe1b425d33daf502311ecbdd4b191b89cc/a_19.txt",
+               joinpath(path_coeff_file, coefficient_file))
+
+ode_algorithm = Trixi.PairedExplicitRK4(num_stages, path_coeff_file)
+
+###############################################################################
+# run the simulation
+
+sol = Trixi.solve(ode, ode_algorithm,
+                  dt = 42.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks);
+
+summary_callback()
diff --git a/examples/tree_1d_dgsem/elixir_hypdiff_nonperiodic_perk4.jl b/examples/tree_1d_dgsem/elixir_hypdiff_nonperiodic_perk4.jl
new file mode 100644
index 00000000000..55c1d38e3a6
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_hypdiff_nonperiodic_perk4.jl
@@ -0,0 +1,65 @@
+# We use time integration methods implemented in Trixi.jl, but we need the `CallbackSet`
+using OrdinaryDiffEq: CallbackSet
+using Trixi
+
+# Convex and ECOS are imported because they are used for finding the optimal time step and optimal 
+# monomial coefficients in the stability polynomial of P-ERK time integrators.
+using Convex, ECOS
+
+###############################################################################
+# semidiscretization of the hyperbolic diffusion equations
+
+equations = HyperbolicDiffusionEquations1D()
+
+initial_condition = initial_condition_poisson_nonperiodic
+
+boundary_conditions = boundary_condition_poisson_nonperiodic
+
+solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+
+coordinates_min = 0.0
+coordinates_max = 1.0
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 3,
+                n_cells_max = 30_000,
+                periodicity = false)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions,
+                                    source_terms = source_terms_poisson_nonperiodic)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 5.0)
+ode = semidiscretize(semi, tspan);
+
+summary_callback = SummaryCallback()
+
+resid_tol = 5.0e-12
+steady_state_callback = SteadyStateCallback(abstol = resid_tol, reltol = 0.0)
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+# Construct fourth order paired explicit Runge-Kutta method with 11 stages for given simulation setup.
+# Pass `tspan` to calculate maximum time step allowed for the bisection algorithm used 
+# in calculating the polynomial coefficients in the ODE algorithm.                                     
+ode_algorithm = Trixi.PairedExplicitRK4(11, tspan, semi)
+
+cfl_number = Trixi.calculate_cfl(ode_algorithm, ode)
+stepsize_callback = StepsizeCallback(cfl = 0.9 * cfl_number)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+sol = Trixi.solve(ode, ode_algorithm,
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/ext/TrixiConvexECOSExt.jl b/ext/TrixiConvexECOSExt.jl
index 224b1d814f0..d9e4aeedec5 100644
--- a/ext/TrixiConvexECOSExt.jl
+++ b/ext/TrixiConvexECOSExt.jl
@@ -9,7 +9,7 @@ using ECOS: Optimizer
 using LinearAlgebra: eigvals
 
 # Use functions that are to be extended and additional symbols that are not exported
-using Trixi: Trixi, undo_normalization!, bisect_stability_polynomial, @muladd
+using Trixi: Trixi, bisect_stability_polynomial, @muladd
 
 # By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
 # Since these FMAs can increase the performance of many numerical algorithms,
@@ -20,37 +20,31 @@ using Trixi: Trixi, undo_normalization!, bisect_stability_polynomial, @muladd
 
 # Undo normalization of stability polynomial coefficients by index factorial
 # relative to consistency order.
-function Trixi.undo_normalization!(gamma_opt, consistency_order, num_stage_evals)
-    for k in (consistency_order + 1):num_stage_evals
-        gamma_opt[k - consistency_order] = gamma_opt[k - consistency_order] /
-                                           factorial(k)
+function undo_normalization!(gamma_opt, num_stage_evals,
+                             num_reduced_coeffs, fac_offset)
+    for k in 1:(num_stage_evals - num_reduced_coeffs)
+        gamma_opt[k] /= factorial(k + fac_offset)
     end
-    return gamma_opt
 end
 
 # Compute stability polynomials for paired explicit Runge-Kutta up to specified consistency
 # order, including contributions from free coefficients for higher orders, and
 # return the maximum absolute value
-function stability_polynomials!(pnoms, consistency_order, num_stage_evals,
+function stability_polynomials!(pnoms, num_stage_evals,
                                 normalized_powered_eigvals_scaled,
-                                gamma)
-    num_eig_vals = length(pnoms)
-
+                                gamma, consistency_order)
     # Initialize with zero'th order (z^0) coefficient
-    for i in 1:num_eig_vals
-        pnoms[i] = 1.0
-    end
+    pnoms .= 1
 
     # First `consistency_order` terms of the exponential
     for k in 1:consistency_order
-        for i in 1:num_eig_vals
-            pnoms[i] += normalized_powered_eigvals_scaled[i, k]
-        end
+        pnoms += view(normalized_powered_eigvals_scaled, :, k)
     end
 
     # Contribution from free coefficients
     for k in (consistency_order + 1):num_stage_evals
-        pnoms += gamma[k - consistency_order] * normalized_powered_eigvals_scaled[:, k]
+        pnoms += gamma[k - consistency_order] *
+                 view(normalized_powered_eigvals_scaled, :, k)
     end
 
     # For optimization only the maximum is relevant
@@ -61,6 +55,51 @@ function stability_polynomials!(pnoms, consistency_order, num_stage_evals,
     end
 end
 
+# Specialized form of the stability polynomials for fourth-order PERK schemes.
+function stability_polynomials_PERK4!(pnoms, num_stage_evals,
+                                      normalized_powered_eigvals_scaled,
+                                      gamma, cS3)
+    # Constants arising from the particular form of Butcher tableau chosen for the 4th order PERK methods
+    # cS3 = c_{S-3}
+    k1 = 0.001055026310046423 / cS3
+    k2 = 0.03726406530405851 / cS3
+
+    # Initialize with zero'th order (z^0) coefficient
+    pnoms .= 1
+
+    # First `consistency_order` = 4 terms of the exponential
+    for k in 1:4
+        pnoms += view(normalized_powered_eigvals_scaled, :, k)
+    end
+
+    # "Fixed" term due to choice of the PERK4 Butcher tableau
+    # Required to un-do the normalization of the eigenvalues here
+    pnoms += k1 * view(normalized_powered_eigvals_scaled, :, 5) * factorial(5)
+
+    # Contribution from free coefficients
+    for k in 1:(num_stage_evals - 5)
+        pnoms += (k2 * view(normalized_powered_eigvals_scaled, :, k + 4) *
+                  gamma[k] +
+                  k1 * view(normalized_powered_eigvals_scaled, :, k + 5) *
+                  gamma[k] * (k + 5)) # Ensure same normalization of both summands
+    end
+
+    # For optimization only the maximum is relevant
+    if num_stage_evals == 5
+        return maximum(abs.(pnoms)) # If there is no variable to optimize, we need to use the broadcast operator.
+    else
+        return maximum(abs(pnoms))
+    end
+end
+
+@inline function normalize_power_eigvals!(normalized_powered_eigvals,
+                                          eig_vals,
+                                          num_stage_evals)
+    for j in 1:num_stage_evals
+        @views normalized_powered_eigvals[:, j] = eig_vals[:] .^ j ./ factorial(j)
+    end
+end
+
 #=
 The following structures and methods provide a simplified implementation to 
 discover optimal stability polynomial for a given set of `eig_vals`
@@ -70,35 +109,43 @@ and partial differential equations.
 - Ketcheson and Ahmadia (2012).
 Optimal stability polynomials for numerical integration of initial value problems
 [DOI: 10.2140/camcos.2012.7.247](https://doi.org/10.2140/camcos.2012.7.247)
+
+For the fourth-order PERK schemes, a specialized optimization routine is required, see
+- D. Doehring, L. Christmann, M. Schlottke-Lakemper, G. J. Gassner and M. Torrilhon (2024).
+ Fourth-Order Paired-Explicit Runge-Kutta Methods
+ [DOI:10.48550/arXiv.2408.05470](https://doi.org/10.48550/arXiv.2408.05470)
 =#
 
 # Perform bisection to optimize timestep for stability of the polynomial
 function Trixi.bisect_stability_polynomial(consistency_order, num_eig_vals,
                                            num_stage_evals,
                                            dtmax, dteps, eig_vals;
-                                           verbose = false)
+                                           kwargs...)
     dtmin = 0.0
     dt = -1.0
     abs_p = -1.0
 
+    if consistency_order == 4
+        # Fourth-order scheme has one additional fixed coefficient
+        num_reduced_unknown = 5
+    else # p = 2, 3
+        num_reduced_unknown = consistency_order
+    end
+
     # Construct stability polynomial for each eigenvalue
     pnoms = ones(Complex{Float64}, num_eig_vals, 1)
 
     # Init datastructure for monomial coefficients
-    gamma = Variable(num_stage_evals - consistency_order)
+    gamma = Variable(num_stage_evals - num_reduced_unknown)
 
     normalized_powered_eigvals = zeros(Complex{Float64}, num_eig_vals, num_stage_evals)
-
-    for j in 1:num_stage_evals
-        fac_j = factorial(j)
-        for i in 1:num_eig_vals
-            normalized_powered_eigvals[i, j] = eig_vals[i]^j / fac_j
-        end
-    end
+    normalize_power_eigvals!(normalized_powered_eigvals,
+                             eig_vals,
+                             num_stage_evals)
 
     normalized_powered_eigvals_scaled = similar(normalized_powered_eigvals)
 
-    if verbose
+    if kwargs[:verbose]
         println("Start optimization of stability polynomial \n")
     end
 
@@ -106,37 +153,55 @@ function Trixi.bisect_stability_polynomial(consistency_order, num_eig_vals,
     while dtmax - dtmin > dteps
         dt = 0.5 * (dtmax + dtmin)
 
-        # Compute stability polynomial for current timestep
         for k in 1:num_stage_evals
-            dt_k = dt^k
-            for i in 1:num_eig_vals
-                normalized_powered_eigvals_scaled[i, k] = dt_k *
-                                                          normalized_powered_eigvals[i,
-                                                                                     k]
-            end
+            @views normalized_powered_eigvals_scaled[:, k] = dt^k .*
+                                                             normalized_powered_eigvals[:,
+                                                                                        k]
         end
 
-        # Use last optimal values for gamma in (potentially) next iteration
-        problem = minimize(stability_polynomials!(pnoms, consistency_order,
-                                                  num_stage_evals,
-                                                  normalized_powered_eigvals_scaled,
-                                                  gamma))
-
-        solve!(problem,
-               # Parameters taken from default values for EiCOS
-               MOI.OptimizerWithAttributes(Optimizer, "gamma" => 0.99,
-                                           "delta" => 2e-7,
-                                           "feastol" => 1e-9,
-                                           "abstol" => 1e-9,
-                                           "reltol" => 1e-9,
-                                           "feastol_inacc" => 1e-4,
-                                           "abstol_inacc" => 5e-5,
-                                           "reltol_inacc" => 5e-5,
-                                           "nitref" => 9,
-                                           "maxit" => 100,
-                                           "verbose" => 3); silent = true)
-
-        abs_p = problem.optval
+        # Check if there are variables to optimize
+        if num_stage_evals - num_reduced_unknown > 0
+            # Use last optimal values for gamma in (potentially) next iteration
+            if consistency_order == 4
+                problem = minimize(stability_polynomials_PERK4!(pnoms,
+                                                                num_stage_evals,
+                                                                normalized_powered_eigvals_scaled,
+                                                                gamma, kwargs[:cS3]))
+            else # p = 2, 3
+                problem = minimize(stability_polynomials!(pnoms,
+                                                          num_stage_evals,
+                                                          normalized_powered_eigvals_scaled,
+                                                          gamma, consistency_order))
+            end
+
+            solve!(problem,
+                   # Parameters taken from default values for EiCOS
+                   MOI.OptimizerWithAttributes(Optimizer, "gamma" => 0.99,
+                                               "delta" => 2e-7,
+                                               "feastol" => 1e-9,
+                                               "abstol" => 1e-9,
+                                               "reltol" => 1e-9,
+                                               "feastol_inacc" => 1e-4,
+                                               "abstol_inacc" => 5e-5,
+                                               "reltol_inacc" => 5e-5,
+                                               "nitref" => 9,
+                                               "maxit" => 100,
+                                               "verbose" => 3); silent = true)
+
+            abs_p = problem.optval
+        else
+            if consistency_order == 4
+                abs_p = stability_polynomials_PERK4!(pnoms,
+                                                     num_stage_evals,
+                                                     normalized_powered_eigvals_scaled,
+                                                     gamma, kwargs[:cS3])
+            else
+                abs_p = stability_polynomials!(pnoms,
+                                               num_stage_evals,
+                                               normalized_powered_eigvals_scaled,
+                                               gamma, consistency_order)
+            end
+        end
 
         if abs_p < 1
             dtmin = dt
@@ -145,21 +210,24 @@ function Trixi.bisect_stability_polynomial(consistency_order, num_eig_vals,
         end
     end
 
-    if verbose
+    if kwargs[:verbose]
         println("Concluded stability polynomial optimization \n")
     end
 
-    if consistency_order - num_stage_evals != 0
+    if num_stage_evals - consistency_order > 0
         gamma_opt = evaluate(gamma)
     else
         gamma_opt = nothing # If there is no variable to optimize, return gamma_opt as nothing.
     end
 
-    # Catch case S = 3 (only one opt. variable)
+    # Catch case with only one optimization variable
     if isa(gamma_opt, Number)
         gamma_opt = [gamma_opt]
     end
 
+    undo_normalization!(gamma_opt, num_stage_evals,
+                        num_reduced_unknown, consistency_order)
+
     return gamma_opt, dt
 end
 end # @muladd
diff --git a/src/time_integration/methods_2N.jl b/src/time_integration/methods_2N.jl
index 71343b246fa..65a8aa3e26d 100644
--- a/src/time_integration/methods_2N.jl
+++ b/src/time_integration/methods_2N.jl
@@ -57,6 +57,8 @@ the low-storage explicit Runge-Kutta method of
   Third-order 2N-storage Runge-Kutta schemes with error control
   URL: https://ntrs.nasa.gov/citations/19940028444
   File: https://ntrs.nasa.gov/api/citations/19940028444/downloads/19940028444.pdf
+
+using the same interface as OrdinaryDiffEq.jl.
 """
 struct CarpenterKennedy2N43 <: SimpleAlgorithm2N
     a::SVector{4, Float64}
@@ -101,7 +103,7 @@ mutable struct SimpleIntegrator2N{RealT <: Real, uType, Params, Sol, F, Alg,
     iter::Int # current number of time steps (iteration)
     p::Params # will be the semidiscretization from Trixi.jl
     sol::Sol # faked
-    f::F
+    f::F # `rhs!` of the semidiscretization
     alg::Alg
     opts::SimpleIntegrator2NOptions
     finalstep::Bool # added for convenience
diff --git a/src/time_integration/methods_3Sstar.jl b/src/time_integration/methods_3Sstar.jl
index 51501c6189e..c2ef37d57b6 100644
--- a/src/time_integration/methods_3Sstar.jl
+++ b/src/time_integration/methods_3Sstar.jl
@@ -156,7 +156,7 @@ mutable struct SimpleIntegrator3Sstar{RealT <: Real, uType, Params, Sol, F, Alg,
     iter::Int # current number of time step (iteration)
     p::Params # will be the semidiscretization from Trixi.jl
     sol::Sol # faked
-    f::F
+    f::F # `rhs!` of the semidiscretization
     alg::Alg
     opts::SimpleIntegrator3SstarOptions
     finalstep::Bool # added for convenience
diff --git a/src/time_integration/methods_SSP.jl b/src/time_integration/methods_SSP.jl
index 33d1f164138..91bacb4a7cf 100644
--- a/src/time_integration/methods_SSP.jl
+++ b/src/time_integration/methods_SSP.jl
@@ -83,14 +83,14 @@ mutable struct SimpleIntegratorSSP{RealT <: Real, uType, Params, Sol, F, Alg,
     du::uType
     r0::uType
     t::RealT
-    tdir::RealT
+    tdir::RealT # DIRection of time integration, i.e., if one marches forward or backward in time
     dt::RealT # current time step
     dtcache::RealT # manually set time step
     iter::Int # current number of time steps (iteration)
     p::Params # will be the semidiscretization from Trixi
     sol::Sol # faked
-    f::F
-    alg::Alg
+    f::F # `rhs!` of the semidiscretization
+    alg::Alg # SimpleSSPRK33
     opts::SimpleIntegratorSSPOptions
     finalstep::Bool # added for convenience
     dtchangeable::Bool
diff --git a/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl b/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
index 37d994dc9bf..2263fbb0712 100644
--- a/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
@@ -5,6 +5,15 @@
 @muladd begin
 #! format: noindent
 
+function PERK2_compute_c_coeffs(num_stages, cS)
+    c = zeros(num_stages)
+    for k in 2:num_stages
+        c[k] = cS * (k - 1) / (num_stages - 1)
+    end
+
+    return c
+end
+
 # Compute the coefficients of the A matrix in the Butcher tableau using
 # stage scaling factors and monomial coefficients
 function compute_a_coeffs(num_stage_evals, stage_scaling_factors, monomial_coeffs)
@@ -24,37 +33,30 @@ end
 # using a list of eigenvalues
 function compute_PairedExplicitRK2_butcher_tableau(num_stages, eig_vals, tspan,
                                                    bS, cS; verbose = false)
-    # c Vector from Butcher Tableau (defines timestep per stage)
-    c = zeros(num_stages)
-    for k in 2:num_stages
-        c[k] = cS * (k - 1) / (num_stages - 1)
-    end
+    c = PERK2_compute_c_coeffs(num_stages, cS)
     stage_scaling_factors = bS * reverse(c[2:(end - 1)])
 
     # - 2 Since first entry of A is always zero (explicit method) and second is given by c_2 (consistency)
-    coeffs_max = num_stages - 2
+    num_coeffs_max = num_stages - 2
 
-    a_matrix = zeros(2, coeffs_max)
+    a_matrix = zeros(2, num_coeffs_max)
     a_matrix[1, :] = c[3:end]
 
-    consistency_order = 2
-
     dtmax = tspan[2] - tspan[1]
     dteps = 1e-9 # Hyperparameter of the optimization, might be too large for systems requiring very small timesteps
 
     num_eig_vals, eig_vals = filter_eig_vals(eig_vals; verbose)
 
+    consistency_order = 2
     monomial_coeffs, dt_opt = bisect_stability_polynomial(consistency_order,
                                                           num_eig_vals, num_stages,
                                                           dtmax,
                                                           dteps,
                                                           eig_vals; verbose)
 
-    if num_stages != consistency_order
-        monomial_coeffs = undo_normalization!(monomial_coeffs, consistency_order,
-                                              num_stages)
+    if num_coeffs_max > 0
         num_monomial_coeffs = length(monomial_coeffs)
-        @assert num_monomial_coeffs == coeffs_max
+        @assert num_monomial_coeffs == num_coeffs_max
         A = compute_a_coeffs(num_stages, stage_scaling_factors, monomial_coeffs)
         a_matrix[1, :] -= A
         a_matrix[2, :] = A
@@ -68,31 +70,29 @@ end
 function compute_PairedExplicitRK2_butcher_tableau(num_stages,
                                                    base_path_monomial_coeffs::AbstractString,
                                                    bS, cS)
-    # c Vector form Butcher Tableau (defines timestep per stage)
-    c = zeros(num_stages)
-    for k in 2:num_stages
-        c[k] = cS * (k - 1) / (num_stages - 1)
-    end
+    c = PERK2_compute_c_coeffs(num_stages, cS)
     stage_scaling_factors = bS * reverse(c[2:(end - 1)])
 
     # - 2 Since first entry of A is always zero (explicit method) and second is given by c_2 (consistency)
-    coeffs_max = num_stages - 2
+    num_coeffs_max = num_stages - 2
 
-    a_matrix = zeros(2, coeffs_max)
+    a_matrix = zeros(2, num_coeffs_max)
     a_matrix[1, :] = c[3:end]
 
-    path_monomial_coeffs = joinpath(base_path_monomial_coeffs,
-                                    "gamma_" * string(num_stages) * ".txt")
+    if num_coeffs_max > 0
+        path_monomial_coeffs = joinpath(base_path_monomial_coeffs,
+                                        "gamma_" * string(num_stages) * ".txt")
 
-    @assert isfile(path_monomial_coeffs) "Couldn't find file"
-    monomial_coeffs = readdlm(path_monomial_coeffs, Float64)
-    num_monomial_coeffs = size(monomial_coeffs, 1)
+        @assert isfile(path_monomial_coeffs) "Couldn't find file"
+        monomial_coeffs = readdlm(path_monomial_coeffs, Float64)
+        num_monomial_coeffs = size(monomial_coeffs, 1)
 
-    @assert num_monomial_coeffs == coeffs_max
-    A = compute_a_coeffs(num_stages, stage_scaling_factors, monomial_coeffs)
+        @assert num_monomial_coeffs == num_coeffs_max
+        A = compute_a_coeffs(num_stages, stage_scaling_factors, monomial_coeffs)
 
-    a_matrix[1, :] -= A
-    a_matrix[2, :] = A
+        a_matrix[1, :] -= A
+        a_matrix[2, :] = A
+    end
 
     return a_matrix, c
 end
@@ -113,23 +113,24 @@ end
        In this case the optimal CFL number cannot be computed and the [`StepsizeCallback`](@ref) cannot be used.
     - `tspan`: Time span of the simulation.
     - `semi` (`AbstractSemidiscretization`): Semidiscretization setup.
-    -  `eig_vals` (`Vector{ComplexF64}`): Eigenvalues of the Jacobian of the right-hand side (rhs) of the ODEProblem after the
+    - `eig_vals` (`Vector{ComplexF64}`): Eigenvalues of the Jacobian of the right-hand side (rhs) of the ODEProblem after the
       equation has been semidiscretized.
     - `verbose` (`Bool`, optional): Verbosity flag, default is false.
-    - `bS` (`Float64`, optional): Value of b in the Butcher tableau at b_s, when 
-      s is the number of stages, default is 1.0.
-    - `cS` (`Float64`, optional): Value of c in the Butcher tableau at c_s, when
-      s is the number of stages, default is 0.5.
+    - `bS` (`Float64`, optional): Value of $b_S$ in the Butcher tableau, where
+      $S$ is the number of stages. Default is `1.0`.
+    - `cS` (`Float64`, optional): Value of $c_S$ in the Butcher tableau, where
+      $S$ is the number of stages. Default is `0.5`.
 
 The following structures and methods provide a minimal implementation of
 the second-order paired explicit Runge-Kutta (PERK) method
-optimized for a certain simulation setup (PDE, IC & BC, Riemann Solver, DG Solver).
+optimized for a certain simulation setup (PDE, IC & BCs, Riemann Solver, DG Solver).
 
 - Brian Vermeire (2019).
   Paired explicit Runge-Kutta schemes for stiff systems of equations
   [DOI: 10.1016/j.jcp.2019.05.014](https://doi.org/10.1016/j.jcp.2019.05.014)
 
-Note: To use this integrator, the user must import the `Convex` and `ECOS` packages.
+Note: To use this integrator, the user must import the `Convex` and `ECOS` packages
+unless the coefficients are provided in a "gamma_<num_stages>.txt" file.
 """
 struct PairedExplicitRK2 <: AbstractPairedExplicitRKSingle
     num_stages::Int
@@ -147,6 +148,7 @@ end
 function PairedExplicitRK2(num_stages, base_path_monomial_coeffs::AbstractString,
                            dt_opt = nothing,
                            bS = 1.0, cS = 0.5)
+    @assert num_stages>=2 "PERK2 requires at least two stages"
     # If the user has the monomial coefficients, they also must have the optimal time step
     a_matrix, c = compute_PairedExplicitRK2_butcher_tableau(num_stages,
                                                             base_path_monomial_coeffs,
@@ -160,6 +162,7 @@ end
 function PairedExplicitRK2(num_stages, tspan, semi::AbstractSemidiscretization;
                            verbose = false,
                            bS = 1.0, cS = 0.5)
+    @assert num_stages>=2 "PERK2 requires at least two stages"
     eig_vals = eigvals(jacobian_ad_forward(semi))
 
     return PairedExplicitRK2(num_stages, tspan, eig_vals; verbose, bS, cS)
@@ -170,6 +173,7 @@ end
 function PairedExplicitRK2(num_stages, tspan, eig_vals::Vector{ComplexF64};
                            verbose = false,
                            bS = 1.0, cS = 0.5)
+    @assert num_stages>=2 "PERK2 requires at least two stages"
     a_matrix, c, dt_opt = compute_PairedExplicitRK2_butcher_tableau(num_stages,
                                                                     eig_vals, tspan,
                                                                     bS, cS;
@@ -189,13 +193,13 @@ mutable struct PairedExplicitRK2Integrator{RealT <: Real, uType, Params, Sol, F,
     du::uType
     u_tmp::uType
     t::RealT
-    tdir::RealT
+    tdir::RealT # DIRection of time integration, i.e., if one marches forward or backward in time
     dt::RealT # current time step
     dtcache::RealT # manually set time step
     iter::Int # current number of time steps (iteration)
     p::Params # will be the semidiscretization from Trixi
     sol::Sol # faked
-    f::F
+    f::F # `rhs!` of the semidiscretization
     alg::Alg # This is our own class written above; Abbreviation for ALGorithm
     opts::PairedExplicitRKOptions
     finalstep::Bool # added for convenience
diff --git a/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl b/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
index 5be1d498d59..35b0a772919 100644
--- a/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
@@ -6,7 +6,7 @@
 #! format: noindent
 
 # Initialize Butcher array abscissae c for PairedExplicitRK3 based on SSPRK33 base method
-function compute_c_coeffs(num_stages, cS2)
+function PERK3_compute_c_coeffs(num_stages, cS2)
     c = zeros(eltype(cS2), num_stages)
 
     # Last timesteps as for SSPRK33, see motivation in Section 3.3 of
@@ -28,18 +28,17 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages, tspan,
                                                    eig_vals::Vector{ComplexF64};
                                                    verbose = false, cS2)
     # Initialize array of c
-    c = compute_c_coeffs(num_stages, cS2)
+    c = PERK3_compute_c_coeffs(num_stages, cS2)
 
     # Initialize the array of our solution
     a_unknown = zeros(num_stages - 2)
 
-    # Calculate coefficients of the stability polynomial in monomial form
-    consistency_order = 3
     dtmax = tspan[2] - tspan[1]
     dteps = 1.0f-9
 
     num_eig_vals, eig_vals = filter_eig_vals(eig_vals; verbose)
 
+    consistency_order = 3
     monomial_coeffs, dt_opt = bisect_stability_polynomial(consistency_order,
                                                           num_eig_vals, num_stages,
                                                           dtmax, dteps,
@@ -49,9 +48,6 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages, tspan,
     if num_stages == consistency_order
         a_unknown = [0.25] # Use classic SSPRK33 (Shu-Osher) Butcher Tableau
     else
-        monomial_coeffs = undo_normalization!(monomial_coeffs, consistency_order,
-                                              num_stages)
-
         # Solve the nonlinear system of equations from monomial coefficient and
         # Butcher array abscissae c to find Butcher matrix A
         # This function is extended in TrixiNLsolveExt.jl
@@ -75,12 +71,12 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages,
                                                    cS2)
 
     # Initialize array of c
-    c = compute_c_coeffs(num_stages, cS2)
+    c = PERK3_compute_c_coeffs(num_stages, cS2)
 
     # - 2 Since First entry of A is always zero (explicit method) and second is given by c_2 (consistency)
-    a_coeffs_max = num_stages - 2
+    num_a_coeffs_max = num_stages - 2
 
-    a_matrix = zeros(2, a_coeffs_max)
+    a_matrix = zeros(2, num_a_coeffs_max)
     a_matrix[1, :] = c[3:end]
 
     path_a_coeffs = joinpath(base_path_a_coeffs,
@@ -90,7 +86,7 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages,
     a_coeffs = readdlm(path_a_coeffs, Float64)
     num_a_coeffs = size(a_coeffs, 1)
 
-    @assert num_a_coeffs == a_coeffs_max
+    @assert num_a_coeffs == num_a_coeffs_max
     # Fill A-matrix in PERK style
     a_matrix[1, :] -= a_coeffs
     a_matrix[2, :] = a_coeffs
@@ -115,15 +111,15 @@ end
        In this case the optimal CFL number cannot be computed and the [`StepsizeCallback`](@ref) cannot be used.
     - `tspan`: Time span of the simulation.
     - `semi` (`AbstractSemidiscretization`): Semidiscretization setup.
-    -  `eig_vals` (`Vector{ComplexF64}`): Eigenvalues of the Jacobian of the right-hand side (rhs) of the ODEProblem after the
+    - `eig_vals` (`Vector{ComplexF64}`): Eigenvalues of the Jacobian of the right-hand side (rhs) of the ODEProblem after the
       equation has been semidiscretized.
     - `verbose` (`Bool`, optional): Verbosity flag, default is false.
-    - `cS2` (`Float64`, optional): Value of c in the Butcher tableau at c_{s-2}, when
-      s is the number of stages, default is 1.0f0.
+    - `cS2` (`Float64`, optional): Value of $c_{S-2}$ in the Butcher tableau, where
+      $S$ is the number of stages. Default is `1.0f0`.
 
 The following structures and methods provide an implementation of
 the third-order paired explicit Runge-Kutta (PERK) method
-optimized for a certain simulation setup (PDE, IC & BC, Riemann Solver, DG Solver).
+optimized for a certain simulation setup (PDE, IC & BCs, Riemann Solver, DG Solver).
 The original paper is
 - Nasab, Vermeire (2022)
 Third-order Paired Explicit Runge-Kutta schemes for stiff systems of equations
@@ -133,7 +129,8 @@ While the changes to SSPRK33 base-scheme are described in
 Multirate Time-Integration based on Dynamic ODE Partitioning through Adaptively Refined Meshes for Compressible Fluid Dynamics
 [DOI: 10.1016/j.jcp.2024.113223](https://doi.org/10.1016/j.jcp.2024.113223)
 
-Note: To use this integrator, the user must import the `Convex`, `ECOS`, and `NLsolve` packages.
+Note: To use this integrator, the user must import the `Convex`, `ECOS`, and `NLsolve` packages
+unless the A-matrix coefficients are provided in a "a_<num_stages>.txt" file.
 """
 struct PairedExplicitRK3 <: AbstractPairedExplicitRKSingle
     num_stages::Int # S
@@ -148,6 +145,7 @@ end
 function PairedExplicitRK3(num_stages, base_path_a_coeffs::AbstractString,
                            dt_opt = nothing;
                            cS2 = 1.0f0)
+    @assert num_stages>=3 "PERK3 requires at least three stages"
     a_matrix, c = compute_PairedExplicitRK3_butcher_tableau(num_stages,
                                                             base_path_a_coeffs;
                                                             cS2)
@@ -158,6 +156,7 @@ end
 # Constructor that computes Butcher matrix A coefficients from a semidiscretization
 function PairedExplicitRK3(num_stages, tspan, semi::AbstractSemidiscretization;
                            verbose = false, cS2 = 1.0f0)
+    @assert num_stages>=3 "PERK3 requires at least three stages"
     eig_vals = eigvals(jacobian_ad_forward(semi))
 
     return PairedExplicitRK3(num_stages, tspan, eig_vals; verbose, cS2)
@@ -166,6 +165,7 @@ end
 # Constructor that calculates the coefficients with polynomial optimizer from a list of eigenvalues
 function PairedExplicitRK3(num_stages, tspan, eig_vals::Vector{ComplexF64};
                            verbose = false, cS2 = 1.0f0)
+    @assert num_stages>=3 "PERK3 requires at least three stages"
     a_matrix, c, dt_opt = compute_PairedExplicitRK3_butcher_tableau(num_stages,
                                                                     tspan,
                                                                     eig_vals;
@@ -184,13 +184,13 @@ mutable struct PairedExplicitRK3Integrator{RealT <: Real, uType, Params, Sol, F,
     du::uType
     u_tmp::uType
     t::RealT
-    tdir::RealT
+    tdir::RealT # DIRection of time integration, i.e., if one marches forward or backward in time
     dt::RealT # current time step
     dtcache::RealT # manually set time step
     iter::Int # current number of time steps (iteration)
     p::Params # will be the semidiscretization from Trixi
     sol::Sol # faked
-    f::F
+    f::F # `rhs!` of the semidiscretization
     alg::Alg # This is our own class written above; Abbreviation for ALGorithm
     opts::PairedExplicitRKOptions
     finalstep::Bool # added for convenience
@@ -227,8 +227,7 @@ function init(ode::ODEProblem, alg::PairedExplicitRK3;
     # initialize callbacks
     if callback isa CallbackSet
         for cb in callback.continuous_callbacks
-            throw(ArgumentError("Continuous callbacks are unsupported with paired explicit Runge-
-            Kutta methods."))
+            throw(ArgumentError("Continuous callbacks are unsupported with paired explicit Runge-Kutta methods."))
         end
         for cb in callback.discrete_callbacks
             cb.initialize(cb, integrator.u, integrator.t, integrator)
@@ -268,9 +267,7 @@ function step!(integrator::PairedExplicitRK3Integrator)
         end
 
         # We need to store `du` of the S-1 stage in `kS1` for the final update:
-        @threaded for i in eachindex(integrator.u)
-            integrator.kS1[i] = integrator.du[i]
-        end
+        integrator.kS1 .= integrator.du
 
         PERK_ki!(integrator, prob.p, alg, alg.num_stages)
 
diff --git a/src/time_integration/paired_explicit_runge_kutta/methods_PERK4.jl b/src/time_integration/paired_explicit_runge_kutta/methods_PERK4.jl
new file mode 100644
index 00000000000..6c097a0e2ee
--- /dev/null
+++ b/src/time_integration/paired_explicit_runge_kutta/methods_PERK4.jl
@@ -0,0 +1,328 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+function PERK4_compute_c_coeffs(num_stages, cS3)
+    c = ones(num_stages) # Best internal stability properties
+    c[1] = 0.0
+
+    c[num_stages - 3] = cS3
+    c[num_stages - 2] = 0.479274057836310
+    c[num_stages - 1] = sqrt(3) / 6 + 0.5
+    c[num_stages] = -sqrt(3) / 6 + 0.5
+
+    return c
+end
+
+# Constant/non-optimized part of the Butcher matrix
+function PERK4_a_matrix_constant(cS3)
+    return [(0.479274057836310-(0.114851811257441 / cS3)) 0.1397682537005989 0.1830127018922191
+            0.114851811257441/cS3 0.648906880894214 0.028312163512968]
+end
+
+# Compute the Butcher tableau for a paired explicit Runge-Kutta method order 4
+# using a list of eigenvalues
+function compute_PairedExplicitRK4_butcher_tableau(num_stages, tspan,
+                                                   eig_vals::Vector{ComplexF64};
+                                                   verbose = false, cS3)
+    c = PERK4_compute_c_coeffs(num_stages, cS3)
+
+    num_coeffs_max = num_stages - 5
+    a_matrix = zeros(2, num_coeffs_max)
+
+    dtmax = tspan[2] - tspan[1]
+    dteps = 1.0f-9
+
+    num_eig_vals, eig_vals = filter_eig_vals(eig_vals; verbose)
+
+    consistency_order = 4
+    monomial_coeffs, dt_opt = bisect_stability_polynomial(consistency_order,
+                                                          num_eig_vals,
+                                                          num_stages,
+                                                          dtmax, dteps,
+                                                          eig_vals;
+                                                          verbose, cS3)
+
+    if num_stages > 5
+        a_unknown = copy(monomial_coeffs)
+        for i in 5:(num_stages - 2)
+            a_unknown[i - 3] /= monomial_coeffs[i - 4]
+        end
+        reverse!(a_unknown)
+
+        a_matrix = zeros(2, num_coeffs_max)
+        a_matrix[1, :] = c[3:(num_stages - 3)]
+
+        a_matrix[1, :] -= a_unknown
+        a_matrix[2, :] = a_unknown
+    end
+
+    a_matrix_constant = PERK4_a_matrix_constant(cS3)
+
+    return a_matrix, a_matrix_constant, c, dt_opt
+end
+
+# Compute the Butcher tableau for a paired explicit Runge-Kutta method order 4
+# using provided values of coefficients a in A-matrix of Butcher tableau
+function compute_PairedExplicitRK4_butcher_tableau(num_stages,
+                                                   base_path_a_coeffs::AbstractString,
+                                                   cS3)
+    c = PERK4_compute_c_coeffs(num_stages, cS3)
+
+    num_coeffs_max = num_stages - 5
+
+    a_matrix = zeros(2, num_coeffs_max)
+    a_matrix[1, :] = c[3:(num_stages - 3)]
+
+    path_a_coeffs = joinpath(base_path_a_coeffs,
+                             "a_" * string(num_stages) * ".txt")
+
+    @assert isfile(path_a_coeffs) "Couldn't find file $path_a_coeffs"
+    if num_stages > 5
+        a_coeffs = readdlm(path_a_coeffs, Float64)
+        num_a_coeffs = size(a_coeffs, 1)
+
+        @assert num_a_coeffs == num_coeffs_max
+        if num_coeffs_max > 0
+            a_matrix[1, :] -= a_coeffs
+            a_matrix[2, :] = a_coeffs
+        end
+    end
+
+    a_matrix_constant = PERK4_a_matrix_constant(cS3)
+
+    return a_matrix, a_matrix_constant, c
+end
+
+@doc raw"""
+    PairedExplicitRK4(num_stages, base_path_a_coeffs::AbstractString, dt_opt = nothing;
+                      cS3 = 1.0f0)
+    PairedExplicitRK4(num_stages, tspan, semi::AbstractSemidiscretization;
+                      verbose = false, cS3 = 1.0f0)
+    PairedExplicitRK4(num_stages, tspan, eig_vals::Vector{ComplexF64};
+                      verbose = false, cS3 = 1.0f0)
+
+    Parameters:
+    - `num_stages` (`Int`): Number of stages in the paired explicit Runge-Kutta (P-ERK) method.
+    - `base_path_a_coeffs` (`AbstractString`): Path to a file containing some coefficients in the A-matrix in 
+      the Butcher tableau of the Runge Kutta method.
+      The matrix should be stored in a text file at `joinpath(base_path_a_coeffs, "a_$(num_stages).txt")` and separated by line breaks.
+    - `dt_opt` (`Float64`, optional): Optimal time step size for the simulation setup. Can be `nothing` if it is unknown. 
+       In this case the optimal CFL number cannot be computed and the [`StepsizeCallback`](@ref) cannot be used.
+    - `tspan`: Time span of the simulation.
+    - `semi` (`AbstractSemidiscretization`): Semidiscretization setup.
+    - `eig_vals` (`Vector{ComplexF64}`): Eigenvalues of the Jacobian of the right-hand side (rhs) of the `ODEProblem` after the
+      equation has been semidiscretized.
+    - `cS3` (`Float64`, optional): Value of $c_{S-3}$ in the Butcher tableau, where
+      $S$ is the number of stages. Default is `1.0f0`.
+
+The following structures and methods provide an implementation of
+the fourth-order paired explicit Runge-Kutta (P-ERK) method
+optimized for a certain simulation setup (PDE, IC & BCs, Riemann Solver, DG Solver).
+The method has been proposed in 
+- D. Doehring, L. Christmann, M. Schlottke-Lakemper, G. J. Gassner and M. Torrilhon (2024).
+  Fourth-Order Paired-Explicit Runge-Kutta Methods
+  [DOI:10.48550/arXiv.2408.05470](https://doi.org/10.48550/arXiv.2408.05470)
+"""
+struct PairedExplicitRK4 <: AbstractPairedExplicitRKSingle
+    num_stages::Int # S
+
+    # Optimized coefficients, i.e., flexible part of the Butcher array matrix A.
+    a_matrix::Union{Matrix{Float64}, Nothing}
+    # This part of the Butcher array matrix A is constant for all PERK methods, i.e., 
+    # regardless of the optimized coefficients.
+    a_matrix_constant::Matrix{Float64}
+    c::Vector{Float64}
+
+    dt_opt::Union{Float64, Nothing}
+end
+
+# Constructor for previously computed A Coeffs
+function PairedExplicitRK4(num_stages, base_path_a_coeffs::AbstractString,
+                           dt_opt = nothing;
+                           cS3 = 1.0f0)  # Default value for best internal stability
+    @assert num_stages>=5 "PERK4 requires at least five stages"
+    a_matrix, a_matrix_constant, c = compute_PairedExplicitRK4_butcher_tableau(num_stages,
+                                                                               base_path_a_coeffs,
+                                                                               cS3)
+
+    return PairedExplicitRK4(num_stages, a_matrix, a_matrix_constant, c, dt_opt)
+end
+
+# Constructor that computes Butcher matrix A coefficients from a semidiscretization
+function PairedExplicitRK4(num_stages, tspan, semi::AbstractSemidiscretization;
+                           verbose = false, cS3 = 1.0f0)
+    @assert num_stages>=5 "PERK4 requires at least five stages"
+    eig_vals = eigvals(jacobian_ad_forward(semi))
+
+    return PairedExplicitRK4(num_stages, tspan, eig_vals; verbose, cS3)
+end
+
+# Constructor that calculates the coefficients with polynomial optimizer from a list of eigenvalues
+function PairedExplicitRK4(num_stages, tspan, eig_vals::Vector{ComplexF64};
+                           verbose = false, cS3 = 1.0f0)
+    @assert num_stages>=5 "PERK4 requires at least five stages"
+    a_matrix, a_matrix_constant, c, dt_opt = compute_PairedExplicitRK4_butcher_tableau(num_stages,
+                                                                                       tspan,
+                                                                                       eig_vals;
+                                                                                       verbose,
+                                                                                       cS3)
+    return PairedExplicitRK4(num_stages, a_matrix, a_matrix_constant, c, dt_opt)
+end
+
+# This struct is needed to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L77
+# This implements the interface components described at
+# https://diffeq.sciml.ai/v6.8/basics/integrator/#Handing-Integrators-1
+# which are used in Trixi.jl.
+mutable struct PairedExplicitRK4Integrator{RealT <: Real, uType, Params, Sol, F, Alg,
+                                           PairedExplicitRKOptions} <:
+               AbstractPairedExplicitRKSingleIntegrator
+    u::uType
+    du::uType
+    u_tmp::uType
+    t::RealT
+    tdir::RealT # DIRection of time integration, i.e., if one marches forward or backward in time
+    dt::RealT # current time step
+    dtcache::RealT # manually set time step
+    iter::Int # current number of time steps (iteration)
+    p::Params # will be the semidiscretization from Trixi
+    sol::Sol # faked
+    f::F # `rhs!` of the semidiscretization
+    alg::Alg # This is our own class written above; Abbreviation for ALGorithm
+    opts::PairedExplicitRKOptions
+    finalstep::Bool # added for convenience
+    dtchangeable::Bool
+    force_stepfail::Bool
+    # Additional PERK register
+    k1::uType
+end
+
+function init(ode::ODEProblem, alg::PairedExplicitRK4;
+              dt, callback::Union{CallbackSet, Nothing} = nothing, kwargs...)
+    u0 = copy(ode.u0)
+    du = zero(u0)
+    u_tmp = zero(u0)
+
+    k1 = zero(u0) # Additional PERK register
+
+    t0 = first(ode.tspan)
+    tdir = sign(ode.tspan[end] - ode.tspan[1])
+    iter = 0
+
+    integrator = PairedExplicitRK4Integrator(u0, du, u_tmp, t0, tdir, dt, dt, iter,
+                                             ode.p,
+                                             (prob = ode,), ode.f, alg,
+                                             PairedExplicitRKOptions(callback,
+                                                                     ode.tspan;
+                                                                     kwargs...),
+                                             false, true, false,
+                                             k1)
+
+    # initialize callbacks
+    if callback isa CallbackSet
+        for cb in callback.continuous_callbacks
+            throw(ArgumentError("Continuous callbacks are unsupported with paired explicit Runge-Kutta methods."))
+        end
+        for cb in callback.discrete_callbacks
+            cb.initialize(cb, integrator.u, integrator.t, integrator)
+        end
+    end
+
+    return integrator
+end
+
+# Computes last three stages, i.e., i = S-2, S-1, S
+@inline function PERK4_kS2_to_kS!(integrator::PairedExplicitRK4Integrator, p, alg)
+    for stage in 1:2
+        @threaded for i in eachindex(integrator.u)
+            integrator.u_tmp[i] = integrator.u[i] +
+                                  integrator.dt *
+                                  (alg.a_matrix_constant[1, stage] *
+                                   integrator.k1[i] +
+                                   alg.a_matrix_constant[2, stage] *
+                                   integrator.du[i])
+        end
+
+        integrator.f(integrator.du, integrator.u_tmp, p,
+                     integrator.t +
+                     alg.c[alg.num_stages - 3 + stage] * integrator.dt)
+    end
+
+    # Last stage
+    @threaded for i in eachindex(integrator.u)
+        integrator.u_tmp[i] = integrator.u[i] +
+                              integrator.dt *
+                              (alg.a_matrix_constant[1, 3] * integrator.k1[i] +
+                               alg.a_matrix_constant[2, 3] * integrator.du[i])
+    end
+
+    # Store K_{S-1} in `k1`:
+    integrator.k1 .= integrator.du
+
+    integrator.f(integrator.du, integrator.u_tmp, p,
+                 integrator.t + alg.c[alg.num_stages] * integrator.dt)
+
+    @threaded for i in eachindex(integrator.u)
+        # Note that 'k1' carries the values of K_{S-1}
+        # and that we construct 'K_S' "in-place" from 'integrator.du'
+        integrator.u[i] += 0.5 * integrator.dt *
+                           (integrator.k1[i] + integrator.du[i])
+    end
+end
+
+function step!(integrator::PairedExplicitRK4Integrator)
+    @unpack prob = integrator.sol
+    @unpack alg = integrator
+    t_end = last(prob.tspan)
+    callbacks = integrator.opts.callback
+
+    @assert !integrator.finalstep
+    if isnan(integrator.dt)
+        error("time step size `dt` is NaN")
+    end
+
+    modify_dt_for_tstops!(integrator)
+
+    # if the next iteration would push the simulation beyond the end time, set dt accordingly
+    if integrator.t + integrator.dt > t_end ||
+       isapprox(integrator.t + integrator.dt, t_end)
+        integrator.dt = t_end - integrator.t
+        terminate!(integrator)
+    end
+
+    @trixi_timeit timer() "Paired Explicit Runge-Kutta ODE integration step" begin
+        PERK_k1!(integrator, prob.p)
+        PERK_k2!(integrator, prob.p, alg)
+
+        # Higher stages until "constant" stages
+        for stage in 3:(alg.num_stages - 3)
+            PERK_ki!(integrator, prob.p, alg, stage)
+        end
+
+        PERK4_kS2_to_kS!(integrator, prob.p, alg)
+    end
+
+    integrator.iter += 1
+    integrator.t += integrator.dt
+
+    @trixi_timeit timer() "Step-Callbacks" begin
+        # handle callbacks
+        if callbacks isa CallbackSet
+            for cb in callbacks.discrete_callbacks
+                if cb.condition(integrator.u, integrator.t, integrator)
+                    cb.affect!(integrator)
+                end
+            end
+        end
+    end
+
+    # respect maximum number of iterations
+    if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep
+        @warn "Interrupted. Larger maxiters is needed."
+        terminate!(integrator)
+    end
+end
+end # @muladd
diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
index 87f067fc39d..6cadcdfe36c 100644
--- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
@@ -9,7 +9,7 @@
 include("polynomial_optimizer.jl")
 
 # Abstract base type for both single/standalone and multi-level 
-# PERK (Paired-Explicit Runge-Kutta) time integration schemes
+# PERK (Paired Explicit Runge-Kutta) time integration schemes
 abstract type AbstractPairedExplicitRK end
 # Abstract base type for single/standalone PERK time integration schemes
 abstract type AbstractPairedExplicitRKSingle <: AbstractPairedExplicitRK end
@@ -206,5 +206,8 @@ function solve_a_butcher_coeffs_unknown! end
 
 # Basic implementation of the second-order paired explicit Runge-Kutta (PERK) method
 include("methods_PERK2.jl")
+# Slightly customized implementation of the third-order PERK method
 include("methods_PERK3.jl")
+# Basic implementation of the fourth-order PERK method
+include("methods_PERK4.jl")
 end # @muladd
diff --git a/src/time_integration/paired_explicit_runge_kutta/polynomial_optimizer.jl b/src/time_integration/paired_explicit_runge_kutta/polynomial_optimizer.jl
index bfd53ba2eaf..0598427ad05 100644
--- a/src/time_integration/paired_explicit_runge_kutta/polynomial_optimizer.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/polynomial_optimizer.jl
@@ -24,5 +24,4 @@ end
 # Add definitions of functions related to polynomial optimization by Convex and ECOS here
 # such that hey can be exported from Trixi.jl and extended in the TrixiConvexECOSExt package
 # extension or by the Convex and ECOS-specific code loaded by Requires.jl
-function undo_normalization! end
 function bisect_stability_polynomial end
diff --git a/test/test_structured_2d.jl b/test/test_structured_2d.jl
index f78f127f434..7c3a97010cd 100644
--- a/test/test_structured_2d.jl
+++ b/test/test_structured_2d.jl
@@ -593,6 +593,34 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_vortex_perk4.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_vortex_perk4.jl"),
+                        l2=[
+                            0.0001846244731283424,
+                            0.00042537910268029285,
+                            0.0003724909264689687,
+                            0.0026689613797051493
+                        ],
+                        linf=[
+                            0.0025031072787504716,
+                            0.009266316022570331,
+                            0.009876399281272374,
+                            0.0306915591360557
+                        ])
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # Larger values for allowed allocations due to usage of custom 
+        # integrator which are not *recorded* for the methods from 
+        # OrdinaryDiffEq.jl
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 8000
+    end
+end
+
 @trixi_testset "elixir_euler_ec.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"),
                         l2=[
diff --git a/test/test_tree_1d_hypdiff.jl b/test/test_tree_1d_hypdiff.jl
index cd570c16708..7e1166a1d1e 100644
--- a/test/test_tree_1d_hypdiff.jl
+++ b/test/test_tree_1d_hypdiff.jl
@@ -25,6 +25,25 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem")
     end
 end
 
+@trixi_testset "elixir_hypdiff_nonperiodic_perk4.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_hypdiff_nonperiodic_perk4.jl"),
+                        l2=[1.3655114994521285e-7, 1.0200345014751413e-6],
+                        linf=[7.173289867656862e-7, 4.507115296537023e-6],
+                        atol=2.5e-13)
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # Larger values for allowed allocations due to usage of custom 
+        # integrator which are not *recorded* for the methods from 
+        # OrdinaryDiffEq.jl
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 8000
+    end
+end
+
 @trixi_testset "elixir_hypdiff_harmonic_nonperiodic.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR,
                                  "elixir_hypdiff_harmonic_nonperiodic.jl"),
diff --git a/test/test_unit.jl b/test/test_unit.jl
index 97e32726ca7..13654a19534 100644
--- a/test/test_unit.jl
+++ b/test/test_unit.jl
@@ -1768,6 +1768,27 @@ end
                     0.31168238866709846 0.18831761133290154], atol = 1e-13)
 end
 
+@testset "PERK Single p4 Constructors" begin
+    path_coeff_file = mktempdir()
+    Trixi.download("https://gist.githubusercontent.com/warisa-r/8d93f6a3ae0635e13b9f51ee32ab7fff/raw/54dc5b14be9288e186b745facb5bbcb04d1476f8/EigenvalueList_Refined2.txt",
+                   joinpath(path_coeff_file, "spectrum.txt"))
+
+    eig_vals = readdlm(joinpath(path_coeff_file, "spectrum.txt"), ComplexF64)
+    tspan = (0.0, 1.0)
+    ode_algorithm = Trixi.PairedExplicitRK4(14, tspan, vec(eig_vals))
+
+    @test isapprox(transpose(ode_algorithm.a_matrix),
+                   [0.9935765040401348 0.0064234959598652
+                    0.9849926812139576 0.0150073187860425
+                    0.9731978940975923 0.0268021059024077
+                    0.9564664284695985 0.0435335715304015
+                    0.9319632992510594 0.0680367007489407
+                    0.8955171743167522 0.1044828256832478
+                    0.8443975130657495 0.1556024869342504
+                    0.7922561745278265 0.2077438254721735
+                    0.7722324105428290 0.2277675894571710], atol = 1e-13)
+end
+
 @testset "Sutherlands Law" begin
     function mu(u, equations)
         T_ref = 291.15