OGS — MatrixElementCache.h (source listing; see the online documentation for this file).
// SPDX-FileCopyrightText: Copyright (c) OpenGeoSys Community (opengeosys.org)
// SPDX-License-Identifier: BSD-3-Clause
3
#pragma once

#include <array>
#include <cstdlib>
#include <type_traits>
#include <vector>

#include <range/v3/algorithm/copy.hpp>
#include <range/v3/algorithm/count_if.hpp>
#include <range/v3/view/filter.hpp>
#include <range/v3/view/iota.hpp>
#include <range/v3/view/transform.hpp>

#include "BaseLib/Macros.h"
16
18{
19namespace detail
20{
21#ifdef USE_PETSC
23 GlobalIndexType const n)
24{
25 if (i == -n)
26 {
27 return 0;
28 }
29 else
30 {
31 return std::abs(i);
32 }
33}
34#else
36 GlobalIndexType const /*n*/)
37{
38 return i;
39}
40#endif
41} // namespace detail
42
43template <std::size_t Dim>
45{
49
50 MatrixElementCacheEntry(std::array<GlobalIndexType, Dim> const& indices,
51 double value)
53 {
54 }
55
57 default;
59
60 std::array<GlobalIndexType, Dim> indices;
61 double value;
62};
63
64template <std::size_t Dim>
66{
67 static_assert(Dim == 1 || Dim == 2);
68 using MatOrVec = std::conditional_t<Dim == 1, GlobalVector, GlobalMatrix>;
69
70 static constexpr std::size_t cache_capacity = 1'000'000;
71
72public:
73 MatrixElementCache(MatOrVec& mat_or_vec, Stats& stats,
74 const int num_threads)
75 : mat_or_vec_(mat_or_vec), stats_(stats), num_threads_(num_threads)
76 {
77 cache_.reserve(cache_capacity);
78 }
79
80 void add(std::vector<double> const& local_data,
81 std::vector<GlobalIndexType> const& indices)
82 {
83 if (local_data.empty())
84 {
85 return;
86 }
87
88 if (num_threads_ == 1)
89 {
90 if constexpr (Dim == 2)
91 {
92 auto const num_r_c = indices.size();
93 mat_or_vec_.add(
95 indices},
96 MathLib::toMatrix(local_data, num_r_c, num_r_c));
97 }
98 else
99 {
100 mat_or_vec_.add(indices, local_data);
101 }
102 return;
103 }
104
105 addToCache(local_data, indices);
106 }
107
109
110private:
111 void addToCache(std::vector<double> const& values,
112 std::vector<GlobalIndexType> const& indices)
113 {
114 ensureEnoughSpace(values.size());
115
116 addToCacheImpl(values, indices,
117 std::integral_constant<std::size_t, Dim>{});
118 }
119
120 // Overload for vectors.
121 void addToCacheImpl(std::vector<double> const& values,
122 std::vector<GlobalIndexType> const& indices,
123 std::integral_constant<std::size_t, 1>)
124 {
125 auto const num_r_c = indices.size();
126 auto const* const __restrict val = values.data();
127 auto const* const __restrict idx = indices.data();
128
129 // Count non-zeros first
130 auto const nonzero_count = ranges::count_if(
131 val, val + num_r_c, [](double v) { return v != 0.0; });
132
133 if (nonzero_count == 0)
134 {
135 stats_.count += num_r_c;
136 return;
137 }
138
139 auto entries =
140 ranges::views::iota(0ul, num_r_c) |
141 ranges::views::filter([val](std::size_t i)
142 { return val[i] != 0.0; }) |
143 ranges::views::transform(
144 [val, idx](std::size_t i)
145 { return MatrixElementCacheEntry<1>{{idx[i]}, val[i]}; });
146
147 auto const old_size = cache_.size();
148 cache_.resize(old_size + nonzero_count);
149 ranges::copy(entries, cache_.begin() + old_size);
150
151 stats_.count += num_r_c;
152 stats_.count_nonzero += nonzero_count;
153 }
154
155 // Overload for matrices.
156 void addToCacheImpl(std::vector<double> const& values,
157 std::vector<GlobalIndexType> const& indices,
158 std::integral_constant<std::size_t, 2>)
159 {
160 auto const num_r_c = indices.size();
161 auto const total_entries = num_r_c * num_r_c;
162
163 // Store pointers to avoid repeated dereferencing.
164 auto const* __restrict__ const idx = indices.data();
165 auto const* __restrict__ const val = values.data();
166
167 auto const old_size = cache_.size();
168 cache_.resize(old_size + total_entries);
169 auto* __restrict__ dest = cache_.data() + old_size;
170
171 for (std::size_t r = 0; r < num_r_c; ++r)
172 {
173 auto const r_global = idx[r];
174 for (std::size_t c = 0; c < num_r_c; ++c)
175 {
176 dest->indices[0] = r_global;
177 dest->indices[1] = idx[c];
178 dest->value = val[r * num_r_c + c]; // Row-major indexing
179 ++dest;
180 }
181 }
182
183 stats_.count += total_entries;
184 stats_.count_nonzero += total_entries;
185 }
186
187 void ensureEnoughSpace(std::size_t const space_needed)
188 {
189 // Fast path - inline this check
190 if (cache_.size() + space_needed <= cache_.capacity()) [[likely]]
191 {
192 return;
193 }
194
195 // Slow path - keep in separate function to avoid code bloat
196 ensureEnoughSpaceSlow(space_needed);
197 }
198
199 OGS_NO_INLINE void ensureEnoughSpaceSlow(std::size_t const space_needed)
200 {
201 if (cache_.capacity() < cache_capacity)
202 {
203 cache_.reserve(cache_capacity);
204 }
205
206 auto const size_initial = cache_.size();
207 auto const cap_initial = cache_.capacity();
208
209 if (size_initial + space_needed <= cap_initial)
210 {
211 return;
212 }
213
215
216 // ensure again that there is enough capacity (corner case, if initial
217 // capacity is too small because of whatever arcane reason)
218 auto const size_after = cache_.size();
219 auto const cap_after = cache_.capacity();
220
221 if (size_after + space_needed > cap_after)
222 {
223 cache_.reserve(size_after + 2 * space_needed);
224 }
225 }
226
228 {
229 if (cache_.empty())
230 {
231 return;
232 }
233
234#pragma omp critical
235 {
236 if constexpr (Dim == 2)
237 {
238 auto const n_cols = mat_or_vec_.getNumberOfColumns();
239
240 // TODO would be more efficient if our global matrix and vector
241 // implementations supported batch addition of matrix elements
242 // with arbitrary indices (not restricted to (n x m) shaped
243 // submatrices).
244 for (auto const [rc, value] : cache_)
245 {
246 auto const [r, c] = rc;
247
248 auto const c_no_ghost =
250
251 mat_or_vec_.add(r, c_no_ghost, value);
252 }
253 }
254 else
255 {
256 // TODO batch addition would be more efficient. That needs the
257 // refactoring of the matrix element cache.
258 for (auto const [r, value] : cache_)
259 {
260 mat_or_vec_.add(r.front(), value);
261 }
262 }
263 }
264
265 stats_.count_global += cache_.size();
266 cache_.clear();
267 }
268
269 std::vector<MatrixElementCacheEntry<Dim>> cache_;
273};
274
275template <int NumMatrices>
277
278// Specialization with 1 matrix ("Jac")
279template <>
281{
282public:
284 MultiStats<1>& stats, const int num_threads)
285 : cache_b_(b, stats.b, num_threads),
286 cache_Jac_(Jac, stats.Jac, num_threads)
287 {
288 }
289
290 void add(std::vector<double> const& local_b_data,
291 std::vector<double> const& local_Jac_data,
292 std::vector<GlobalIndexType> const& indices)
293 {
294 cache_b_.add(local_b_data, indices);
295 cache_Jac_.add(local_Jac_data, indices);
296 }
297
298private:
301};
302
303// Specialization with 2 matrices ("M", "K")
304template <>
306{
307public:
309 MultiStats<2>& stats, const int num_threads)
310 : cache_M_(M, stats.M, num_threads),
311 cache_K_(K, stats.K, num_threads),
312 cache_b_(b, stats.b, num_threads)
313 {
314 }
315
316 void add(std::vector<double> const& local_M_data,
317 std::vector<double> const& local_K_data,
318 std::vector<double> const& local_b_data,
319 std::vector<GlobalIndexType> const& indices)
320 {
321 cache_M_.add(local_M_data, indices);
322 cache_K_.add(local_K_data, indices);
323 cache_b_.add(local_b_data, indices);
324 }
325
326private:
330};
331} // namespace ProcessLib::Assembly
MathLib::EigenMatrix GlobalMatrix
MathLib::EigenVector GlobalVector
GlobalMatrix::IndexType GlobalIndexType
#define OGS_NO_INLINE
Definition Macros.h:10
void addToCacheImpl(std::vector< double > const &values, std::vector< GlobalIndexType > const &indices, std::integral_constant< std::size_t, 2 >)
static constexpr std::size_t cache_capacity
void addToCacheImpl(std::vector< double > const &values, std::vector< GlobalIndexType > const &indices, std::integral_constant< std::size_t, 1 >)
void addToCache(std::vector< double > const &values, std::vector< GlobalIndexType > const &indices)
void add(std::vector< double > const &local_data, std::vector< GlobalIndexType > const &indices)
OGS_NO_INLINE void ensureEnoughSpaceSlow(std::size_t const space_needed)
std::vector< MatrixElementCacheEntry< Dim > > cache_
void ensureEnoughSpace(std::size_t const space_needed)
MatrixElementCache(MatOrVec &mat_or_vec, Stats &stats, const int num_threads)
std::conditional_t< Dim==1, GlobalVector, GlobalMatrix > MatOrVec
void add(std::vector< double > const &local_b_data, std::vector< double > const &local_Jac_data, std::vector< GlobalIndexType > const &indices)
MultiMatrixElementCache(GlobalVector &b, GlobalMatrix &Jac, MultiStats< 1 > &stats, const int num_threads)
MultiMatrixElementCache(GlobalMatrix &M, GlobalMatrix &K, GlobalVector &b, MultiStats< 2 > &stats, const int num_threads)
void add(std::vector< double > const &local_M_data, std::vector< double > const &local_K_data, std::vector< double > const &local_b_data, std::vector< GlobalIndexType > const &indices)
Eigen::Map< const Matrix > toMatrix(std::vector< double > const &data, Eigen::MatrixXd::Index rows, Eigen::MatrixXd::Index cols)
GlobalIndexType transformToNonGhostIndex(GlobalIndexType const i, GlobalIndexType const n)
MatrixElementCacheEntry & operator=(MatrixElementCacheEntry const &)=default
MatrixElementCacheEntry(MatrixElementCacheEntry &&)=default
std::array< GlobalIndexType, Dim > indices
MatrixElementCacheEntry & operator=(MatrixElementCacheEntry &&)=default
MatrixElementCacheEntry(std::array< GlobalIndexType, Dim > const &indices, double value)
MatrixElementCacheEntry(MatrixElementCacheEntry const &)=default