173 _cols(p.
graph().array().begin(), p.
graph().array().end()),
174 _row_ptr(p.
graph().offsets().begin(), p.
graph().offsets().end()),
178 if (_bs[0] > 1 or _bs[1] > 1)
179 throw std::runtime_error(
"Block size not yet supported");
183 _off_diagonal_offset.reserve(num_diag_nnz.size());
184 std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
185 std::back_inserter(_off_diagonal_offset), std::plus{});
188 const std::array local_size
189 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
190 const std::array local_range
191 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
192 const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
194 const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
195 const std::vector<int>& src_ranks = _index_maps[0]->src();
196 const std::vector<int>& dest_ranks = _index_maps[0]->dest();
200 MPI_Dist_graph_create_adjacent(_index_maps[0]->comm(), dest_ranks.size(),
201 dest_ranks.data(), MPI_UNWEIGHTED,
202 src_ranks.size(), src_ranks.data(),
203 MPI_UNWEIGHTED, MPI_INFO_NULL,
false, &comm);
208 _ghost_row_to_rank.reserve(_index_maps[0]->owners().
size());
209 for (
int r : _index_maps[0]->owners())
211 auto it = std::lower_bound(src_ranks.begin(), src_ranks.end(), r);
212 assert(it != src_ranks.end() and *it == r);
213 int pos = std::distance(src_ranks.begin(), it);
214 _ghost_row_to_rank.push_back(pos);
218 std::vector<std::int32_t> data_per_proc(src_ranks.size(), 0);
219 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
221 assert(_ghost_row_to_rank[i] < data_per_proc.size());
222 std::size_t pos = local_size[0] + i;
223 data_per_proc[_ghost_row_to_rank[i]] += _row_ptr[pos + 1] - _row_ptr[pos];
227 _val_send_disp.resize(src_ranks.size() + 1, 0);
228 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
229 std::next(_val_send_disp.begin()));
232 std::vector<std::int64_t> ghost_index_data(2 * _val_send_disp.back());
234 std::vector<int> insert_pos = _val_send_disp;
235 for (std::size_t i = 0; i < _ghost_row_to_rank.size(); ++i)
237 const int rank = _ghost_row_to_rank[i];
238 int row_id = local_size[0] + i;
239 for (
int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
242 const std::int32_t idx_pos = 2 * insert_pos[
rank];
245 ghost_index_data[idx_pos] = ghosts0[i];
246 if (std::int32_t col_local = _cols[j]; col_local < local_size[1])
247 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
249 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
251 insert_pos[
rank] += 1;
257 std::vector<std::int64_t> ghost_index_array;
258 std::vector<int> recv_disp;
260 std::vector<int> send_sizes;
261 std::transform(data_per_proc.begin(), data_per_proc.end(),
262 std::back_inserter(send_sizes),
263 [](
auto x) { return 2 * x; });
265 std::vector<int> recv_sizes(dest_ranks.size());
266 send_sizes.reserve(1);
267 recv_sizes.reserve(1);
268 MPI_Neighbor_alltoall(send_sizes.data(), 1, MPI_INT, recv_sizes.data(), 1,
269 MPI_INT, _comm.comm());
272 std::vector<int> send_disp = {0};
273 std::partial_sum(send_sizes.begin(), send_sizes.end(),
274 std::back_inserter(send_disp));
276 std::partial_sum(recv_sizes.begin(), recv_sizes.end(),
277 std::back_inserter(recv_disp));
279 ghost_index_array.resize(recv_disp.back());
280 MPI_Neighbor_alltoallv(ghost_index_data.data(), send_sizes.data(),
281 send_disp.data(), MPI_INT64_T,
282 ghost_index_array.data(), recv_sizes.data(),
283 recv_disp.data(), MPI_INT64_T, _comm.comm());
288 _val_recv_disp.resize(recv_disp.size());
289 std::transform(recv_disp.begin(), recv_disp.end(), _val_recv_disp.begin(),
290 [](
int d) { return d / 2; });
293 std::vector<std::pair<std::int64_t, std::int32_t>> global_to_local;
294 global_to_local.reserve(ghosts1.size());
295 std::int32_t local_i = local_size[1];
296 for (std::int64_t idx : ghosts1)
297 global_to_local.push_back({idx, global_to_local.size() + local_size[1]});
298 std::sort(global_to_local.begin(), global_to_local.end());
302 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
305 const std::int32_t local_row = ghost_index_array[i] -
local_range[0][0];
306 assert(local_row >= 0 and local_row < local_size[0]);
309 std::int32_t local_col = ghost_index_array[i + 1] -
local_range[1][0];
310 if (local_col < 0 or local_col >= local_size[1])
312 const auto it = std::lower_bound(
313 global_to_local.begin(), global_to_local.end(),
314 std::pair(ghost_index_array[i + 1], -1),
315 [](
auto& a,
auto& b) { return a.first < b.first; });
316 assert(it != global_to_local.end()
317 and it->first == ghost_index_array[i + 1]);
318 local_col = it->second;
320 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
321 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
324 auto cit = std::lower_bound(cit0, cit1, local_col);
326 assert(*cit == local_col);
327 std::size_t d = std::distance(_cols.begin(), cit);
328 _unpack_pos.push_back(d);
424 const std::int32_t local_size0 = _index_maps[0]->size_local();
425 const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
428 std::vector<int> insert_pos = _val_send_disp;
429 std::vector<T> ghost_value_data(_val_send_disp.back());
430 for (
int i = 0; i < num_ghosts0; ++i)
432 const int rank = _ghost_row_to_rank[i];
436 const std::int32_t val_pos = insert_pos[rank];
437 std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
438 std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
439 std::next(ghost_value_data.begin(), val_pos));
441 += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
444 _ghost_value_data_in.resize(_val_recv_disp.back());
447 std::vector<int> val_send_count(_val_send_disp.size() - 1);
448 std::adjacent_difference(std::next(_val_send_disp.begin()),
449 _val_send_disp.end(), val_send_count.begin());
451 std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
452 std::adjacent_difference(std::next(_val_recv_disp.begin()),
453 _val_recv_disp.end(), val_recv_count.begin());
455 int status = MPI_Ineighbor_alltoallv(
456 ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
457 dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
458 val_recv_count.data(), _val_recv_disp.data(),
459 dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
460 assert(status == MPI_SUCCESS);