8#ifndef BRICK_ARRAY_MPI_H
9#define BRICK_ARRAY_MPI_H
22 for (
unsigned long i = 0; i < size; ++i)
28 const std::vector<long> &dimlist,
const std::vector<long> &padding,
const std::vector<long> &ghost) {
33 if (neighbor.
get(dim)) {
35 st = padding[d] + dimlist[d];
36 }
else if (neighbor.
get(-(
int) dim)) {
38 st = padding[d] + ghost[d];
41 for (
unsigned i = 0; i < ghost[d]; ++i)
42 buffer_out = pack<dim - 1>(arr + arrstride[d] * (
st + i), neighbor, buffer_out,
43 arrstride, dimlist, padding, ghost);
45 for (
unsigned i = 0; i < dimlist[d]; ++i)
46 buffer_out = pack<dim - 1>(arr + arrstride[d] * (padding[d] + ghost[d] + i), neighbor, buffer_out,
47 arrstride, dimlist, padding, ghost);
55 const std::vector<long> &dimlist,
const std::vector<long> &padding,
56 const std::vector<long> &ghost) {
61 if (neighbor.
get(1)) {
63 st = padding[d] + dimlist[d];
64 }
else if (neighbor.
get(-1)) {
66 st = padding[d] + ghost[d];
70 return buffer_out + ghost[d];
72 elemcpy(buffer_out, arr + padding[d] + ghost[d], dimlist[d]);
73 return buffer_out + dimlist[d];
79 const std::vector<long> &dimlist,
const std::vector<long> &padding,
80 const std::vector<long> &ghost) {
84 int d = (int) dim - 1;
85 if (neighbor.
get(dim)) {
87 st = padding[d] + dimlist[d] + ghost[d];
88 }
else if (neighbor.
get(-(
int) dim)) {
93 for (
unsigned i = 0; i < ghost[d]; ++i)
94 buffer_recv = unpack<dim - 1>(arr + arrstride[d] * (
st + i), neighbor, buffer_recv,
95 arrstride, dimlist, padding, ghost);
97 for (
unsigned i = 0; i < dimlist[d]; ++i)
98 buffer_recv = unpack<dim - 1>(arr + arrstride[d] * (padding[d] + ghost[d] + i), neighbor, buffer_recv,
99 arrstride, dimlist, padding, ghost);
106 const std::vector<long> &dimlist,
const std::vector<long> &padding,
107 const std::vector<long> &ghost) {
112 if (neighbor.
get(1)) {
114 st = padding[d] + dimlist[d] + ghost[d];
115 }
else if (neighbor.
get(-1)) {
120 elemcpy(arr +
st, buffer_recv, ghost[d]);
121 return buffer_recv + ghost[d];
123 elemcpy(arr + padding[d] + ghost[d], buffer_recv, dimlist[d]);
124 return buffer_recv + dimlist[d];
130evalsize(
BitSet region,
const std::vector<long> &dimlist,
const std::vector<long> &ghost,
bool inner =
true) {
133 for (
int i = 1; i <= (int) dimlist.size(); ++i)
134 if (region.
get(i) || region.
get(-i))
135 size = size * ghost[i - 1];
137 size = size * (dimlist[i - 1] - (inner ? 2 * ghost[i - 1] : 0));
145template<
unsigned dim>
146void exchangeArr(
bElem *arr,
const MPI_Comm &comm, std::unordered_map<uint64_t, int> &rank_map,
147 const std::vector<long> &dimlist,
const std::vector<long> &padding,
const std::vector<long> &ghost) {
148 std::vector<BitSet> neighbors;
150 neighbors.erase(neighbors.begin() + (neighbors.size() / 2));
151 std::vector<unsigned long> tot(neighbors.size());
152 std::vector<MPI_Request> requests(neighbors.size() * 2);
153 std::vector<MPI_Status> stats(requests.size());
155 std::vector<unsigned long> arrstride(dimlist.size());
156 unsigned long stri = 1;
158 for (
int i = 0; i < arrstride.size(); ++i) {
160 stri = stri * ((padding[i] + ghost[i]) * 2 + dimlist[i]);
163 for (
int i = 0; i < (int) neighbors.size(); ++i) {
164 tot[i] = (
unsigned long)
evalsize(neighbors[i], dimlist, ghost,
false);
168 for (
int i = 0; i < (int) neighbors.size(); ++i) {
173 double st = omp_get_wtime(), ed;
175#pragma omp parallel for
176 for (
int i = 0; i < (int) neighbors.size(); ++i)
177 pack<dim>(arr, neighbors[i],
arr_buffers_out[i], arrstride, dimlist, padding, ghost);
179 ed = omp_get_wtime();
182#ifdef BARRIER_TIMESTEP
186 st = omp_get_wtime();
188 for (
int i = 0; i < (int) neighbors.size(); ++i) {
190 (
int) neighbors.size() - i - 1, comm, &(requests[i * 2]));
191 MPI_Isend(
arr_buffers_out[i], (
int) (tot[i] *
sizeof(
bElem)), MPI_CHAR, rank_map[neighbors[i].set], i, comm,
192 &(requests[i * 2 + 1]));
195 ed = omp_get_wtime();
200 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
202 ed = omp_get_wtime();
207#pragma omp parallel for
208 for (
int i = 0; i < (int) neighbors.size(); ++i)
209 unpack<dim>(arr, neighbors[i],
arr_buffers_recv[i], arrstride, dimlist, padding, ghost);
211 ed = omp_get_wtime();
215inline MPI_Datatype
pack_type(
BitSet neighbor,
const std::vector<long> &dimlist,
const std::vector<long> &padding,
216 const std::vector<long> &ghost) {
217 int ndims = dimlist.size();
218 std::vector<int> size(ndims), subsize(ndims), start(ndims);
219 for (
long dd = 0; dd < dimlist.size(); ++dd) {
220 long d = (long)dimlist.size() - dd - 1;
221 size[dd] = dimlist[d] + 2 * (padding[d] + ghost[d]);
224 if (neighbor.
get(dim)) {
226 start[dd] = padding[d] + dimlist[d];
227 }
else if (neighbor.
get(-(
int) dim)) {
229 start[dd] = padding[d] + ghost[d];
232 subsize[dd] = ghost[d];
234 subsize[dd] = dimlist[d];
235 start[dd] = padding[d] + ghost[d];
240 MPI_Type_create_subarray(ndims, size.data(), subsize.data(), start.data(), MPI_ORDER_C, MPI_DOUBLE, &ret);
244inline MPI_Datatype
unpack_type(
BitSet neighbor,
const std::vector<long> &dimlist,
const std::vector<long> &padding,
245 const std::vector<long> &ghost) {
246 int ndims = dimlist.size();
247 std::vector<int> size(ndims), subsize(ndims), start(ndims);
248 for (
long dd = 0; dd < dimlist.size(); ++dd) {
249 long d = (long)dimlist.size() - dd - 1;
250 size[dd] = dimlist[d] + 2 * (padding[d] + ghost[d]);
253 if (neighbor.
get(dim)) {
255 start[dd] = padding[d] + dimlist[d] + ghost[d];
256 }
else if (neighbor.
get(-(
int) dim)) {
258 start[dd] = padding[d];
261 subsize[dd] = ghost[d];
263 subsize[dd] = dimlist[d];
264 start[dd] = padding[d] + ghost[d];
269 MPI_Type_create_subarray(ndims, size.data(), subsize.data(), start.data(), MPI_ORDER_C, MPI_DOUBLE, &ret);
273template<
unsigned dim>
275 std::unordered_map<uint64_t, MPI_Datatype> &rtypemap,
276 const std::vector<long> &dimlist,
const std::vector<long> &padding,
277 const std::vector<long> &ghost) {
278 std::vector<BitSet> neighbors;
280 neighbors.erase(neighbors.begin() + (neighbors.size() / 2));
281 std::vector<MPI_Request> requests(neighbors.size() * 2);
283 for (
auto n: neighbors) {
284 MPI_Datatype MPI_rtype =
unpack_type(n, dimlist, padding, ghost);
285 MPI_Type_commit(&MPI_rtype);
286 rtypemap[n.set] = MPI_rtype;
287 MPI_Datatype MPI_stype =
pack_type(n, dimlist, padding, ghost);
288 MPI_Type_commit(&MPI_stype);
289 stypemap[n.set] = MPI_stype;
294template<
unsigned dim>
296 std::unordered_map<uint64_t, MPI_Datatype> &stypemap,
297 std::unordered_map<uint64_t, MPI_Datatype> &rtypemap) {
298 std::vector<BitSet> neighbors;
300 neighbors.erase(neighbors.begin() + (neighbors.size() / 2));
301 std::vector<MPI_Request> requests(neighbors.size() * 2);
304 MPI_Comm_rank(comm, &rank);
306 double st = omp_get_wtime(), ed;
308 for (
int i = 0; i < (int) neighbors.size(); ++i) {
309 MPI_Irecv(arr, 1, rtypemap[neighbors[i].set], rank_map[neighbors[i].set],
310 (
int) neighbors.size() - i - 1, comm, &(requests[i * 2]));
311 MPI_Isend(arr, 1, stypemap[neighbors[i].set], rank_map[neighbors[i].set], i, comm, &(requests[i * 2 + 1]));
314 ed = omp_get_wtime();
319 std::vector<MPI_Status> stats(requests.size());
320 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
322 ed = omp_get_wtime();
329 std::unordered_map<uint64_t, int> *
id_map;
333template<
unsigned dim>
335 const std::vector<long> &dimlist,
const std::vector<long> &padding,
336 const std::vector<long> &ghost) {
337 std::vector<BitSet> neighbors;
339 neighbors.erase(neighbors.begin() + (neighbors.size() / 2));
340 std::vector<bElem *> buffers_out(arr.size() * neighbors.size(),
nullptr);
341 std::vector<bElem *> buffers_recv(arr.size() * neighbors.size(),
nullptr);
342 std::vector<unsigned long> tot(neighbors.size());
343 std::vector<MPI_Request> requests(arr.size() * neighbors.size() * 2);
345 std::vector<unsigned long> arrstride(dimlist.size());
346 unsigned long stri = 1;
348 for (
int i = 0; i < arrstride.size(); ++i) {
350 stri = stri * ((padding[i] + ghost[i]) * 2 + dimlist[i]);
353 for (
int i = 0; i < (int) neighbors.size(); ++i) {
354 tot[i] = (
unsigned long)
evalsize(neighbors[i], dimlist, ghost,
false);
355 for (
int s = 0; s < arr.size(); ++s) {
356 buffers_recv[i + s * neighbors.size()] =
new bElem[tot[i]];
357 buffers_out[i + s * neighbors.size()] =
new bElem[tot[i]];
361 double st = omp_get_wtime(), ed;
364#pragma omp parallel for
365 for (
int i = 0; i < (int) neighbors.size(); ++i)
366 for (
int s = 0; s < arr.size(); ++s)
367 pack<dim>(arr[s].arr, neighbors[i], buffers_out[i + s * neighbors.size()], arrstride, dimlist, padding, ghost);
369 ed = omp_get_wtime();
372#ifdef BARRIER_TIMESTEP
376 st = omp_get_wtime();
378 for (
int i = 0; i < (int) neighbors.size(); ++i)
379 for (
int s = 0; s < arr.size(); ++s) {
380 MPI_Irecv(buffers_recv[i + s * neighbors.size()], (
int) (tot[i] *
sizeof(
bElem)), MPI_CHAR,
381 arr[s].rank_map->at(neighbors[i].set),
382 arr[s].id_map->at(neighbors[i].set) * 100 + (
int) neighbors.size() - i - 1,
383 comm, &(requests[i * 2 + s * neighbors.size() * 2]));
384 MPI_Isend(buffers_out[i + s * neighbors.size()], (
int) (tot[i] *
sizeof(
bElem)), MPI_CHAR,
385 arr[s].rank_map->at(neighbors[i].set), arr[s].id * 100 + i, comm,
386 &(requests[i * 2 + s * neighbors.size() * 2 + 1]));
389 ed = omp_get_wtime();
394 std::vector<MPI_Status> stats(requests.size());
395 MPI_Waitall(
static_cast<int>(requests.size()), requests.data(), stats.data());
397 ed = omp_get_wtime();
402#pragma omp parallel for
403 for (
int i = 0; i < (int) neighbors.size(); ++i)
404 for (
int s = 0; s < arr.size(); ++s)
405 unpack<dim>(arr[s].arr, neighbors[i], buffers_recv[i + s * neighbors.size()], arrstride, dimlist, padding, ghost);
407 ed = omp_get_wtime();
411 for (
auto b: buffers_out)
413 for (
auto b: buffers_recv)
unsigned evalsize(BitSet region, const std::vector< long > &dimlist, const std::vector< long > &ghost, bool inner=true)
Definition: array-mpi.h:130
void exchangeArrAll(std::vector< ArrExPack > arr, const MPI_Comm &comm, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:334
void exchangeArr(bElem *arr, const MPI_Comm &comm, std::unordered_map< uint64_t, int > &rank_map, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:146
bElem * pack(bElem *arr, BitSet neighbor, bElem *buffer_out, const std::vector< unsigned long > &arrstride, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:27
std::vector< bElem * > arr_buffers_recv
Definition: array-mpi.cpp:8
bElem * unpack(bElem *arr, BitSet neighbor, bElem *buffer_recv, const std::vector< unsigned long > &arrstride, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:78
MPI_Datatype unpack_type(BitSet neighbor, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:244
void elemcpy(bElem *dst, const bElem *src, unsigned long size)
Definition: array-mpi.h:20
bElem * pack< 1 >(bElem *arr, BitSet neighbor, bElem *buffer_out, const std::vector< unsigned long > &arrstride, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:54
MPI_Datatype pack_type(BitSet neighbor, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:215
void exchangeArrTypes(bElem *arr, const MPI_Comm &comm, std::unordered_map< uint64_t, int > &rank_map, std::unordered_map< uint64_t, MPI_Datatype > &stypemap, std::unordered_map< uint64_t, MPI_Datatype > &rtypemap)
Definition: array-mpi.h:295
bElem * unpack< 1 >(bElem *arr, BitSet neighbor, bElem *buffer_recv, const std::vector< unsigned long > &arrstride, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:105
void exchangeArrPrepareTypes(std::unordered_map< uint64_t, MPI_Datatype > &stypemap, std::unordered_map< uint64_t, MPI_Datatype > &rtypemap, const std::vector< long > &dimlist, const std::vector< long > &padding, const std::vector< long > &ghost)
Definition: array-mpi.h:274
std::vector< bElem * > arr_buffers_out
Definition: array-mpi.cpp:7
MPI stuff related to bricks.
void allneighbors(BitSet cur, long idx, long dim, std::vector< BitSet > &neighbors)
Enumerate all neighbors.
Definition: brick-mpi.cpp:9
double waittime
Definition: brick-mpi.h:23
double calltime
Definition: brick-mpi.h:23
double packtime
Definition: brick-mpi.cpp:7
Definition: __init__.py:1
Definition: array-mpi.h:326
std::unordered_map< uint64_t, int > * id_map
Definition: array-mpi.h:329
int id
Definition: array-mpi.h:330
bElem * arr
Definition: array-mpi.h:327
std::unordered_map< uint64_t, int > * rank_map
Definition: array-mpi.h:328
Set using bitfield.
Definition: bitset.h:18
bool get(long pos) const
Return whether a number is in the set.
Definition: bitset.h:75
#define bElem
Basic datatype for all brick elements.
Definition: vecscatter.h:13