1 | /*---------------------------------------------------------------------------- |
---|
2 | ADOL-C -- Automatic Differentiation by Overloading in C++ |
---|
3 | File: ampisupport.cpp |
---|
4 | Revision: $Id$ |
---|
5 | |
---|
6 | Copyright (c) Jean Utke |
---|
7 | |
---|
8 | This file is part of ADOL-C. This software is provided as open source. |
---|
9 | Any use, reproduction, or distribution of the software constitutes |
---|
10 | recipient's acceptance of the terms of the accompanying license file. |
---|
11 | |
---|
12 | ----------------------------------------------------------------------------*/ |
---|
13 | |
---|
14 | #include <cassert> |
---|
15 | #include <cstring> |
---|
16 | #include <climits> |
---|
17 | |
---|
18 | #include "taping_p.h" |
---|
19 | #include "oplate.h" |
---|
20 | #include "adolc/adouble.h" |
---|
21 | |
---|
22 | #ifdef ADOLC_AMPI_SUPPORT |
---|
23 | #include "ampi/ampi.h" |
---|
24 | #include "ampi/adTool/support.h" |
---|
25 | #include "ampi/tape/support.h" |
---|
26 | #include "ampi/libCommon/modified.h" |
---|
27 | #include "ampisupportAdolc.h" |
---|
28 | |
---|
/* File-scope MPI communicator handle; it is defined here but not assigned
   anywhere in the portion of the file visible in this chunk. */
MPI_Comm ADTOOL_AMPI_COMM_WORLD_SHADOW;
---|
30 | |
---|
31 | int AMPI_Init_NT(int* argc, |
---|
32 | char*** argv) { |
---|
33 | int rc; |
---|
34 | rc=MPI_Init(argc, |
---|
35 | argv); |
---|
36 | ADTOOL_AMPI_setupTypes(); |
---|
37 | ADOLC_TLM_init(); |
---|
38 | ourADTOOL_AMPI_FPCollection.pushBcastInfo_fp=&ADTOOL_AMPI_pushBcastInfo; |
---|
39 | ourADTOOL_AMPI_FPCollection.popBcastInfo_fp=&ADTOOL_AMPI_popBcastInfo; |
---|
40 | ourADTOOL_AMPI_FPCollection.pushDoubleArray_fp=&ADTOOL_AMPI_pushDoubleArray; |
---|
41 | ourADTOOL_AMPI_FPCollection.popDoubleArray_fp=&ADTOOL_AMPI_popDoubleArray; |
---|
42 | ourADTOOL_AMPI_FPCollection.pushReduceInfo_fp=&ADTOOL_AMPI_pushReduceInfo; |
---|
43 | ourADTOOL_AMPI_FPCollection.popReduceCountAndType_fp=&ADTOOL_AMPI_popReduceCountAndType; |
---|
44 | ourADTOOL_AMPI_FPCollection.popReduceInfo_fp=&ADTOOL_AMPI_popReduceInfo; |
---|
45 | ourADTOOL_AMPI_FPCollection.pushSRinfo_fp=&ADTOOL_AMPI_pushSRinfo; |
---|
46 | ourADTOOL_AMPI_FPCollection.popSRinfo_fp=&ADTOOL_AMPI_popSRinfo; |
---|
47 | ourADTOOL_AMPI_FPCollection.pushGSinfo_fp=&ADTOOL_AMPI_pushGSinfo; |
---|
48 | ourADTOOL_AMPI_FPCollection.popGScommSizeForRootOrNull_fp=&ADTOOL_AMPI_popGScommSizeForRootOrNull; |
---|
49 | ourADTOOL_AMPI_FPCollection.popGSinfo_fp=&ADTOOL_AMPI_popGSinfo; |
---|
50 | ourADTOOL_AMPI_FPCollection.pushGSVinfo_fp=&ADTOOL_AMPI_pushGSVinfo; |
---|
51 | ourADTOOL_AMPI_FPCollection.popGSVinfo_fp=&ADTOOL_AMPI_popGSVinfo; |
---|
52 | ourADTOOL_AMPI_FPCollection.push_CallCode_fp=&ADTOOL_AMPI_push_CallCode; |
---|
53 | ourADTOOL_AMPI_FPCollection.pop_CallCode_fp=&ADTOOL_AMPI_pop_CallCode; |
---|
54 | ourADTOOL_AMPI_FPCollection.push_AMPI_Request_fp=&ADTOOL_AMPI_push_AMPI_Request; |
---|
55 | ourADTOOL_AMPI_FPCollection.pop_AMPI_Request_fp=&ADTOOL_AMPI_pop_AMPI_Request; |
---|
56 | ourADTOOL_AMPI_FPCollection.push_request_fp=&ADTOOL_AMPI_push_request; |
---|
57 | ourADTOOL_AMPI_FPCollection.pop_request_fp=&ADTOOL_AMPI_pop_request; |
---|
58 | ourADTOOL_AMPI_FPCollection.push_comm_fp=&ADTOOL_AMPI_push_comm; |
---|
59 | ourADTOOL_AMPI_FPCollection.pop_comm_fp=&ADTOOL_AMPI_pop_comm; |
---|
60 | ourADTOOL_AMPI_FPCollection.rawData_fp=&ADTOOL_AMPI_rawData; |
---|
61 | ourADTOOL_AMPI_FPCollection.rawDataV_fp=&ADTOOL_AMPI_rawDataV; |
---|
62 | ourADTOOL_AMPI_FPCollection.packDType_fp=&ADTOOL_AMPI_packDType; |
---|
63 | ourADTOOL_AMPI_FPCollection.unpackDType_fp=&ADTOOL_AMPI_unpackDType; |
---|
64 | ourADTOOL_AMPI_FPCollection.writeData_fp=&ADTOOL_AMPI_writeData; |
---|
65 | ourADTOOL_AMPI_FPCollection.writeDataV_fp=&ADTOOL_AMPI_writeDataV; |
---|
66 | ourADTOOL_AMPI_FPCollection.rawAdjointData_fp=&ADTOOL_AMPI_rawAdjointData; |
---|
67 | ourADTOOL_AMPI_FPCollection.Turn_fp=&ADTOOL_AMPI_Turn; |
---|
68 | ourADTOOL_AMPI_FPCollection.mapBufForAdjoint_fp=&ADTOOL_AMPI_mapBufForAdjoint; |
---|
69 | ourADTOOL_AMPI_FPCollection.setBufForAdjoint_fp=&ADTOOL_AMPI_setBufForAdjoint; |
---|
70 | ourADTOOL_AMPI_FPCollection.getAdjointCount_fp=&ADTOOL_AMPI_getAdjointCount; |
---|
71 | ourADTOOL_AMPI_FPCollection.setAdjointCount_fp=&ADTOOL_AMPI_setAdjointCount; |
---|
72 | ourADTOOL_AMPI_FPCollection.setAdjointCountAndTempBuf_fp=&ADTOOL_AMPI_setAdjointCountAndTempBuf; |
---|
73 | ourADTOOL_AMPI_FPCollection.allocateTempBuf_fp=&ADTOOL_AMPI_allocateTempBuf; |
---|
74 | ourADTOOL_AMPI_FPCollection.releaseAdjointTempBuf_fp=&ADTOOL_AMPI_releaseAdjointTempBuf; |
---|
75 | ourADTOOL_AMPI_FPCollection.incrementAdjoint_fp=&ADTOOL_AMPI_incrementAdjoint; |
---|
76 | ourADTOOL_AMPI_FPCollection.multiplyAdjoint_fp=&ADTOOL_AMPI_multiplyAdjoint; |
---|
77 | ourADTOOL_AMPI_FPCollection.divideAdjoint_fp=&ADTOOL_AMPI_divideAdjoint; |
---|
78 | ourADTOOL_AMPI_FPCollection.equalAdjoints_fp=&ADTOOL_AMPI_equalAdjoints; |
---|
79 | ourADTOOL_AMPI_FPCollection.nullifyAdjoint_fp=&ADTOOL_AMPI_nullifyAdjoint; |
---|
80 | ourADTOOL_AMPI_FPCollection.setupTypes_fp=&ADTOOL_AMPI_setupTypes; |
---|
81 | ourADTOOL_AMPI_FPCollection.cleanupTypes_fp=&ADTOOL_AMPI_cleanupTypes; |
---|
82 | ourADTOOL_AMPI_FPCollection.FW_rawType_fp=&ADTOOL_AMPI_FW_rawType; |
---|
83 | ourADTOOL_AMPI_FPCollection.BW_rawType_fp=&ADTOOL_AMPI_BW_rawType; |
---|
84 | ourADTOOL_AMPI_FPCollection.isActiveType_fp=&ADTOOL_AMPI_isActiveType; |
---|
85 | ourADTOOL_AMPI_FPCollection.allocateTempActiveBuf_fp=&ADTOOL_AMPI_allocateTempActiveBuf; |
---|
86 | ourADTOOL_AMPI_FPCollection.releaseTempActiveBuf_fp=&ADTOOL_AMPI_releaseTempActiveBuf; |
---|
87 | ourADTOOL_AMPI_FPCollection.copyActiveBuf_fp=&ADTOOL_AMPI_copyActiveBuf; |
---|
88 | return rc; |
---|
89 | } |
---|
90 | |
---|
91 | locint startLocAssertContiguous(adouble* adoubleBuffer, int count) { |
---|
92 | locint start=0; |
---|
93 | if (count>0) { |
---|
94 | start=adoubleBuffer->loc(); |
---|
95 | assert(start+count-1==(adoubleBuffer+count-1)->loc()); // buf must have consecutive ascending locations |
---|
96 | } |
---|
97 | return start; |
---|
98 | } |
---|
99 | |
---|
100 | void ADTOOL_AMPI_pushBcastInfo(void* buf, |
---|
101 | int count, |
---|
102 | MPI_Datatype datatype, |
---|
103 | int root, |
---|
104 | MPI_Comm comm) { |
---|
105 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
106 | int i, dt_idx = derivedTypeIdx(datatype); |
---|
107 | int activeVarCount, bitCountToFirstActive, bitCountToLastActive; |
---|
108 | if (isDerivedType(dt_idx)) { |
---|
109 | derivedTypeData* dtdata = getDTypeData(); |
---|
110 | activeVarCount = dtdata->num_actives[dt_idx]*count; |
---|
111 | bitCountToFirstActive = dtdata->first_active_blocks[dt_idx];; |
---|
112 | bitCountToLastActive = (count-1)*dtdata->extents[dt_idx] |
---|
113 | + dtdata->last_active_blocks[dt_idx] |
---|
114 | + sizeof(adouble)*(dtdata->last_active_block_lengths[dt_idx]-1); |
---|
115 | } |
---|
116 | else { |
---|
117 | activeVarCount = count; |
---|
118 | bitCountToFirstActive = 0; |
---|
119 | bitCountToLastActive = (count-1)*sizeof(adouble); |
---|
120 | } |
---|
121 | if (count>0) { |
---|
122 | assert(buf); |
---|
123 | locint start=((adouble*)((char*)buf+bitCountToFirstActive))->loc(); |
---|
124 | locint end=((adouble*)((char*)buf+bitCountToLastActive))->loc(); |
---|
125 | assert(start+activeVarCount-1==end); // buf must have consecutive ascending locations |
---|
126 | ADOLC_PUT_LOCINT(start); |
---|
127 | } |
---|
128 | else { |
---|
129 | ADOLC_PUT_LOCINT(0); // have to put something |
---|
130 | } |
---|
131 | TAPE_AMPI_push_int(count); |
---|
132 | TAPE_AMPI_push_MPI_Datatype(datatype); |
---|
133 | TAPE_AMPI_push_int(root); |
---|
134 | TAPE_AMPI_push_MPI_Comm(comm); |
---|
135 | } |
---|
136 | } |
---|
137 | |
---|
138 | void ADTOOL_AMPI_popBcastInfo(void** buf, |
---|
139 | int* count, |
---|
140 | MPI_Datatype* datatype, |
---|
141 | int* root, |
---|
142 | MPI_Comm* comm, |
---|
143 | void **idx) { |
---|
144 | TAPE_AMPI_pop_MPI_Comm(comm); |
---|
145 | TAPE_AMPI_pop_int(root); |
---|
146 | TAPE_AMPI_pop_MPI_Datatype(datatype); |
---|
147 | TAPE_AMPI_pop_int(count); |
---|
148 | *buf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
149 | } |
---|
150 | |
---|
151 | void ADTOOL_AMPI_pushDoubleArray(void* buf, |
---|
152 | int count) { |
---|
153 | int i; |
---|
154 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
155 | for (i=0;i<count;i++) { |
---|
156 | TAPE_AMPI_push_double(((adouble*)(buf))[i].value()); |
---|
157 | } |
---|
158 | } |
---|
159 | } |
---|
160 | |
---|
161 | void ADTOOL_AMPI_popDoubleArray(double* buf, |
---|
162 | int* count) { |
---|
163 | int i; |
---|
164 | for (i=*count-1;i>=0;i--) { |
---|
165 | TAPE_AMPI_pop_double(&(buf[i])); |
---|
166 | } |
---|
167 | } |
---|
168 | |
---|
169 | void ADTOOL_AMPI_pushReduceInfo(void* sbuf, |
---|
170 | void* rbuf, |
---|
171 | void* resultData, |
---|
172 | int pushResultData, /* push resultData if true */ |
---|
173 | int count, |
---|
174 | MPI_Datatype datatype, |
---|
175 | MPI_Op op, |
---|
176 | int root, |
---|
177 | MPI_Comm comm) { |
---|
178 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
179 | if (count>0) { |
---|
180 | assert(rbuf); |
---|
181 | ADOLC_PUT_LOCINT(startLocAssertContiguous((adouble*)rbuf,count)); |
---|
182 | ADOLC_PUT_LOCINT(startLocAssertContiguous((adouble*)sbuf,count)); |
---|
183 | } |
---|
184 | else { |
---|
185 | ADOLC_PUT_LOCINT(0); |
---|
186 | ADOLC_PUT_LOCINT(0); |
---|
187 | } |
---|
188 | TAPE_AMPI_push_int(count); |
---|
189 | TAPE_AMPI_push_int(pushResultData); |
---|
190 | ADTOOL_AMPI_pushDoubleArray(sbuf,count); |
---|
191 | if (pushResultData) ADTOOL_AMPI_pushDoubleArray(resultData,count); |
---|
192 | TAPE_AMPI_push_int(pushResultData); |
---|
193 | TAPE_AMPI_push_MPI_Op(op); |
---|
194 | TAPE_AMPI_push_int(root); |
---|
195 | TAPE_AMPI_push_MPI_Comm(comm); |
---|
196 | TAPE_AMPI_push_MPI_Datatype(datatype); |
---|
197 | TAPE_AMPI_push_int(count); |
---|
198 | } |
---|
199 | } |
---|
200 | |
---|
201 | void ADTOOL_AMPI_popReduceCountAndType(int* count, |
---|
202 | MPI_Datatype* datatype) { |
---|
203 | TAPE_AMPI_pop_int(count); |
---|
204 | TAPE_AMPI_pop_MPI_Datatype(datatype); |
---|
205 | } |
---|
206 | |
---|
207 | void ADTOOL_AMPI_popReduceInfo(void** sbuf, |
---|
208 | void** rbuf, |
---|
209 | void** prevData, |
---|
210 | void** resultData, |
---|
211 | int* count, |
---|
212 | MPI_Op* op, |
---|
213 | int* root, |
---|
214 | MPI_Comm* comm, |
---|
215 | void **idx) { |
---|
216 | int popResultData; |
---|
217 | TAPE_AMPI_pop_MPI_Comm(comm); |
---|
218 | TAPE_AMPI_pop_int(root); |
---|
219 | TAPE_AMPI_pop_MPI_Op(op); |
---|
220 | TAPE_AMPI_pop_int(&popResultData); |
---|
221 | if (popResultData) ADTOOL_AMPI_popDoubleArray((double*)(*resultData),count); |
---|
222 | ADTOOL_AMPI_popDoubleArray((double*)(*prevData),count); |
---|
223 | TAPE_AMPI_pop_int(&popResultData); |
---|
224 | TAPE_AMPI_pop_int(count); |
---|
225 | *sbuf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
226 | *rbuf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
227 | } |
---|
228 | |
---|
229 | void ADTOOL_AMPI_pushSRinfo(void* buf, |
---|
230 | int count, |
---|
231 | MPI_Datatype datatype, |
---|
232 | int endPoint, |
---|
233 | int tag, |
---|
234 | enum AMPI_PairedWith_E pairedWith, |
---|
235 | MPI_Comm comm) { |
---|
236 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
237 | int i, dt_idx = derivedTypeIdx(datatype); |
---|
238 | int activeVarCount, bitCountToFirstActive, bitCountToLastActive; |
---|
239 | if (isDerivedType(dt_idx)) { |
---|
240 | derivedTypeData* dtdata = getDTypeData(); |
---|
241 | activeVarCount = dtdata->num_actives[dt_idx]*count; |
---|
242 | bitCountToFirstActive = dtdata->first_active_blocks[dt_idx]; |
---|
243 | bitCountToLastActive = (count-1)*dtdata->extents[dt_idx] |
---|
244 | + dtdata->last_active_blocks[dt_idx] |
---|
245 | + sizeof(adouble)*(dtdata->last_active_block_lengths[dt_idx]-1); |
---|
246 | } |
---|
247 | else { activeVarCount = count; bitCountToFirstActive = 0; bitCountToLastActive = count-1; } |
---|
248 | if (count>0) { |
---|
249 | assert(buf); |
---|
250 | locint start=((adouble*)((char*)buf+bitCountToFirstActive))->loc(); |
---|
251 | locint end=((adouble*)((char*)buf+bitCountToLastActive))->loc(); |
---|
252 | assert(start+activeVarCount-1==end); // buf must have consecutive ascending locations |
---|
253 | ADOLC_PUT_LOCINT(start); |
---|
254 | } |
---|
255 | else { |
---|
256 | ADOLC_PUT_LOCINT(0); // have to put something |
---|
257 | } |
---|
258 | TAPE_AMPI_push_int(count); |
---|
259 | TAPE_AMPI_push_MPI_Datatype(datatype); |
---|
260 | TAPE_AMPI_push_int(endPoint); |
---|
261 | TAPE_AMPI_push_int(tag); |
---|
262 | TAPE_AMPI_push_int(pairedWith); |
---|
263 | TAPE_AMPI_push_MPI_Comm(comm); |
---|
264 | } |
---|
265 | } |
---|
266 | |
---|
267 | void ADTOOL_AMPI_popSRinfo(void** buf, |
---|
268 | int* count, |
---|
269 | MPI_Datatype* datatype, |
---|
270 | int* endPoint, |
---|
271 | int* tag, |
---|
272 | AMPI_PairedWith_E* pairedWith, |
---|
273 | MPI_Comm* comm, |
---|
274 | void **idx) { |
---|
275 | TAPE_AMPI_pop_MPI_Comm(comm); |
---|
276 | TAPE_AMPI_pop_int((int*)pairedWith); |
---|
277 | TAPE_AMPI_pop_int(tag); |
---|
278 | TAPE_AMPI_pop_int(endPoint); |
---|
279 | TAPE_AMPI_pop_MPI_Datatype(datatype); |
---|
280 | TAPE_AMPI_pop_int(count); |
---|
281 | *buf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
282 | } |
---|
283 | |
---|
284 | void ADTOOL_AMPI_pushGSinfo(int commSizeForRootOrNull, |
---|
285 | void *rbuf, |
---|
286 | int rcnt, |
---|
287 | MPI_Datatype rtype, |
---|
288 | void *buf, |
---|
289 | int count, |
---|
290 | MPI_Datatype type, |
---|
291 | int root, |
---|
292 | MPI_Comm comm) { |
---|
293 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
294 | int i; |
---|
295 | TAPE_AMPI_push_int(commSizeForRootOrNull); // counter at the beginning |
---|
296 | if(commSizeForRootOrNull>0) { |
---|
297 | TAPE_AMPI_push_int(rcnt); |
---|
298 | assert(rbuf); |
---|
299 | ADOLC_PUT_LOCINT(startLocAssertContiguous((adouble*)rbuf,rcnt)); |
---|
300 | TAPE_AMPI_push_MPI_Datatype(rtype); |
---|
301 | } |
---|
302 | locint start=0; // have to put something regardless |
---|
303 | if (buf!=MPI_IN_PLACE && count>0) { |
---|
304 | assert(buf); |
---|
305 | start=startLocAssertContiguous((adouble*)buf,count); |
---|
306 | } |
---|
307 | else { |
---|
308 | count=0; |
---|
309 | } |
---|
310 | ADOLC_PUT_LOCINT(start); |
---|
311 | TAPE_AMPI_push_int(count); |
---|
312 | TAPE_AMPI_push_MPI_Datatype(type); |
---|
313 | TAPE_AMPI_push_int(root); |
---|
314 | TAPE_AMPI_push_MPI_Comm(comm); |
---|
315 | TAPE_AMPI_push_int(commSizeForRootOrNull); // counter at the end |
---|
316 | } |
---|
317 | } |
---|
318 | |
---|
319 | void ADTOOL_AMPI_popGScommSizeForRootOrNull(int *commSizeForRootOrNull) { |
---|
320 | TAPE_AMPI_pop_int(commSizeForRootOrNull); |
---|
321 | } |
---|
322 | |
---|
323 | void ADTOOL_AMPI_popGSinfo(int commSizeForRootOrNull, |
---|
324 | void **rbuf, |
---|
325 | int *rcnt, |
---|
326 | MPI_Datatype *rtype, |
---|
327 | void **buf, |
---|
328 | int *count, |
---|
329 | MPI_Datatype *type, |
---|
330 | int *root, |
---|
331 | MPI_Comm *comm) { |
---|
332 | int i; |
---|
333 | TAPE_AMPI_pop_MPI_Comm(comm); |
---|
334 | TAPE_AMPI_pop_int(root); |
---|
335 | TAPE_AMPI_pop_MPI_Datatype(type); |
---|
336 | TAPE_AMPI_pop_int(count); |
---|
337 | locint bufLoc=get_locint_r(); |
---|
338 | if (*count==0) { |
---|
339 | if (commSizeForRootOrNull) *buf=MPI_IN_PLACE; |
---|
340 | else *buf=0; |
---|
341 | } |
---|
342 | else *buf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[bufLoc])); |
---|
343 | if (commSizeForRootOrNull>0) { |
---|
344 | TAPE_AMPI_pop_MPI_Datatype(rtype); |
---|
345 | *rbuf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
346 | TAPE_AMPI_pop_int(rcnt); |
---|
347 | } |
---|
348 | else { |
---|
349 | // at least initialize to something nonrandom |
---|
350 | // because we know we always have valid addresses passed in here |
---|
351 | // NOTE JU: may not be true for source transformation... |
---|
352 | *rbuf=0; |
---|
353 | *rcnt=0; |
---|
354 | } |
---|
355 | TAPE_AMPI_pop_int(&commSizeForRootOrNull); |
---|
356 | } |
---|
357 | |
---|
358 | void ADTOOL_AMPI_pushGSVinfo(int commSizeForRootOrNull, |
---|
359 | void *rbuf, |
---|
360 | int *rcnts, |
---|
361 | int *displs, |
---|
362 | MPI_Datatype rtype, |
---|
363 | void *buf, |
---|
364 | int count, |
---|
365 | MPI_Datatype type, |
---|
366 | int root, |
---|
367 | MPI_Comm comm) { |
---|
368 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
369 | int i; |
---|
370 | int minDispls=INT_MAX,endOffsetMax=0; |
---|
371 | TAPE_AMPI_push_int(commSizeForRootOrNull); // counter at the beginning |
---|
372 | for (i=0;i<commSizeForRootOrNull;++i) { |
---|
373 | TAPE_AMPI_push_int(rcnts[i]); |
---|
374 | TAPE_AMPI_push_int(displs[i]); |
---|
375 | if (rcnts[i]>0) { |
---|
376 | if (minDispls>displs[i]) minDispls=displs[i]; |
---|
377 | if (endOffsetMax<displs[i]+rcnts[i]) endOffsetMax=displs[i]+rcnts[i]; |
---|
378 | } |
---|
379 | if (endOffsetMax==0) minDispls=0; |
---|
380 | } |
---|
381 | if (commSizeForRootOrNull>0) { |
---|
382 | assert(minDispls==0); // don't want to make assumptions about memory layout for nonzero displacements |
---|
383 | assert(rbuf); |
---|
384 | ADOLC_PUT_LOCINT(startLocAssertContiguous((adouble*)rbuf,endOffsetMax)); |
---|
385 | TAPE_AMPI_push_MPI_Datatype(rtype); |
---|
386 | } |
---|
387 | locint start=0; // have to put something regardless |
---|
388 | if (count>0 && buf!=MPI_IN_PLACE) { |
---|
389 | assert(buf); |
---|
390 | start=startLocAssertContiguous((adouble*)buf,count); |
---|
391 | } |
---|
392 | else { |
---|
393 | count=0; |
---|
394 | } |
---|
395 | ADOLC_PUT_LOCINT(start); |
---|
396 | TAPE_AMPI_push_int(count); |
---|
397 | TAPE_AMPI_push_MPI_Datatype(type); |
---|
398 | TAPE_AMPI_push_int(root); |
---|
399 | TAPE_AMPI_push_MPI_Comm(comm); |
---|
400 | TAPE_AMPI_push_int(commSizeForRootOrNull); // counter at the end |
---|
401 | } |
---|
402 | } |
---|
403 | |
---|
404 | void ADTOOL_AMPI_popGSVinfo(int commSizeForRootOrNull, |
---|
405 | void **rbuf, |
---|
406 | int *rcnts, |
---|
407 | int *displs, |
---|
408 | MPI_Datatype *rtype, |
---|
409 | void **buf, |
---|
410 | int *count, |
---|
411 | MPI_Datatype *type, |
---|
412 | int *root, |
---|
413 | MPI_Comm *comm) { |
---|
414 | int i; |
---|
415 | TAPE_AMPI_pop_MPI_Comm(comm); |
---|
416 | TAPE_AMPI_pop_int(root); |
---|
417 | TAPE_AMPI_pop_MPI_Datatype(type); |
---|
418 | TAPE_AMPI_pop_int(count); |
---|
419 | locint bufLoc=get_locint_r(); |
---|
420 | if (*count==0) { |
---|
421 | if (commSizeForRootOrNull) *buf=MPI_IN_PLACE; |
---|
422 | else *buf=0; |
---|
423 | } |
---|
424 | else *buf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[bufLoc])); |
---|
425 | if (commSizeForRootOrNull>0) { |
---|
426 | TAPE_AMPI_pop_MPI_Datatype(rtype); |
---|
427 | *rbuf=(void*)(&(ADOLC_CURRENT_TAPE_INFOS.rp_A[get_locint_r()])); |
---|
428 | } |
---|
429 | else { |
---|
430 | // at least initialize to something nonrandom |
---|
431 | // because we know we always have valid addresses passed in here |
---|
432 | // NOTE JU: may not be true for source transformation... |
---|
433 | *rbuf=0; |
---|
434 | } |
---|
435 | for (i=commSizeForRootOrNull-1;i>=0;--i) { |
---|
436 | TAPE_AMPI_pop_int(&(displs[i])); |
---|
437 | TAPE_AMPI_pop_int(&(rcnts[i])); |
---|
438 | } |
---|
439 | TAPE_AMPI_pop_int(&commSizeForRootOrNull); |
---|
440 | } |
---|
441 | |
---|
442 | void ADTOOL_AMPI_push_CallCode(enum AMPI_PairedWith_E thisCall) { |
---|
443 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
444 | switch(thisCall) { |
---|
445 | case AMPI_WAIT: |
---|
446 | put_op(ampi_wait); |
---|
447 | break; |
---|
448 | case AMPI_BARRIER: |
---|
449 | put_op(ampi_barrier); |
---|
450 | break; |
---|
451 | case AMPI_SEND: |
---|
452 | put_op(ampi_send); |
---|
453 | break; |
---|
454 | case AMPI_RECV: |
---|
455 | put_op(ampi_recv); |
---|
456 | break; |
---|
457 | case AMPI_ISEND: |
---|
458 | put_op(ampi_isend); |
---|
459 | break; |
---|
460 | case AMPI_IRECV: |
---|
461 | put_op(ampi_irecv); |
---|
462 | break; |
---|
463 | case AMPI_BCAST: |
---|
464 | put_op(ampi_bcast); |
---|
465 | break; |
---|
466 | case AMPI_REDUCE: |
---|
467 | put_op(ampi_reduce); |
---|
468 | break; |
---|
469 | case AMPI_ALLREDUCE: |
---|
470 | put_op(ampi_allreduce); |
---|
471 | break; |
---|
472 | case AMPI_GATHER: |
---|
473 | put_op(ampi_gather); |
---|
474 | break; |
---|
475 | case AMPI_SCATTER: |
---|
476 | put_op(ampi_scatter); |
---|
477 | break; |
---|
478 | case AMPI_ALLGATHER: |
---|
479 | put_op(ampi_allgather); |
---|
480 | break; |
---|
481 | case AMPI_GATHERV: |
---|
482 | put_op(ampi_gatherv); |
---|
483 | break; |
---|
484 | case AMPI_SCATTERV: |
---|
485 | put_op(ampi_scatterv); |
---|
486 | break; |
---|
487 | case AMPI_ALLGATHERV: |
---|
488 | put_op(ampi_allgatherv); |
---|
489 | break; |
---|
490 | default: |
---|
491 | assert(0); |
---|
492 | break; |
---|
493 | } |
---|
494 | } |
---|
495 | } |
---|
496 | |
---|
497 | void ADTOOL_AMPI_pop_CallCode(enum AMPI_PairedWith_E *thisCall) { |
---|
498 | assert(0); |
---|
499 | } |
---|
500 | |
---|
501 | void ADTOOL_AMPI_push_AMPI_Request(struct AMPI_Request_S *ampiRequest) { |
---|
502 | ADTOOL_AMPI_pushSRinfo(ampiRequest->buf, |
---|
503 | ampiRequest->count, |
---|
504 | ampiRequest->datatype, |
---|
505 | ampiRequest->endPoint, |
---|
506 | ampiRequest->tag, |
---|
507 | ampiRequest->pairedWith, |
---|
508 | ampiRequest->comm); |
---|
509 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) { |
---|
510 | TAPE_AMPI_push_MPI_Request(ampiRequest->tracedRequest); |
---|
511 | TAPE_AMPI_push_int(ampiRequest->origin); |
---|
512 | } |
---|
513 | } |
---|
514 | |
---|
515 | void ADTOOL_AMPI_pop_AMPI_Request(struct AMPI_Request_S *ampiRequest) { |
---|
516 | TAPE_AMPI_pop_int((int*)&(ampiRequest->origin)); |
---|
517 | TAPE_AMPI_pop_MPI_Request(&(ampiRequest->tracedRequest)); |
---|
518 | ADTOOL_AMPI_popSRinfo(&(ampiRequest->adjointBuf), |
---|
519 | &(ampiRequest->count), |
---|
520 | &(ampiRequest->datatype), |
---|
521 | &(ampiRequest->endPoint), |
---|
522 | &(ampiRequest->tag), |
---|
523 | &(ampiRequest->pairedWith), |
---|
524 | &(ampiRequest->comm), |
---|
525 | &(ampiRequest->idx)); |
---|
526 | } |
---|
527 | |
---|
528 | void ADTOOL_AMPI_push_request(MPI_Request request) { |
---|
529 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) TAPE_AMPI_push_MPI_Request(request); |
---|
530 | } |
---|
531 | |
---|
532 | MPI_Request ADTOOL_AMPI_pop_request() { |
---|
533 | MPI_Request r; |
---|
534 | TAPE_AMPI_pop_MPI_Request(&r); |
---|
535 | return r; |
---|
536 | } |
---|
537 | |
---|
538 | void ADTOOL_AMPI_push_comm(MPI_Comm comm) { |
---|
539 | if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) TAPE_AMPI_push_MPI_Comm(comm); |
---|
540 | } |
---|
541 | |
---|
542 | MPI_Comm ADTOOL_AMPI_pop_comm() { |
---|
543 | MPI_Comm c; |
---|
544 | TAPE_AMPI_pop_MPI_Comm(&c); |
---|
545 | return c; |
---|
546 | } |
---|
547 | |
---|
548 | void * ADTOOL_AMPI_rawData(void* activeData, int *size) { |
---|
549 | void *ret=0; |
---|
550 | if (*size>0) { |
---|
551 | adouble* adouble_p=(adouble*)activeData; |
---|
552 | ret=(void*)(&(ADOLC_GLOBAL_TAPE_VARS.store[adouble_p->loc()])); |
---|
553 | } |
---|
554 | return ret; |
---|
555 | } |
---|
556 | |
---|
557 | void * ADTOOL_AMPI_rawDataV(void* activeData, int commSize, int *counts, int *displs) { |
---|
558 | void *ret=NULL; |
---|
559 | int nonNullCount=0; |
---|
560 | int minDispls=INT_MAX; |
---|
561 | for (int i=0; i< commSize; ++i) { |
---|
562 | if (counts[i]>nonNullCount) nonNullCount=counts[i]; |
---|
563 | if (minDispls>displs[i]) minDispls=displs[i]; |
---|
564 | } |
---|
565 | if (nonNullCount>0) { |
---|
566 | assert(minDispls==0); |
---|
567 | adouble* adouble_p=(adouble*)activeData; |
---|
568 | ret=(void*)(&(ADOLC_GLOBAL_TAPE_VARS.store[adouble_p->loc()])); |
---|
569 | } |
---|
570 | return ret; |
---|
571 | } |
---|
572 | |
---|
573 | void * ADTOOL_AMPI_packDType(void* indata, void* outdata, int count, int idx) { |
---|
574 | if (!isDerivedType(idx)) return indata; /* not derived type, or only passive elements */ |
---|
575 | int i, j, s, in_offset, out_offset, dt_idx; |
---|
576 | MPI_Aint p_extent, extent; |
---|
577 | MPI_Datatype datatype; |
---|
578 | derivedTypeData* dtdata = getDTypeData(); |
---|
579 | char *out_addr, *in_addr; |
---|
580 | p_extent = dtdata->p_extents[idx]; |
---|
581 | extent = dtdata->extents[idx]; |
---|
582 | for (j=0;j<count;j++) { |
---|
583 | in_offset = j*extent; |
---|
584 | out_offset = j*p_extent; |
---|
585 | for (i=0;i<dtdata->counts[idx];i++) { |
---|
586 | datatype = dtdata->arrays_of_types[idx][i]; |
---|
587 | if (datatype==MPI_UB || datatype==MPI_LB) assert(0); |
---|
588 | dt_idx = derivedTypeIdx(datatype); |
---|
589 | out_addr = (char*)outdata + out_offset + (int)dtdata->arrays_of_p_displacements[idx][i]; |
---|
590 | in_addr = (char*)indata + in_offset + (int)dtdata->arrays_of_displacements[idx][i]; |
---|
591 | if (ADTOOL_AMPI_isActiveType(datatype)==AMPI_ACTIVE) { |
---|
592 | memcpy(out_addr, |
---|
593 | ADTOOL_AMPI_rawData((void*)in_addr,&dtdata->arrays_of_blocklengths[idx][i]), |
---|
594 | sizeof(revreal)*dtdata->arrays_of_blocklengths[idx][i]); |
---|
595 | } |
---|
596 | else if (isDerivedType(dt_idx)) { |
---|
597 | ADTOOL_AMPI_packDType(in_addr, |
---|
598 | out_addr, |
---|
599 | dtdata->arrays_of_blocklengths[idx][i], |
---|
600 | dt_idx); |
---|
601 | } |
---|
602 | else { |
---|
603 | if (datatype==MPI_DOUBLE) s = (int)sizeof(double); |
---|
604 | else if (datatype==MPI_INT) s = (int)sizeof(int); |
---|
605 | else if (datatype==MPI_FLOAT) s = (int)sizeof(float); |
---|
606 | else if (datatype==MPI_CHAR) s = (int)sizeof(char); |
---|
607 | else assert(0); |
---|
608 | memcpy(out_addr, |
---|
609 | in_addr, |
---|
610 | s*dtdata->arrays_of_blocklengths[idx][i]); |
---|
611 | } |
---|
612 | } |
---|
613 | } |
---|
614 | return outdata; |
---|
615 | } |
---|
616 | |
---|
617 | void * ADTOOL_AMPI_unpackDType(void* indata, void* outdata, int count, int idx) { |
---|
618 | if (!isDerivedType(idx)) return indata; /* not derived type, or only passive elements */ |
---|
619 | int i, j, s, in_offset, out_offset, dt_idx; |
---|
620 | MPI_Aint p_extent, extent; |
---|
621 | MPI_Datatype datatype; |
---|
622 | derivedTypeData* dtdata = getDTypeData(); |
---|
623 | char *out_addr, *in_addr; |
---|
624 | p_extent = dtdata->p_extents[idx]; |
---|
625 | extent = dtdata->extents[idx]; |
---|
626 | for (j=0;j<count;j++) { |
---|
627 | in_offset = j*p_extent; |
---|
628 | out_offset = j*extent; |
---|
629 | for (i=0;i<dtdata->counts[idx];i++) { |
---|
630 | datatype = dtdata->arrays_of_types[idx][i]; |
---|
631 | if (datatype==MPI_UB || datatype==MPI_LB) assert(0); |
---|
632 | dt_idx = derivedTypeIdx(datatype); |
---|
633 | out_addr = (char*)outdata + out_offset + (int)dtdata->arrays_of_displacements[idx][i]; |
---|
634 | in_addr = (char*)indata + in_offset + (int)dtdata->arrays_of_p_displacements[idx][i]; |
---|
635 | if (ADTOOL_AMPI_isActiveType(datatype)==AMPI_ACTIVE) { |
---|
636 | memcpy(ADTOOL_AMPI_rawData((void*)out_addr,&dtdata->arrays_of_blocklengths[idx][i]), |
---|
637 | in_addr, |
---|
638 | sizeof(revreal)*dtdata->arrays_of_blocklengths[idx][i]); |
---|
639 | } |
---|
640 | else if (isDerivedType(dt_idx)) { |
---|
641 | ADTOOL_AMPI_unpackDType(in_addr, |
---|
642 | out_addr, |
---|
643 | dtdata->arrays_of_blocklengths[idx][i], |
---|
644 | dt_idx); |
---|
645 | } |
---|
646 | else { |
---|
647 | if (datatype==MPI_DOUBLE) s = (int)sizeof(double); |
---|
648 | else if (datatype==MPI_INT) s = (int)sizeof(int); |
---|
649 | else if (datatype==MPI_FLOAT) s = (int)sizeof(float); |
---|
650 | else if (datatype==MPI_CHAR) s = (int)sizeof(char); |
---|
651 | else assert(0); |
---|
652 | memcpy(out_addr, |
---|
653 | in_addr, |
---|
654 | s*dtdata->arrays_of_blocklengths[idx][i]); |
---|
655 | } |
---|
656 | } |
---|
657 | } |
---|
658 | return outdata; |
---|
659 | } |
---|
660 | |
---|
/** Intentionally empty in this implementation; both arguments are ignored. */
void ADTOOL_AMPI_writeData(void* activeData,int *size) {
  /* nothing to do */
}
---|
662 | |
---|
/** Intentionally empty in this implementation; all arguments are ignored. */
void ADTOOL_AMPI_writeDataV(void* activeData, int *counts, int* displs) {
  /* nothing to do */
}
---|
664 | |
---|
/** Identity mapping: returns the pointer it was given, unchanged. */
void * ADTOOL_AMPI_rawAdjointData(void* activeData) {
  void* adjointData=activeData;
  return adjointData;
}
---|
668 | |
---|
669 | void ADTOOL_AMPI_mapBufForAdjoint(struct AMPI_Request_S *ampiRequest, |
---|
670 | void* buf) { |
---|
671 | ampiRequest->buf=buf; |
---|
672 | } |
---|
673 | |
---|
/** Intentionally empty in this implementation; both arguments are ignored. */
void ADTOOL_AMPI_Turn(void* buf, void* adjointBuf) {
  /* nothing to do */
}
---|
675 | |
---|
/** Intentionally empty in this implementation; both arguments are ignored. */
void ADTOOL_AMPI_setBufForAdjoint(struct AMPI_Request_S *ampiRequest,
                                  void* buf) {
  /* do nothing */
}
---|
680 | |
---|
681 | void ADTOOL_AMPI_getAdjointCount(int *count, |
---|
682 | MPI_Datatype datatype) { |
---|
683 | int dt_idx = derivedTypeIdx(datatype); |
---|
684 | if (isDerivedType(dt_idx)) *count *= getDTypeData()->num_actives[dt_idx]; |
---|
685 | } |
---|
686 | |
---|
687 | void ADTOOL_AMPI_setAdjointCount(struct AMPI_Request_S *ampiRequest) { |
---|
688 | /* for now we keep the count as is but for example in vector mode one would have to multiply by vector length */ |
---|
689 | ampiRequest->adjointCount=ampiRequest->count; |
---|
690 | } |
---|
691 | |
---|
692 | void ADTOOL_AMPI_setAdjointCountAndTempBuf(struct AMPI_Request_S *ampiRequest) { |
---|
693 | ADTOOL_AMPI_setAdjointCount(ampiRequest); |
---|
694 | ampiRequest->adjointTempBuf= |
---|
695 | ADTOOL_AMPI_allocateTempBuf(ampiRequest->adjointCount, |
---|
696 | ampiRequest->datatype, |
---|
697 | ampiRequest->comm); |
---|
698 | } |
---|
699 | |
---|
700 | void* ADTOOL_AMPI_allocateTempBuf(int adjointCount, |
---|
701 | MPI_Datatype dataType, |
---|
702 | MPI_Comm comm) { |
---|
703 | size_t s=0; |
---|
704 | void* buf; |
---|
705 | int dt_idx = derivedTypeIdx(dataType); |
---|
706 | if (dataType==AMPI_ADOUBLE) s=sizeof(revreal); |
---|
707 | else if(dataType==MPI_DOUBLE) s=sizeof(double); |
---|
708 | else if(dataType==MPI_FLOAT) s=sizeof(float); |
---|
709 | else if(isDerivedType(dt_idx)) s=getDTypeData()->p_extents[dt_idx]; |
---|
710 | else MPI_Abort(comm, MPI_ERR_TYPE); |
---|
711 | buf=malloc(adjointCount*s); |
---|
712 | assert(buf); |
---|
713 | return buf; |
---|
714 | } |
---|
715 | |
---|
/** Releases a buffer with free(); ADTOOL_AMPI_allocateTempBuf obtains its buffers from malloc(). */
void ADTOOL_AMPI_releaseAdjointTempBuf(void *tempBuf) {
  /* plain malloc'ed memory, no destructors to run */
  free(tempBuf);
}
---|
719 | |
---|
720 | void* ADTOOL_AMPI_allocateTempActiveBuf(int count, |
---|
721 | MPI_Datatype datatype, |
---|
722 | MPI_Comm comm) { |
---|
723 | int dt_idx = derivedTypeIdx(datatype); |
---|
724 | if (isDerivedType(dt_idx)) { |
---|
725 | int i, j, extent, struct_offset, block_offset; |
---|
726 | MPI_Datatype blocktype; |
---|
727 | derivedTypeData* dtdata = getDTypeData(); |
---|
728 | void* buf; |
---|
729 | extent = dtdata->extents[dt_idx]; |
---|
730 | buf = malloc(count*extent); |
---|
731 | assert(buf); |
---|
732 | for (j=0;j<count;j++) { |
---|
733 | struct_offset = j*extent; |
---|
734 | for (i=0;i<dtdata->counts[dt_idx];i++) { |
---|
735 | blocktype = dtdata->arrays_of_types[dt_idx][i]; |
---|
736 | if (blocktype==MPI_UB || blocktype==MPI_LB) assert(0); |
---|
737 | block_offset = struct_offset + dtdata->arrays_of_displacements[dt_idx][i]; |
---|
738 | if (blocktype==AMPI_ADOUBLE) { |
---|
739 | new ((void*)((char*)buf + block_offset)) adouble[dtdata->arrays_of_blocklengths[dt_idx][i]]; |
---|
740 | } |
---|
741 | } |
---|
742 | } |
---|
743 | return buf; |
---|
744 | } |
---|
745 | else if (datatype==AMPI_ADOUBLE) { |
---|
746 | adouble* buf = new adouble[count]; |
---|
747 | assert(buf); |
---|
748 | return buf; |
---|
749 | } |
---|
750 | else assert(0); |
---|
751 | } |
---|
752 | |
---|
753 | void ADTOOL_AMPI_releaseTempActiveBuf(void *buf, |
---|
754 | int count, |
---|
755 | MPI_Datatype datatype) { |
---|
756 | int dt_idx = derivedTypeIdx(datatype); |
---|
757 | if (isDerivedType(dt_idx)) { |
---|
758 | int i, j, k, extent, struct_offset, block_offset; |
---|
759 | MPI_Datatype blocktype; |
---|
760 | derivedTypeData* dtdata = getDTypeData(); |
---|
761 | extent = dtdata->extents[dt_idx]; |
---|
762 | for (j=0;j<count;j++) { |
---|
763 | struct_offset = j*extent; |
---|
764 | for (i=0;i<dtdata->counts[dt_idx];i++) { |
---|
765 | blocktype = dtdata->arrays_of_types[dt_idx][i]; |
---|
766 | block_offset = struct_offset + dtdata->arrays_of_displacements[dt_idx][i]; |
---|
767 | if (blocktype==AMPI_ADOUBLE) { |
---|
768 | for (k=0;k<dtdata->arrays_of_blocklengths[dt_idx][i];k++) { |
---|
769 | ((adouble*)((char*)buf + block_offset + k*sizeof(adouble)))->~adouble(); |
---|
770 | } |
---|
771 | } |
---|
772 | } |
---|
773 | } |
---|
774 | free(buf); |
---|
775 | } |
---|
776 | else if (datatype==AMPI_ADOUBLE) delete[] (adouble*)buf; |
---|
777 | else assert(0); |
---|
778 | } |
---|
779 | |
---|
780 | void * ADTOOL_AMPI_copyActiveBuf(void* source, |
---|
781 | void* target, |
---|
782 | int count, |
---|
783 | MPI_Datatype datatype, |
---|
784 | MPI_Comm comm) { |
---|
785 | int s, k, dt_idx = derivedTypeIdx(datatype); |
---|
786 | if (ADTOOL_AMPI_isActiveType(datatype)==AMPI_ACTIVE) { |
---|
787 | for (k=0;k<count;k++) ((adouble*)target)[k] = ((adouble*)source)[k]; |
---|
788 | } |
---|
789 | else if (isDerivedType(dt_idx)) { |
---|
790 | int i, j, extent, struct_offset, block_offset; |
---|
791 | MPI_Datatype blocktype; |
---|
792 | derivedTypeData* dtdata = getDTypeData(); |
---|
793 | extent = dtdata->extents[dt_idx]; |
---|
794 | for (j=0;j<count;j++) { |
---|
795 | struct_offset = j*extent; |
---|
796 | for (i=0;i<dtdata->counts[dt_idx];i++) { |
---|
797 | blocktype = dtdata->arrays_of_types[dt_idx][i]; |
---|
798 | if (blocktype==MPI_UB || blocktype==MPI_LB) assert(0); |
---|
799 | block_offset = struct_offset + (int)dtdata->arrays_of_displacements[dt_idx][i]; |
---|
800 | if (ADTOOL_AMPI_isActiveType(blocktype)==AMPI_ACTIVE) { |
---|
801 | for (k=0;k<dtdata->arrays_of_blocklengths[dt_idx][i];k++) { |
---|
802 | ((adouble*)((char*)target + block_offset))[k] = ((adouble*)((char*)source + block_offset))[k]; |
---|
803 | } |
---|
804 | } |
---|
805 | else { |
---|
806 | if (blocktype==MPI_DOUBLE) s = sizeof(double); |
---|
807 | else if (blocktype==MPI_INT) s = sizeof(int); |
---|
808 | else if (blocktype==MPI_FLOAT) s = sizeof(float); |
---|
809 | else if (blocktype==MPI_CHAR) s = sizeof(char); |
---|
810 | memcpy((char*)target + block_offset, |
---|
811 | (char*)source + block_offset, |
---|
812 | s*dtdata->arrays_of_blocklengths[dt_idx][i]); |
---|
813 | } |
---|
814 | } |
---|
815 | } |
---|
816 | } |
---|
817 | else assert(0); |
---|
818 | return target; |
---|
819 | } |
---|
820 | |
---|
821 | void ADTOOL_AMPI_incrementAdjoint(int adjointCount, |
---|
822 | MPI_Datatype datatype, |
---|
823 | MPI_Comm comm, |
---|
824 | void* target, |
---|
825 | void *source, |
---|
826 | void *idx) { |
---|
827 | for (unsigned int i=0; i<adjointCount; ++i) ((revreal*)(target))[i]+=((revreal*)(source))[i]; |
---|
828 | } |
---|
829 | |
---|
830 | void ADTOOL_AMPI_multiplyAdjoint(int adjointCount, |
---|
831 | MPI_Datatype datatype, |
---|
832 | MPI_Comm comm, |
---|
833 | void* target, |
---|
834 | void *source, |
---|
835 | void *idx) { |
---|
836 | for (unsigned int i=0; i<adjointCount; ++i) ((revreal*)(target))[i]*=((revreal*)(source))[i]; |
---|
837 | } |
---|
838 | |
---|
839 | void ADTOOL_AMPI_divideAdjoint(int adjointCount, |
---|
840 | MPI_Datatype datatype, |
---|
841 | MPI_Comm comm, |
---|
842 | void* target, |
---|
843 | void *source, |
---|
844 | void *idx) { |
---|
845 | for (unsigned int i=0; i<adjointCount; ++i) ((revreal*)(target))[i]/=((revreal*)(source))[i]; |
---|
846 | } |
---|
847 | |
---|
848 | void ADTOOL_AMPI_equalAdjoints(int adjointCount, |
---|
849 | MPI_Datatype datatype, |
---|
850 | MPI_Comm comm, |
---|
851 | void* target, |
---|
852 | void *source1, |
---|
853 | void *source2, |
---|
854 | void *idx) { |
---|
855 | for (unsigned int i=0; i<adjointCount; ++i) ((revreal*)(target))[i]=((revreal*)(source1))[i]==((revreal*)(source2))[i]; |
---|
856 | } |
---|
857 | |
---|
858 | void ADTOOL_AMPI_nullifyAdjoint(int adjointCount, |
---|
859 | MPI_Datatype datatype, |
---|
860 | MPI_Comm comm, |
---|
861 | void* target) { |
---|
862 | for (unsigned int i=0; i<adjointCount; ++i) ((revreal*)(target))[i]=0.0; |
---|
863 | } |
---|
864 | |
---|
865 | AMPI_Activity ADTOOL_AMPI_isActiveType(MPI_Datatype datatype) { |
---|
866 | if (datatype==AMPI_ADOUBLE || datatype==AMPI_AFLOAT) return AMPI_ACTIVE; |
---|
867 | return AMPI_PASSIVE; |
---|
868 | }; |
---|
869 | |
---|
870 | void ADTOOL_AMPI_setupTypes() { |
---|
871 | MPI_Type_contiguous(1,MPI_DOUBLE,&I_ADOUBLE); |
---|
872 | MPI_Type_commit(&I_ADOUBLE); |
---|
873 | MPI_Type_contiguous(1,MPI_FLOAT,&I_AFLOAT); |
---|
874 | MPI_Type_commit(&I_AFLOAT); |
---|
875 | }; |
---|
876 | |
---|
877 | void ADTOOL_AMPI_cleanupTypes() { |
---|
878 | if (AMPI_ADOUBLE!=MPI_DATATYPE_NULL) MPI_Type_free(&I_ADOUBLE); |
---|
879 | if (AMPI_AFLOAT !=MPI_DATATYPE_NULL) MPI_Type_free(&I_AFLOAT); |
---|
880 | } |
---|
881 | |
---|
882 | MPI_Datatype ADTOOL_AMPI_FW_rawType(MPI_Datatype datatype) { |
---|
883 | int dt_idx = derivedTypeIdx(datatype); |
---|
884 | if (datatype==AMPI_ADOUBLE) return MPI_DOUBLE; |
---|
885 | else if (datatype==AMPI_AFLOAT) return MPI_FLOAT; |
---|
886 | else if (isDerivedType(dt_idx)) return getDTypeData()->packed_types[dt_idx]; |
---|
887 | else return datatype; |
---|
888 | } |
---|
889 | |
---|
890 | MPI_Datatype ADTOOL_AMPI_BW_rawType(MPI_Datatype datatype) { |
---|
891 | int dt_idx = derivedTypeIdx(datatype); |
---|
892 | if (datatype==AMPI_ADOUBLE) return MPI_DOUBLE; |
---|
893 | else if (datatype==AMPI_AFLOAT) return MPI_FLOAT; |
---|
894 | else if (isDerivedType(dt_idx)) return MPI_DOUBLE; |
---|
895 | else return datatype; |
---|
896 | } |
---|
897 | |
---|
898 | // tracing |
---|
899 | |
---|
900 | int AMPI_Send(void* buf, |
---|
901 | int count, |
---|
902 | MPI_Datatype datatype, |
---|
903 | int src, |
---|
904 | int tag, |
---|
905 | AMPI_PairedWith pairedWith, |
---|
906 | MPI_Comm comm) { |
---|
907 | return FW_AMPI_Send(buf, |
---|
908 | count, |
---|
909 | datatype, |
---|
910 | src, |
---|
911 | tag, |
---|
912 | pairedWith, |
---|
913 | comm); |
---|
914 | } |
---|
915 | |
---|
916 | int AMPI_Recv(void* buf, |
---|
917 | int count, |
---|
918 | MPI_Datatype datatype, |
---|
919 | int src, |
---|
920 | int tag, |
---|
921 | AMPI_PairedWith pairedWith, |
---|
922 | MPI_Comm comm, |
---|
923 | MPI_Status* status) { |
---|
924 | return FW_AMPI_Recv(buf, |
---|
925 | count, |
---|
926 | datatype, |
---|
927 | src, |
---|
928 | tag, |
---|
929 | pairedWith, |
---|
930 | comm, |
---|
931 | status); |
---|
932 | } |
---|
933 | |
---|
934 | int AMPI_Isend (void* buf, |
---|
935 | int count, |
---|
936 | MPI_Datatype datatype, |
---|
937 | int dest, |
---|
938 | int tag, |
---|
939 | AMPI_PairedWith pairedWith, |
---|
940 | MPI_Comm comm, |
---|
941 | AMPI_Request* request) { |
---|
942 | return FW_AMPI_Isend(buf, |
---|
943 | count, |
---|
944 | datatype, |
---|
945 | dest, |
---|
946 | tag, |
---|
947 | pairedWith, |
---|
948 | comm, |
---|
949 | request); |
---|
950 | } |
---|
951 | |
---|
952 | int AMPI_Irecv (void* buf, |
---|
953 | int count, |
---|
954 | MPI_Datatype datatype, |
---|
955 | int src, |
---|
956 | int tag, |
---|
957 | AMPI_PairedWith pairedWith, |
---|
958 | MPI_Comm comm, |
---|
959 | AMPI_Request* request) { |
---|
960 | return FW_AMPI_Irecv(buf, |
---|
961 | count, |
---|
962 | datatype, |
---|
963 | src, |
---|
964 | tag, |
---|
965 | pairedWith, |
---|
966 | comm, |
---|
967 | request); |
---|
968 | } |
---|
969 | |
---|
970 | int AMPI_Wait(AMPI_Request *request, |
---|
971 | MPI_Status *status) { |
---|
972 | return FW_AMPI_Wait(request, |
---|
973 | status); |
---|
974 | } |
---|
975 | |
---|
976 | int AMPI_Barrier(MPI_Comm comm) { |
---|
977 | return FW_AMPI_Barrier(comm); |
---|
978 | } |
---|
979 | |
---|
980 | int AMPI_Gather(void *sendbuf, |
---|
981 | int sendcnt, |
---|
982 | MPI_Datatype sendtype, |
---|
983 | void *recvbuf, |
---|
984 | int recvcnt, |
---|
985 | MPI_Datatype recvtype, |
---|
986 | int root, |
---|
987 | MPI_Comm comm) { |
---|
988 | return FW_AMPI_Gather(sendbuf, |
---|
989 | sendcnt, |
---|
990 | sendtype, |
---|
991 | recvbuf, |
---|
992 | recvcnt, |
---|
993 | recvtype, |
---|
994 | root, |
---|
995 | comm); |
---|
996 | } |
---|
997 | |
---|
998 | int AMPI_Scatter(void *sendbuf, |
---|
999 | int sendcnt, |
---|
1000 | MPI_Datatype sendtype, |
---|
1001 | void *recvbuf, |
---|
1002 | int recvcnt, |
---|
1003 | MPI_Datatype recvtype, |
---|
1004 | int root, MPI_Comm comm) { |
---|
1005 | return FW_AMPI_Scatter(sendbuf, |
---|
1006 | sendcnt, |
---|
1007 | sendtype, |
---|
1008 | recvbuf, |
---|
1009 | recvcnt, |
---|
1010 | recvtype, |
---|
1011 | root, |
---|
1012 | comm); |
---|
1013 | } |
---|
1014 | |
---|
1015 | int AMPI_Allgather(void *sendbuf, |
---|
1016 | int sendcnt, |
---|
1017 | MPI_Datatype sendtype, |
---|
1018 | void *recvbuf, |
---|
1019 | int recvcnt, |
---|
1020 | MPI_Datatype recvtype, |
---|
1021 | MPI_Comm comm) { |
---|
1022 | return FW_AMPI_Allgather(sendbuf, |
---|
1023 | sendcnt, |
---|
1024 | sendtype, |
---|
1025 | recvbuf, |
---|
1026 | recvcnt, |
---|
1027 | recvtype, |
---|
1028 | comm); |
---|
1029 | } |
---|
1030 | |
---|
1031 | int AMPI_Gatherv(void *sendbuf, |
---|
1032 | int sendcnt, |
---|
1033 | MPI_Datatype sendtype, |
---|
1034 | void *recvbuf, |
---|
1035 | int *recvcnts, |
---|
1036 | int *displs, |
---|
1037 | MPI_Datatype recvtype, |
---|
1038 | int root, |
---|
1039 | MPI_Comm comm) { |
---|
1040 | return FW_AMPI_Gatherv(sendbuf, |
---|
1041 | sendcnt, |
---|
1042 | sendtype, |
---|
1043 | recvbuf, |
---|
1044 | recvcnts, |
---|
1045 | displs, |
---|
1046 | recvtype, |
---|
1047 | root, |
---|
1048 | comm); |
---|
1049 | } |
---|
1050 | |
---|
1051 | int AMPI_Scatterv(void *sendbuf, |
---|
1052 | int *sendcnts, |
---|
1053 | int *displs, |
---|
1054 | MPI_Datatype sendtype, |
---|
1055 | void *recvbuf, |
---|
1056 | int recvcnt, |
---|
1057 | MPI_Datatype recvtype, |
---|
1058 | int root, MPI_Comm comm) { |
---|
1059 | return FW_AMPI_Scatterv(sendbuf, |
---|
1060 | sendcnts, |
---|
1061 | displs, |
---|
1062 | sendtype, |
---|
1063 | recvbuf, |
---|
1064 | recvcnt, |
---|
1065 | recvtype, |
---|
1066 | root, |
---|
1067 | comm); |
---|
1068 | } |
---|
1069 | |
---|
1070 | int AMPI_Allgatherv(void *sendbuf, |
---|
1071 | int sendcnt, |
---|
1072 | MPI_Datatype sendtype, |
---|
1073 | void *recvbuf, |
---|
1074 | int *recvcnts, |
---|
1075 | int *displs, |
---|
1076 | MPI_Datatype recvtype, |
---|
1077 | MPI_Comm comm) { |
---|
1078 | return FW_AMPI_Allgatherv(sendbuf, |
---|
1079 | sendcnt, |
---|
1080 | sendtype, |
---|
1081 | recvbuf, |
---|
1082 | recvcnts, |
---|
1083 | displs, |
---|
1084 | recvtype, |
---|
1085 | comm); |
---|
1086 | } |
---|
1087 | |
---|
1088 | int AMPI_Bcast(void* buf, |
---|
1089 | int count, |
---|
1090 | MPI_Datatype datatype, |
---|
1091 | int root, |
---|
1092 | MPI_Comm comm) { |
---|
1093 | return FW_AMPI_Bcast(buf, |
---|
1094 | count, |
---|
1095 | datatype, |
---|
1096 | root, |
---|
1097 | comm); |
---|
1098 | } |
---|
1099 | |
---|
1100 | int AMPI_Reduce(void* sbuf, |
---|
1101 | void* rbuf, |
---|
1102 | int count, |
---|
1103 | MPI_Datatype datatype, |
---|
1104 | MPI_Op op, |
---|
1105 | int root, |
---|
1106 | MPI_Comm comm) { |
---|
1107 | return FWB_AMPI_Reduce(sbuf, |
---|
1108 | rbuf, |
---|
1109 | count, |
---|
1110 | datatype, |
---|
1111 | op, |
---|
1112 | root, |
---|
1113 | comm); |
---|
1114 | } |
---|
1115 | |
---|
1116 | int AMPI_Allreduce(void* sbuf, |
---|
1117 | void* rbuf, |
---|
1118 | int count, |
---|
1119 | MPI_Datatype datatype, |
---|
1120 | MPI_Op op, |
---|
1121 | MPI_Comm comm) { |
---|
1122 | return FW_AMPI_Allreduce(sbuf, |
---|
1123 | rbuf, |
---|
1124 | count, |
---|
1125 | datatype, |
---|
1126 | op, |
---|
1127 | comm); |
---|
1128 | } |
---|
1129 | |
---|
1130 | #endif |
---|