source: trunk/ADOL-C/src/externfcts.cpp @ 708

Last change on this file since 708 was 608, checked in by kulshres, 4 years ago

merge branch 'master' of 'gitclone' into 'svn'

The following commits have been merged:

commit 48aee4916d2ed907b772dbd1c1d6ce46cb273651
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Mon Aug 10 21:28:49 2015 +0200

modernise configure.ac

disable static library building. this causes more
problems than it solves.

commit 47332811a4c5c27cb884f75792c910c813378ef4
Merge: 0ee77fd 0d4eeec
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Thu Aug 6 22:33:46 2015 +0200

Merge branch 'edf-memory'

This is to reduce memory allocation and copying in ext_diff_fct

commit 0ee77fd33a1d6d55fcc67ad419937b2cb777ed4e
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Wed Aug 5 15:49:33 2015 +0200

Remove empty file from dist

this can be created during compilation

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 51505c34571aa61b4b21ebce6cdf1728ff56ddaa
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Tue Aug 4 17:36:26 2015 +0200

adouble(const adub&) should match operator=(const adub&)

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 03e49097aa0455337647d280cda530064987e6b9
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri Jul 3 11:17:53 2015 +0200

make a define for default contiguous locations

this is not needed during compilation of the library
only during compilation of user-code, if the user
wants to have all adouble* allocations to have
contiguous locations.

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit f00cfb5d0dc8a8993581fd8c08dd8c6c5cd23248
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Wed Jul 1 11:56:39 2015 +0200

rename adolc_lie.h to drivers.h

the old name led to an include <adolc/lie/adolc_lie.h>
which looks highly redundant.
new name makes for include <adolc/lie/drivers.h>

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit fcf78bf8426a227750a0bcaa32ff65e57ef329b8
Author: franke <mirko.franke@…>
Date: Wed May 20 16:39:16 2015 +0200

added Lie drivers

commit 0d4eeec7b6212aa64c8997db8a511f81b604b3e1
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri Jun 26 14:19:41 2015 +0200

minimise extra memory requirement and copies in ext_diff

This should in theory reduce the amount of memory required
to run an external function with the old interface. It also
reduced some copying operations.

Fingers crossed that we've not broken checkpointing and/or
fixpoint iterations.

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 8811f02a4d4a15946a18f7513ab17dada66509c3
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri May 22 12:41:45 2015 +0200

try to streamline data copies in ext_diff_v2

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/*----------------------------------------------------------------------------
2 ADOL-C -- Automatic Differentiation by Overloading in C++
3 File:     externfcts.cpp
4 Revision: $Id: externfcts.cpp 608 2015-08-10 20:06:55Z kulshres $
5 Contents: functions and data types for extern (differentiated) functions.
6 
7 Copyright (c) Andreas Kowarz, Jean Utke
8 
9 This file is part of ADOL-C. This software is provided as open source.
10 Any use, reproduction, or distribution of the software constitutes
11 recipient's acceptance of the terms of the accompanying license file.
12         
13----------------------------------------------------------------------------*/
14
15#include "taping_p.h"
16#include <adolc/externfcts.h>
17#include "externfcts_p.h"
18#include <adolc/adouble.h>
19#include <adolc/adalloc.h>
20#include "oplate.h"
21#include "buffer_temp.h"
22
23#include <cstring>
24
25/****************************************************************************/
26/*                                    extern differentiated functions stuff */
27
28#define ADOLC_BUFFER_TYPE \
29   Buffer< ext_diff_fct, EDFCTS_BLOCK_SIZE >
30static ADOLC_BUFFER_TYPE buffer(edf_zero);
31
32void edf_zero(ext_diff_fct *edf) {
33  // sanity settings
34  edf->function=0;
35  edf->function_iArr=0;
36
37  edf->zos_forward=0;
38  edf->zos_forward_iArr=0;
39
40  edf->fos_forward=0;
41  edf->fos_forward_iArr=0;
42  edf->hos_forward=0;
43  edf->hos_forward_iArr=0;
44  edf->fov_forward=0;
45  edf->fov_forward_iArr=0;
46  edf->hov_forward=0;
47  edf->hov_forward_iArr=0;
48
49  edf->fos_reverse=0;
50  edf->fos_reverse_iArr=0;
51  edf->hos_reverse=0;
52  edf->hos_reverse_iArr=0;
53  edf->fov_reverse=0;
54  edf->fov_reverse_iArr=0;
55  edf->hov_reverse=0;
56  edf->hov_reverse_iArr=0;
57
58  edf->dp_x=0; 
59  edf->dp_X=0; 
60  edf->dpp_X=0;
61  edf->dppp_X=0;
62  edf->dp_y=0; 
63  edf->dp_Y=0; 
64  edf->dpp_Y=0; 
65  edf->dppp_Y=0;
66
67  edf->dp_U=0; 
68  edf->dpp_U=0; 
69  edf->dp_Z=0; 
70  edf->dpp_Z=0; 
71  edf->dppp_Z=0;
72
73  edf->spp_nz=0;
74
75  edf->max_n=0;
76  edf->max_m=0;
77
78  edf->nestedAdolc=true;
79  edf->dp_x_changes=true;
80  edf->dp_y_priorRequired=true;
81  if (edf->allmem != NULL)
82      free(edf->allmem);
83  edf->allmem=NULL;
84}
85
86ext_diff_fct *reg_ext_fct(ADOLC_ext_fct ext_fct) {
87  // this call sets edf->index:
88  ext_diff_fct * edf=buffer.append();
89  edf->function=ext_fct;
90  return edf;
91}
92
93ext_diff_fct *reg_ext_fct(ADOLC_ext_fct_iArr ext_fct) {
94  // this call sets  edf->index:
95  ext_diff_fct * edf=buffer.append();
96  edf->function_iArr=ext_fct;
97  return edf;
98}
99
100/*
101 * The externfcts.h had a comment previously that said the following:
102 ****
103 * The user has to preallocate the variables and set the pointers for any of the call back functions
104 * that will be called during trace interpretation.
105 * The dimensions given below correspond to the formal arguments in the call back funtions signatures above.
106 * If the dimensions n and m change between multiple calls to the same external function, then the variables
107 * have to be preallocation with the maximum of the respective dimension values.
108 * The dp_x and dp_y pointers have to be valid during both, the tracing phase and the trace interpretation;
109 * all the other pointers are required to be valid only for the trace interpretation.
110 ****
111 * Doing this now internally saves the user from doing it, as well as updating
112 * when using multiple problem sizes.
113 */
114static void update_ext_fct_memory(ext_diff_fct *edfct, int n, int m) {
115  if (edfct->max_n<n || edfct->max_m<m) {
116      /* We need memory stored in the edfct dp_x[n], dp_X[n], dp_Z[n],
117       * dp_y[m], dp_Y[m], dp_U[m], dpp_X[n][n], dpp_Y[m][n],
118       * dpp_U[m][m], dpp_Z[m][n]. We have no implementation for higher order
119       * so leave it out.
120       */
121      size_t totalmem = (3*n + 3*m /*+ n*n + 2*n*m + m*m*/)*sizeof(double)
122                         + (3*m+n)*sizeof(double*);
123      char *tmp;
124      if (edfct->allmem != NULL) free(edfct->allmem);
125      edfct->allmem = (char*)malloc(totalmem);
126      memset(edfct->allmem,0,totalmem);
127      edfct->dp_x = (double*)edfct->allmem;
128      edfct->dp_y = edfct->dp_x+n;
129      edfct->dp_X = edfct->dp_y+m;
130      edfct->dp_Y = edfct->dp_X+n;
131      edfct->dp_U = edfct->dp_Y+m;
132      edfct->dp_Z = edfct->dp_U+m;
133      tmp = (char*)(edfct->dp_Z+n);
134      edfct->dpp_X = (double**)tmp;
135      edfct->dpp_Y = edfct->dpp_X + n;
136      edfct->dpp_U = edfct->dpp_Y + m;
137      edfct->dpp_Z = edfct->dpp_U + m;
138      /*
139      tmp = populate_dpp(&edfct->dpp_X, tmp, n,n);
140      tmp = populate_dpp(&edfct->dpp_Y, tmp, m,n);
141      tmp = populate_dpp(&edfct->dpp_U, tmp, m,m);
142      tmp = populate_dpp(&edfct->dpp_Z, tmp, m,n);
143      */
144  }
145
146  edfct->max_n=(edfct->max_n<n)?n:edfct->max_n;
147  edfct->max_m=(edfct->max_m<m)?m:edfct->max_m;
148 
149}
150
151void call_ext_fct_commonPrior(ext_diff_fct *edfct,
152                              int n, adouble *xa,
153                              int m, adouble *ya,
154                              int &numVals,
155                              double *&vals,
156                              int &oldTraceFlag) {
157  ADOLC_OPENMP_THREAD_NUMBER;
158  ADOLC_OPENMP_GET_THREAD_NUMBER;
159
160  if (xa[n-1].loc()-xa[0].loc()!=(unsigned)n-1 || ya[m-1].loc()-ya[0].loc()!=(unsigned)m-1) fail(ADOLC_EXT_DIFF_LOCATIONGAP);
161  if (edfct==NULL) fail(ADOLC_EXT_DIFF_NULLPOINTER_STRUCT);
162  if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) {
163    ADOLC_PUT_LOCINT(edfct->index);
164    ADOLC_PUT_LOCINT(n);
165    ADOLC_PUT_LOCINT(m);
166    ADOLC_PUT_LOCINT(xa[0].loc());
167    ADOLC_PUT_LOCINT(ya[0].loc());
168    ADOLC_PUT_LOCINT(0);               /* keep space for checkpointing index */
169    oldTraceFlag=ADOLC_CURRENT_TAPE_INFOS.traceFlag;
170    ADOLC_CURRENT_TAPE_INFOS.traceFlag=0;
171  }
172  else oldTraceFlag=0;
173  if (edfct->nestedAdolc) {
174    numVals = ADOLC_GLOBAL_TAPE_VARS.storeSize;
175    vals = new double[numVals];
176    memcpy(vals, ADOLC_GLOBAL_TAPE_VARS.store,
177           numVals * sizeof(double));
178  }
179
180  update_ext_fct_memory(edfct,n,m);
181
182  /* update taylor buffer if keep != 0 ; possible double counting as in
183   * adouble.cpp => correction in taping.c */
184
185  if (oldTraceFlag != 0) {
186    if (edfct->dp_x_changes) ADOLC_CURRENT_TAPE_INFOS.numTays_Tape += n;
187    if (edfct->dp_y_priorRequired) ADOLC_CURRENT_TAPE_INFOS.numTays_Tape += m;
188    if (ADOLC_CURRENT_TAPE_INFOS.keepTaylors) {
189      if (edfct->dp_x_changes) for (int i=0; i<n; ++i) ADOLC_WRITE_SCAYLOR(xa[i].getValue());
190      if (edfct->dp_y_priorRequired) for (int i=0; i<m; ++i) ADOLC_WRITE_SCAYLOR(ya[i].getValue());
191    }
192  }
193
194  for (int i=0; i<n; ++i) edfct->dp_x[i]=xa[i].getValue();
195  if (edfct->dp_y_priorRequired) for (int i=0; i<m; ++i) edfct->dp_y[i]=ya[i].getValue();
196}
197
198void call_ext_fct_commonPost(ext_diff_fct *edfct,
199                              int n, adouble *xa,
200                              int m, adouble *ya,
201                              int &numVals,
202                              double *&vals,
203                              int &oldTraceFlag) {
204  ADOLC_OPENMP_THREAD_NUMBER;
205  ADOLC_OPENMP_GET_THREAD_NUMBER;
206  if (edfct->nestedAdolc) {
207    memcpy(ADOLC_GLOBAL_TAPE_VARS.store, vals,
208        numVals * sizeof(double));
209    delete[] vals;
210    vals=0;
211  }
212  /* write back */
213  if (edfct->dp_x_changes) for (int i=0; i<n; ++i) xa[i].setValue(edfct->dp_x[i]);
214  for (int i=0; i<m; ++i) ya[i].setValue(edfct->dp_y[i]);
215  ADOLC_CURRENT_TAPE_INFOS.traceFlag=oldTraceFlag;
216}
217
218int call_ext_fct(ext_diff_fct *edfct,
219                 int n, adouble *xa,
220                 int m, adouble *ya) {
221  int ret;
222  int numVals = 0;
223  double *vals = NULL;
224  int oldTraceFlag;
225  ADOLC_OPENMP_THREAD_NUMBER;
226  ADOLC_OPENMP_GET_THREAD_NUMBER;
227  if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) put_op(ext_diff);
228  call_ext_fct_commonPrior (edfct,n,xa,m,ya,numVals,vals,oldTraceFlag);
229  ret=edfct->function(n, edfct->dp_x, m, edfct->dp_y);
230  call_ext_fct_commonPost (edfct,n,xa,m,ya,numVals,vals,oldTraceFlag);
231  return ret;
232}
233
234int call_ext_fct(ext_diff_fct *edfct,
235                 int iArrLength, int *iArr,
236                 int n, adouble *xa,
237                 int m, adouble *ya) {
238  int ret;
239  int numVals = 0;
240  double *vals = NULL;
241  int oldTraceFlag;
242  ADOLC_OPENMP_THREAD_NUMBER;
243  ADOLC_OPENMP_GET_THREAD_NUMBER;
244  if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) {
245    put_op_reserve(ext_diff_iArr,iArrLength+2);
246    ADOLC_PUT_LOCINT(iArrLength);
247    for (int i=0; i<iArrLength; ++i) ADOLC_PUT_LOCINT(iArr[i]);
248    ADOLC_PUT_LOCINT(iArrLength); // do it again so we can read in either direction
249  }
250  call_ext_fct_commonPrior (edfct,n,xa,m,ya,numVals,vals,oldTraceFlag);
251  ret=edfct->function_iArr(iArrLength, iArr, n, edfct->dp_x, m, edfct->dp_y);
252  call_ext_fct_commonPost (edfct,n,xa,m,ya,numVals,vals,oldTraceFlag);
253  return ret;
254}
255
256ext_diff_fct *get_ext_diff_fct( int index ) {
257    return buffer.getElement(index);
258}
259
260/****************************************************************************/
261/*                                                               THAT'S ALL */
262
Note: See TracBrowser for help on using the repository browser.