source: trunk/ADOL-C/src/externfcts2.cpp @ 708

Last change on this file since 708 was 608, checked in by kulshres, 4 years ago

merge branch 'master' of 'gitclone' into 'svn'

The following commits have been merged:

commit 48aee4916d2ed907b772dbd1c1d6ce46cb273651
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Mon Aug 10 21:28:49 2015 +0200

modernise configure.ac

disable static library building. this causes more
problems than it solves.

commit 47332811a4c5c27cb884f75792c910c813378ef4
Merge: 0ee77fd 0d4eeec
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Thu Aug 6 22:33:46 2015 +0200

Merge branch 'edf-memory'

This is to reduce memory allocation and copying in ext_diff_fct

commit 0ee77fd33a1d6d55fcc67ad419937b2cb777ed4e
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Wed Aug 5 15:49:33 2015 +0200

Remove empty file from dist

this can be created during compilation

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 51505c34571aa61b4b21ebce6cdf1728ff56ddaa
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Tue Aug 4 17:36:26 2015 +0200

adouble(const adub&) should match operator=(const adub&)

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 03e49097aa0455337647d280cda530064987e6b9
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri Jul 3 11:17:53 2015 +0200

make a define for default contiguous locations

this is not needed during compilation of the library
only during compilation of user-code, if the user
wants to have all adouble* allocations to have
contiguous locations.

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit f00cfb5d0dc8a8993581fd8c08dd8c6c5cd23248
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Wed Jul 1 11:56:39 2015 +0200

rename adolc_lie.h to drivers.h

the old name led to an include <adolc/lie/adolc_lie.h>
which looks highly redundant.
new name makes for include <adolc/lie/drivers.h>

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit fcf78bf8426a227750a0bcaa32ff65e57ef329b8
Author: franke <mirko.franke@…>
Date: Wed May 20 16:39:16 2015 +0200

added Lie drivers

commit 0d4eeec7b6212aa64c8997db8a511f81b604b3e1
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri Jun 26 14:19:41 2015 +0200

minimise extra memory requirement and copies in ext_diff

This should in theory reduce the amount of memory required
to run an external function with the old interface. It also
reduces some copying operations.

Fingers crossed that we've not broken checkpointing and/or
fixpoint iterations.

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

commit 8811f02a4d4a15946a18f7513ab17dada66509c3
Author: Kshitij Kulshreshtha <kshitij@…>
Date: Fri May 22 12:41:45 2015 +0200

try to streamline data copies in ext_diff_v2

Signed-off-by: Kshitij Kulshreshtha <kshitij@…>

File size: 6.7 KB
Line 
1/*----------------------------------------------------------------------------
2 ADOL-C -- Automatic Differentiation by Overloading in C++
3 File:     externfcts2.cpp
4 Revision: $Id$
5 Contents: functions and data types for extern (differentiated) functions.
6
7 Copyright (c) Kshitij Kulshreshtha
8
9 This file is part of ADOL-C. This software is provided as open source.
10 Any use, reproduction, or distribution of the software constitutes
11 recipient's acceptance of the terms of the accompanying license file.
12
13----------------------------------------------------------------------------*/
14
15#include "taping_p.h"
16#include <adolc/externfcts2.h>
17#include "externfcts_p.h"
18#include <adolc/adouble.h>
19#include <adolc/adalloc.h>
20#include "oplate.h"
21#include "buffer_temp.h"
22
23#include <cstring>
24
25/****************************************************************************/
26/*                                    extern differentiated functions stuff */
27
// Storage type for the ext_diff_fct_v2 records handled in this file: the
// Buffer template instantiated with ext_diff_fct_v2 and EDFCTS_BLOCK_SIZE.
#define ADOLC_BUFFER_TYPE \
   Buffer< ext_diff_fct_v2, EDFCTS_BLOCK_SIZE >

// File-local buffer of external-function records. edf_zero is handed to its
// constructor (presumably to initialise new entries -- confirm in buffer_temp.h).
static ADOLC_BUFFER_TYPE buffer(edf_zero);
32
33void edf_zero(ext_diff_fct_v2 *edf) {
34  // sanity settings
35  edf->function=0;
36  edf->zos_forward=0;
37  edf->fos_forward=0;
38  edf->fov_forward=0;
39  edf->fos_reverse=0;
40  edf->fov_reverse=0;
41  edf->x = 0;
42  edf->y = 0;
43  edf->xp = 0;
44  edf->yp = 0;
45  edf->Xp = 0;
46  edf->Yp = 0;
47  edf->up = 0;
48  edf->zp = 0;
49  edf->Up = 0;
50  edf->Zp = 0;
51  edf->max_nin = 0;
52  edf->max_nout = 0;
53  edf->max_insz = 0;
54  edf->max_outsz = 0;
55  edf->nestedAdolc=false;
56  edf->dp_x_changes=true;
57  edf->dp_y_priorRequired=true;
58  edf->context = NULL;
59  if (edf->allmem != NULL)
60      free(edf->allmem);
61  edf->allmem=NULL;
62}
63
64ext_diff_fct_v2 *reg_ext_fct(ADOLC_ext_fct_v2 *ext_fct) {
65    ext_diff_fct_v2 *edf = buffer.append();
66    edf->function = ext_fct;
67    return edf;
68}
69
70static void update_ext_fct_memory(ext_diff_fct_v2 *edfct, int nin, int nout, int *insz, int *outsz) {
71    int m_isz=0, m_osz=0;
72    int i,j;
73    for(i=0;i<nin;i++)
74        m_isz=(m_isz<insz[i])?insz[i]:m_isz;
75    for(i=0;i<nout;i++)
76        m_osz=(m_osz<outsz[i])?outsz[i]:m_osz;
77    if (edfct->max_nin<nin || edfct->max_nout<nout || edfct->max_insz<m_isz || edfct->max_outsz<m_osz) {
78        char* tmp;
79        size_t p = nin*m_isz, q = nout*m_osz;
80        size_t totalmem =
81            (3*nin*m_isz + 3*nout*m_osz
82             // + nin*m_isz*p + nout*m_osz*p
83             // + q*nout*m_osz + q*nin*m_isz
84            )*sizeof(double)
85            + (3*nin + 3*nout + nin*m_isz + nout*m_osz
86               + q*nout + q*nin)*sizeof(double*)
87            + (nin + nout + 2*q)*sizeof(double**);
88        if (edfct->allmem != NULL) free(edfct->allmem);
89        edfct->allmem=(char*)malloc(totalmem);
90        memset(edfct->allmem,0,totalmem);
91        tmp = edfct->allmem;
92        tmp = populate_dpp(&edfct->x,tmp,nin,m_isz);
93        tmp = populate_dpp(&edfct->y,tmp,nout,m_osz);
94        tmp = populate_dpp(&edfct->xp,tmp,nin,m_isz);
95        tmp = populate_dpp(&edfct->yp,tmp,nout,m_osz);
96        tmp = populate_dpp(&edfct->up,tmp,nout,m_osz);
97        tmp = populate_dpp(&edfct->zp,tmp,nin,m_isz);
98        tmp = populate_dppp_nodata(&edfct->Xp,tmp,nin,m_isz);
99        tmp = populate_dppp_nodata(&edfct->Yp,tmp,nout,m_osz);
100        tmp = populate_dppp_nodata(&edfct->Up,tmp,nout,m_osz);
101        tmp = populate_dppp_nodata(&edfct->Zp,tmp,nin,m_isz);
102    }
103    edfct->max_nin=(edfct->max_nin<nin)?nin:edfct->max_nin;
104    edfct->max_nout=(edfct->max_nout<nout)?nout:edfct->max_nout;
105    edfct->max_insz=(edfct->max_insz<m_isz)?m_isz:edfct->max_insz;
106    edfct->max_outsz=(edfct->max_outsz<m_osz)?m_osz:edfct->max_outsz;
107}
108
109int call_ext_fct(ext_diff_fct_v2 *edfct,
110                 int iArrLen, int* iArr,
111                 int nin, int nout,
112                 int *insz, adouble **x,
113                 int *outsz, adouble **y) {
114    int ret;
115    int oldTraceFlag;
116    int i,j; size_t numVals;
117    double *vals;
118    ADOLC_OPENMP_THREAD_NUMBER;
119    ADOLC_OPENMP_GET_THREAD_NUMBER;
120    if (ADOLC_CURRENT_TAPE_INFOS.traceFlag) {
121        put_op_reserve(ext_diff_v2, 2*(nin+nout)+iArrLen);
122        ADOLC_PUT_LOCINT(edfct->index);
123        ADOLC_PUT_LOCINT(iArrLen);
124        for(i=0;i<iArrLen;i++)
125            ADOLC_PUT_LOCINT(iArr[i]);
126        ADOLC_PUT_LOCINT(iArrLen);
127        ADOLC_PUT_LOCINT(nin);
128        ADOLC_PUT_LOCINT(nout);
129        for (i=0;i<nin;i++) {
130            if (x[i][insz[i]-1].loc()-x[i][0].loc() != (unsigned)insz[i]-1) fail(ADOLC_EXT_DIFF_LOCATIONGAP);
131            ADOLC_PUT_LOCINT(insz[i]);
132            ADOLC_PUT_LOCINT(x[i][0].loc());
133        }
134        for (i=0;i<nout;i++) {
135            if (y[i][outsz[i]-1].loc()-y[i][0].loc() != (unsigned)outsz[i]-1) fail(ADOLC_EXT_DIFF_LOCATIONGAP);
136            ADOLC_PUT_LOCINT(outsz[i]);
137            ADOLC_PUT_LOCINT(y[i][0].loc());
138        }
139        ADOLC_PUT_LOCINT(nin);
140        ADOLC_PUT_LOCINT(nout);
141        oldTraceFlag = ADOLC_CURRENT_TAPE_INFOS.traceFlag;
142        ADOLC_CURRENT_TAPE_INFOS.traceFlag = 0;
143    } else oldTraceFlag = 0;
144    if (edfct->nestedAdolc) {
145        numVals = ADOLC_GLOBAL_TAPE_VARS.storeSize;
146        vals = new double[numVals];
147        memcpy(vals,ADOLC_GLOBAL_TAPE_VARS.store, numVals*sizeof(double));
148    }
149    update_ext_fct_memory(edfct,nin,nout,insz,outsz);
150    if (oldTraceFlag != 0) {
151        if (edfct->dp_x_changes)
152            for(i=0;i<nin;i++)
153                ADOLC_CURRENT_TAPE_INFOS.numTays_Tape += insz[i];
154        if (edfct->dp_y_priorRequired)
155            for(i=0;i<nout;i++)
156                ADOLC_CURRENT_TAPE_INFOS.numTays_Tape += outsz[i];
157        if (ADOLC_CURRENT_TAPE_INFOS.keepTaylors) {
158            if (edfct->dp_x_changes)
159                for(i=0;i<nin;i++)
160                    for(j=0;j<insz[i];j++)
161                        ADOLC_WRITE_SCAYLOR(x[i][j].getValue());
162            if (edfct->dp_y_priorRequired)
163                for(i=0;i<nout;i++)
164                    for(j=0;j<outsz[i];j++)
165                        ADOLC_WRITE_SCAYLOR(y[i][j].getValue());
166        }
167    }
168
169    for(i=0;i<nin;i++)
170        for(j=0;j<insz[i];j++)
171            edfct->x[i][j] = x[i][j].getValue();
172
173    if (edfct->dp_y_priorRequired)
174        for(i=0;i<nout;i++)
175            for(j=0;j<outsz[i];j++)
176                edfct->y[i][j] = y[i][j].getValue();
177
178    ret=edfct->function(iArrLen,iArr,nin,nout,insz,edfct->x,outsz,edfct->y,edfct->context);
179
180    if (edfct->nestedAdolc) {
181        memcpy(ADOLC_GLOBAL_TAPE_VARS.store, vals, numVals*sizeof(double));
182        delete[] vals;
183        vals = NULL;
184    }
185    if (edfct->dp_x_changes)
186        for(i=0;i<nin;i++)
187            for(j=0;j<insz[i];j++)
188                x[i][j].setValue(edfct->x[i][j]);
189
190    for(i=0;i<nout;i++)
191        for(j=0;j<outsz[i];j++)
192            y[i][j].setValue(edfct->y[i][j]);
193
194    ADOLC_CURRENT_TAPE_INFOS.traceFlag=oldTraceFlag;
195    return ret;
196}
197
198ext_diff_fct_v2 *get_ext_diff_fct_v2( int index ) {
199    return buffer.getElement(index);
200}
Note: See TracBrowser for help on using the repository browser.