source: trunk/Clp/src/CoinAbcHelperFunctions.cpp @ 1910

Last change on this file since 1910 was 1910, checked in by stefan, 7 years ago
  • add configure option --enable-aboca={1,2,3,4,yes,no}
  • compile Aboca source only if --enable-aboca set (instead of compiling empty source files)
  • fix svn properties
  • Property svn:keywords set to Id
File size: 64.9 KB
Line 
1/* $Id: CoinAbcHelperFunctions.cpp 1910 2013-01-27 02:00:13Z stefan $ */
2// Copyright (C) 2003, International Business Machines
3// Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
4// This code is licensed under the terms of the Eclipse Public License (EPL).
5
6#include <cfloat>
7#include <cstdlib>
8#include <cmath>
9#include "CoinHelperFunctions.hpp"
10#include "CoinAbcHelperFunctions.hpp"
11#include "CoinTypes.hpp"
12#include "CoinFinite.hpp"
13#include "CoinAbcCommon.hpp"
14//#define AVX2 1
15#if AVX2==1
16#define set_const_v2df(bb,b) {double * temp=reinterpret_cast<double*>(&bb);temp[0]=b;temp[1]=b;}       
17#endif
18#if INLINE_SCATTER==0
19void 
20CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
21                     const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
22                     const int *  COIN_RESTRICT thisIndex,
23                     CoinFactorizationDouble * COIN_RESTRICT region)
24{
25#if UNROLL_SCATTER==0
26  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
27    CoinSimplexInt iRow = thisIndex[j];
28    CoinFactorizationDouble regionValue = region[iRow];
29    CoinFactorizationDouble value = thisElement[j];
30    assert (value);
31    region[iRow] = regionValue - value * pivotValue;
32  }
33#elif UNROLL_SCATTER==1
34  if ((number&1)!=0) {
35    number--;
36    CoinSimplexInt iRow = thisIndex[number];
37    CoinFactorizationDouble regionValue = region[iRow];
38    CoinFactorizationDouble value = thisElement[number]; 
39    region[iRow] = regionValue - value * pivotValue;
40  }
41  for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
42    CoinSimplexInt iRow0 = thisIndex[j];
43    CoinSimplexInt iRow1 = thisIndex[j-1];
44    CoinFactorizationDouble regionValue0 = region[iRow0];
45    CoinFactorizationDouble regionValue1 = region[iRow1];
46    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
47    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
48  }
49#elif UNROLL_SCATTER==2
50  if ((number&1)!=0) {
51    number--;
52    CoinSimplexInt iRow = thisIndex[number];
53    CoinFactorizationDouble regionValue = region[iRow];
54    CoinFactorizationDouble value = thisElement[number]; 
55    region[iRow] = regionValue - value * pivotValue;
56  }
57  if ((number&2)!=0) {
58    CoinSimplexInt iRow0 = thisIndex[number-1];
59    CoinFactorizationDouble regionValue0 = region[iRow0];
60    CoinFactorizationDouble value0 = thisElement[number-1]; 
61    CoinSimplexInt iRow1 = thisIndex[number-2];
62    CoinFactorizationDouble regionValue1 = region[iRow1];
63    CoinFactorizationDouble value1 = thisElement[number-2]; 
64    region[iRow0] = regionValue0 - value0 * pivotValue;
65    region[iRow1] = regionValue1 - value1 * pivotValue;
66    number-=2;
67  }
68  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
69    CoinSimplexInt iRow0 = thisIndex[j];
70    CoinSimplexInt iRow1 = thisIndex[j-1];
71    CoinFactorizationDouble regionValue0 = region[iRow0];
72    CoinFactorizationDouble regionValue1 = region[iRow1];
73    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
74    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
75    CoinSimplexInt iRow2 = thisIndex[j-2];
76    CoinSimplexInt iRow3 = thisIndex[j-3];
77    CoinFactorizationDouble regionValue2 = region[iRow2];
78    CoinFactorizationDouble regionValue3 = region[iRow3];
79    region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
80    region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
81  }
82#elif UNROLL_SCATTER==3
83  CoinSimplexInt iRow0;
84  CoinSimplexInt iRow1;
85  CoinFactorizationDouble regionValue0;
86  CoinFactorizationDouble regionValue1;
87  switch(static_cast<unsigned int>(number)) {
88  case 0:
89    break;
90  case 1:
91    iRow0 = thisIndex[0];
92    regionValue0 = region[iRow0];
93    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
94    break;
95  case 2:
96    iRow0 = thisIndex[0];
97    iRow1 = thisIndex[1];
98    regionValue0 = region[iRow0];
99    regionValue1 = region[iRow1];
100    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
101    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
102    break;
103  case 3:
104    iRow0 = thisIndex[0];
105    iRow1 = thisIndex[1];
106    regionValue0 = region[iRow0];
107    regionValue1 = region[iRow1];
108    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
109    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
110    iRow0 = thisIndex[2];
111    regionValue0 = region[iRow0];
112    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
113    break;
114  case 4:
115    iRow0 = thisIndex[0];
116    iRow1 = thisIndex[1];
117    regionValue0 = region[iRow0];
118    regionValue1 = region[iRow1];
119    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
120    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
121    iRow0 = thisIndex[2];
122    iRow1 = thisIndex[3];
123    regionValue0 = region[iRow0];
124    regionValue1 = region[iRow1];
125    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
126    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
127    break;
128  case 5:
129    iRow0 = thisIndex[0];
130    iRow1 = thisIndex[1];
131    regionValue0 = region[iRow0];
132    regionValue1 = region[iRow1];
133    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
134    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
135    iRow0 = thisIndex[2];
136    iRow1 = thisIndex[3];
137    regionValue0 = region[iRow0];
138    regionValue1 = region[iRow1];
139    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
140    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
141    iRow0 = thisIndex[4];
142    regionValue0 = region[iRow0];
143    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
144    break;
145  case 6:
146    iRow0 = thisIndex[0];
147    iRow1 = thisIndex[1];
148    regionValue0 = region[iRow0];
149    regionValue1 = region[iRow1];
150    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
151    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
152    iRow0 = thisIndex[2];
153    iRow1 = thisIndex[3];
154    regionValue0 = region[iRow0];
155    regionValue1 = region[iRow1];
156    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
157    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
158    iRow0 = thisIndex[4];
159    iRow1 = thisIndex[5];
160    regionValue0 = region[iRow0];
161    regionValue1 = region[iRow1];
162    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
163    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
164    break;
165  case 7:
166    iRow0 = thisIndex[0];
167    iRow1 = thisIndex[1];
168    regionValue0 = region[iRow0];
169    regionValue1 = region[iRow1];
170    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
171    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
172    iRow0 = thisIndex[2];
173    iRow1 = thisIndex[3];
174    regionValue0 = region[iRow0];
175    regionValue1 = region[iRow1];
176    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
177    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
178    iRow0 = thisIndex[4];
179    iRow1 = thisIndex[5];
180    regionValue0 = region[iRow0];
181    regionValue1 = region[iRow1];
182    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
183    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
184    iRow0 = thisIndex[6];
185    regionValue0 = region[iRow0];
186    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
187    break;
188  case 8:
189    iRow0 = thisIndex[0];
190    iRow1 = thisIndex[1];
191    regionValue0 = region[iRow0];
192    regionValue1 = region[iRow1];
193    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
194    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
195    iRow0 = thisIndex[2];
196    iRow1 = thisIndex[3];
197    regionValue0 = region[iRow0];
198    regionValue1 = region[iRow1];
199    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
200    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
201    iRow0 = thisIndex[4];
202    iRow1 = thisIndex[5];
203    regionValue0 = region[iRow0];
204    regionValue1 = region[iRow1];
205    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
206    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
207    iRow0 = thisIndex[6];
208    iRow1 = thisIndex[7];
209    regionValue0 = region[iRow0];
210    regionValue1 = region[iRow1];
211    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
212    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
213    break;
214  default:
215    if ((number&1)!=0) {
216      number--;
217      CoinSimplexInt iRow = thisIndex[number];
218      CoinFactorizationDouble regionValue = region[iRow];
219      CoinFactorizationDouble value = thisElement[number];
220      region[iRow] = regionValue - value * pivotValue;
221    }
222    for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
223      CoinSimplexInt iRow0 = thisIndex[j];
224      CoinSimplexInt iRow1 = thisIndex[j-1];
225      CoinFactorizationDouble regionValue0 = region[iRow0];
226      CoinFactorizationDouble regionValue1 = region[iRow1];
227      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
228      region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
229    }
230    break;
231  }
232#endif
233}
234#endif
235#if INLINE_GATHER==0
236CoinFactorizationDouble
237CoinAbcGatherUpdate(CoinSimplexInt number,
238                    const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
239                    const int *  COIN_RESTRICT thisIndex,
240                    CoinFactorizationDouble * COIN_RESTRICT region)
241{
242#if UNROLL_GATHER==0
243  CoinFactorizationDouble pivotValue=0.0;
244  for (CoinBigIndex j = 0; j < number; j ++ ) {
245    CoinFactorizationDouble value = thisElement[j];
246    CoinSimplexInt jRow = thisIndex[j];
247    value *= region[jRow];
248    pivotValue -= value;
249  }
250  return pivotValue;
251#else
252#error code
253#endif
254}
255#endif
256#if INLINE_MULTIPLY_ADD==0
257void 
258CoinAbcMultiplyIndexed(int number,
259                       const CoinFactorizationDouble *  COIN_RESTRICT multiplier,
260                       const int *  COIN_RESTRICT thisIndex,
261                       CoinSimplexDouble * COIN_RESTRICT region)
262{
263#ifndef INTEL_COMPILER
264#pragma simd
265#endif
266#if UNROLL_MULTIPLY_ADD==0
267  for (CoinSimplexInt i = 0; i < number; i ++ ) {
268    CoinSimplexInt iRow = thisIndex[i];
269    CoinSimplexDouble value = region[iRow];
270    value *= multiplier[iRow];
271    region[iRow] = value;
272  }
273#else
274#error code
275#endif
276}
277void 
278CoinAbcMultiplyIndexed(int number,
279                       const long double *  COIN_RESTRICT multiplier,
280                       const int *  COIN_RESTRICT thisIndex,
281                       long double * COIN_RESTRICT region)
282{
283#ifndef INTEL_COMPILER
284#pragma simd
285#endif
286#if UNROLL_MULTIPLY_ADD==0
287  for (CoinSimplexInt i = 0; i < number; i ++ ) {
288    CoinSimplexInt iRow = thisIndex[i];
289    long double value = region[iRow];
290    value *= multiplier[iRow];
291    region[iRow] = value;
292  }
293#else
294#error code
295#endif
296}
297#endif
298double
299CoinAbcMaximumAbsElement(const double * region, int sizeIn)
300{
301  int size=sizeIn;
302     double maxValue = 0.0;
303     //printf("a\n");
304#ifndef INTEL_COMPILER
305#pragma simd
306#endif
307     /*cilk_*/ for (int i = 0; i < size; i++)
308          maxValue = CoinMax(maxValue, fabs(region[i]));
309     return maxValue;
310}
311void 
312CoinAbcMinMaxAbsElement(const double * region, int sizeIn,double & minimum , double & maximum)
313{
314  double minValue=minimum;
315  double maxValue=maximum;
316  int size=sizeIn;
317  //printf("b\n");
318#ifndef INTEL_COMPILER
319#pragma simd
320#endif
321     /*cilk_*/ for (int i=0;i<size;i++) {
322    double value=fabs(region[i]);
323    minValue=CoinMin(value,minValue);
324    maxValue=CoinMax(value,maxValue);
325  }
326  minimum=minValue;
327  maximum=maxValue;
328}
329void 
330CoinAbcMinMaxAbsNormalValues(const double * region, int sizeIn,double & minimum , double & maximum)
331{
332  int size=sizeIn;
333  double minValue=minimum;
334  double maxValue=maximum;
335#define NORMAL_LOW_VALUE 1.0e-8
336#define NORMAL_HIGH_VALUE 1.0e8
337  //printf("c\n");
338#ifndef INTEL_COMPILER
339#pragma simd
340#endif
341     /*cilk_*/ for (int i=0;i<size;i++) {
342    double value=fabs(region[i]);
343    if (value>NORMAL_LOW_VALUE&&value<NORMAL_HIGH_VALUE) {
344      minValue=CoinMin(value,minValue);
345      maxValue=CoinMax(value,maxValue);
346    }
347  }
348  minimum=minValue;
349  maximum=maxValue;
350}
351void 
352CoinAbcScale(double * region, double multiplier,int sizeIn)
353{
354  int size=sizeIn;
355  // used printf("d\n");
356#ifndef INTEL_COMPILER
357#pragma simd
358#endif
359#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
360  cilk_for (int i=0;i<size;i++) {
361    region[i] *= multiplier;
362  }
363}
364void 
365CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int sizeIn)
366{
367  int size=sizeIn;
368     // used printf("e\n");
369#ifndef INTEL_COMPILER
370#pragma simd
371#endif
372#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
373  cilk_for (int i=0;i<size;i++) {
374    double value=fabs(region[i]);
375    if (value>killIfLessThanThis) {
376      if (value!=COIN_DBL_MAX) {
377        value *= multiplier;
378        if (value>killIfLessThanThis)
379          region[i] *= multiplier;
380        else
381          region[i]=0.0;
382      }
383    } else {
384      region[i]=0.0;
385    }
386  }
387}
388// maximum fabs(region[i]) and then region[i]*=multiplier
389double 
390CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int sizeIn)
391{
392  double maxValue=0.0;
393  int size=sizeIn;
394  //printf("f\n");
395#ifndef INTEL_COMPILER
396#pragma simd
397#endif
398     /*cilk_*/ for (int i=0;i<size;i++) {
399    double value=fabs(region[i]);
400    region[i] *= multiplier;
401    maxValue=CoinMax(value,maxValue);
402  }
403  return maxValue;
404}
405void
406CoinAbcSetElements(double * region, int sizeIn, double value)
407{
408  int size=sizeIn;
409     printf("g\n");
410#ifndef INTEL_COMPILER
411#pragma simd
412#endif
413#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
414     cilk_for (int i = 0; i < size; i++)
415          region[i] = value;
416}
417void
418CoinAbcMultiplyAdd(const double * region1, int sizeIn, double multiplier1,
419            double * regionChanged, double multiplier2)
420{
421  //printf("h\n");
422  int size=sizeIn;
423     if (multiplier1 == 1.0) {
424          if (multiplier2 == 1.0) {
425#ifndef INTEL_COMPILER
426#pragma simd
427#endif
428               /*cilk_*/ for (int i = 0; i < size; i++)
429                    regionChanged[i] = region1[i] + regionChanged[i];
430          } else if (multiplier2 == -1.0) {
431#ifndef INTEL_COMPILER
432#pragma simd
433#endif
434               /*cilk_*/ for (int i = 0; i < size; i++)
435                    regionChanged[i] = region1[i] - regionChanged[i];
436          } else if (multiplier2 == 0.0) {
437#ifndef INTEL_COMPILER
438#pragma simd
439#endif
440               /*cilk_*/ for (int i = 0; i < size; i++)
441                    regionChanged[i] = region1[i] ;
442          } else {
443#ifndef INTEL_COMPILER
444#pragma simd
445#endif
446               /*cilk_*/ for (int i = 0; i < size; i++)
447                    regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
448          }
449     } else if (multiplier1 == -1.0) {
450          if (multiplier2 == 1.0) {
451#ifndef INTEL_COMPILER
452#pragma simd
453#endif
454               /*cilk_*/ for (int i = 0; i < size; i++)
455                    regionChanged[i] = -region1[i] + regionChanged[i];
456          } else if (multiplier2 == -1.0) {
457#ifndef INTEL_COMPILER
458#pragma simd
459#endif
460               /*cilk_*/ for (int i = 0; i < size; i++)
461                    regionChanged[i] = -region1[i] - regionChanged[i];
462          } else if (multiplier2 == 0.0) {
463#ifndef INTEL_COMPILER
464#pragma simd
465#endif
466               /*cilk_*/ for (int i = 0; i < size; i++)
467                    regionChanged[i] = -region1[i] ;
468          } else {
469#ifndef INTEL_COMPILER
470#pragma simd
471#endif
472               /*cilk_*/ for (int i = 0; i < size; i++)
473                    regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
474          }
475     } else if (multiplier1 == 0.0) {
476          if (multiplier2 == 1.0) {
477               // nothing to do
478          } else if (multiplier2 == -1.0) {
479#ifndef INTEL_COMPILER
480#pragma simd
481#endif
482               /*cilk_*/ for (int i = 0; i < size; i++)
483                    regionChanged[i] =  -regionChanged[i];
484          } else if (multiplier2 == 0.0) {
485#ifndef INTEL_COMPILER
486#pragma simd
487#endif
488               /*cilk_*/ for (int i = 0; i < size; i++)
489                    regionChanged[i] =  0.0;
490          } else {
491#ifndef INTEL_COMPILER
492#pragma simd
493#endif
494               /*cilk_*/ for (int i = 0; i < size; i++)
495                    regionChanged[i] =  multiplier2 * regionChanged[i];
496          }
497     } else {
498          if (multiplier2 == 1.0) {
499#ifndef INTEL_COMPILER
500#pragma simd
501#endif
502               /*cilk_*/ for (int i = 0; i < size; i++)
503                    regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
504          } else if (multiplier2 == -1.0) {
505#ifndef INTEL_COMPILER
506#pragma simd
507#endif
508               /*cilk_*/ for (int i = 0; i < size; i++)
509                    regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
510          } else if (multiplier2 == 0.0) {
511#ifndef INTEL_COMPILER
512#pragma simd
513#endif
514               /*cilk_*/ for (int i = 0; i < size; i++)
515                    regionChanged[i] = multiplier1 * region1[i] ;
516          } else {
517#ifndef INTEL_COMPILER
518#pragma simd
519#endif
520               /*cilk_*/ for (int i = 0; i < size; i++)
521                    regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
522          }
523     }
524}
525double
526CoinAbcInnerProduct(const double * region1, int sizeIn, const double * region2)
527{
528  int size=sizeIn;
529  //printf("i\n");
530     double value = 0.0;
531#ifndef INTEL_COMPILER
532#pragma simd
533#endif
534     /*cilk_*/ for (int i = 0; i < size; i++)
535          value += region1[i] * region2[i];
536     return value;
537}
538void
539CoinAbcGetNorms(const double * region, int sizeIn, double & norm1, double & norm2)
540{
541  int size=sizeIn;
542  //printf("j\n");
543     norm1 = 0.0;
544     norm2 = 0.0;
545#ifndef INTEL_COMPILER
546#pragma simd
547#endif
548     /*cilk_*/ for (int i = 0; i < size; i++) {
549          norm2 += region[i] * region[i];
550          norm1 = CoinMax(norm1, fabs(region[i]));
551     }
552}
553// regionTo[index[i]]=regionFrom[i]
554void 
555CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int numberIn)
556{
557  int number=numberIn;
558     // used printf("k\n");
559#ifndef INTEL_COMPILER
560#pragma simd
561#endif
562#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
563  cilk_for (int i=0;i<number;i++) {
564    int k=index[i];
565    regionTo[k]=regionFrom[i];
566  }
567}
568// regionTo[i]=regionFrom[index[i]]
569void 
570CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int numberIn)
571{
572  int number=numberIn;
573     // used printf("l\n");
574#ifndef INTEL_COMPILER
575#pragma simd
576#endif
577#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
578  cilk_for (int i=0;i<number;i++) {
579    int k=index[i];
580    regionTo[i]=regionFrom[k];
581  }
582}
583// regionTo[index[i]]=0.0
584void 
585CoinAbcScatterZeroTo(double * regionTo, const int * index,int numberIn)
586{
587  int number=numberIn;
588     // used printf("m\n");
589#ifndef INTEL_COMPILER
590#pragma simd
591#endif
592#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
593  cilk_for (int i=0;i<number;i++) {
594    int k=index[i];
595    regionTo[k]=0.0;
596  }
597}
598// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
599void 
600CoinAbcScatterToList(const double * regionFrom, double * regionTo, 
601                   const int * indexList, const int * indexScatter ,int numberIn)
602{
603  int number=numberIn;
604  //printf("n\n");
605#ifndef INTEL_COMPILER
606#pragma simd
607#endif
608     /*cilk_*/ for (int i=0;i<number;i++) {
609    int j=indexList[i];
610    double value=regionFrom[j];
611    int k=indexScatter[j];
612    regionTo[k]=value;
613  }
614}
615void 
616CoinAbcInverseSqrts(double * array, int nIn)
617{
618  int n=nIn;
619     // used printf("o\n");
620#ifndef INTEL_COMPILER
621#pragma simd
622#endif
623#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
624  cilk_for (int i = 0; i < n; i++)
625    array[i] = 1.0 / sqrt(array[i]);
626}
627void 
628CoinAbcReciprocal(double * array, int nIn, const double *input)
629{
630  int n=nIn;
631     // used printf("p\n");
632#ifndef INTEL_COMPILER
633#pragma simd
634#endif
635#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
636  cilk_for (int i = 0; i < n; i++)
637    array[i] = 1.0 / input[i];
638}
639void 
640CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size)
641{
642  memcpy(array,arrayFrom,size*sizeof(double));
643}
644void 
645CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size)
646{
647  memcpy(array,arrayFrom,size*sizeof(int));
648}
649void 
650CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size)
651{
652  memcpy(array,arrayFrom,size*sizeof(unsigned char));
653}
654void 
655CoinAbcMemset0Long(double * array,int size)
656{
657  memset(array,0,size*sizeof(double));
658}
659void 
660CoinAbcMemset0Long(int * array,int size)
661{
662  memset(array,0,size*sizeof(int));
663}
664void 
665CoinAbcMemset0Long(unsigned char * array,int size)
666{
667  memset(array,0,size*sizeof(unsigned char));
668}
669void 
670CoinAbcMemmove(double * array,const double * arrayFrom,int size)
671{
672  memmove(array,arrayFrom,size*sizeof(double));
673}
674void CoinAbcMemmove(int * array,const int * arrayFrom,int size)
675{
676  memmove(array,arrayFrom,size*sizeof(int));
677}
678void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size)
679{
680  memmove(array,arrayFrom,size*sizeof(unsigned char));
681}
682// This moves down and zeroes out end
683void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size)
684{
685  assert(arrayFrom-array>=0);
686  memmove(array,arrayFrom,size*sizeof(double));
687  double * endArray = array+size;
688  double * endArrayFrom = arrayFrom+size;
689  double * startZero = CoinMax(arrayFrom,endArray);
690  memset(startZero,0,(endArrayFrom-startZero)*sizeof(double));
691}
692// This compacts several sections and zeroes out end
693int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths)
694{
695  int totalLength=alreadyDone;
696  for (int i=0;i<numberSections;i++) {
697    totalLength += lengths[i];
698  }
699  for (int i=0;i<numberSections;i++) {
700    int start=starts[i];
701    int length=lengths[i];
702    int end=start+length;
703    memmove(array+alreadyDone,array+start,length*sizeof(double));
704    if (end>totalLength) {
705      start=CoinMax(start,totalLength);
706      memset(array+start,0,(end-start)*sizeof(double));
707    }
708    alreadyDone += length;
709  }
710  return totalLength;
711} 
712// This compacts several sections
713int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths)
714{
715  for (int i=0;i<numberSections;i++) {
716    memmove(array+alreadyDone,array+starts[i],lengths[i]*sizeof(int));
717    alreadyDone += lengths[i];
718  }
719  return alreadyDone;
720}
721void CoinAbcScatterUpdate0(int number,CoinFactorizationDouble /*pivotValue*/,
722                           const CoinFactorizationDouble *  COIN_RESTRICT /*thisElement*/,
723                           const int *  COIN_RESTRICT /*thisIndex*/,
724                           CoinFactorizationDouble *  COIN_RESTRICT /*region*/)
725{
726  assert (number==0);
727}
728void CoinAbcScatterUpdate1(int number,CoinFactorizationDouble pivotValue,
729                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
730                          const int *  COIN_RESTRICT thisIndex,
731                          CoinFactorizationDouble * COIN_RESTRICT region)
732{
733  assert (number==1);
734  int iRow0 = thisIndex[0];
735  CoinFactorizationDouble regionValue0 = region[iRow0];
736  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
737}
738void CoinAbcScatterUpdate2(int number,CoinFactorizationDouble pivotValue,
739                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
740                          const int *  COIN_RESTRICT thisIndex,
741                          CoinFactorizationDouble * COIN_RESTRICT region)
742{
743  assert (number==2);
744  int iRow0 = thisIndex[0];
745  int iRow1 = thisIndex[1];
746  CoinFactorizationDouble regionValue0 = region[iRow0];
747  CoinFactorizationDouble regionValue1 = region[iRow1];
748  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
749  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
750}
751void CoinAbcScatterUpdate3(int number,CoinFactorizationDouble pivotValue,
752                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
753                          const int *  COIN_RESTRICT thisIndex,
754                          CoinFactorizationDouble * COIN_RESTRICT region)
755{
756  assert (number==3);
757  int iRow0 = thisIndex[0];
758  int iRow1 = thisIndex[1];
759  CoinFactorizationDouble regionValue0 = region[iRow0];
760  CoinFactorizationDouble regionValue1 = region[iRow1];
761  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
762  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
763  iRow0 = thisIndex[2];
764  regionValue0 = region[iRow0];
765  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
766}
767void CoinAbcScatterUpdate4(int number,CoinFactorizationDouble pivotValue,
768                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
769                          const int *  COIN_RESTRICT thisIndex,
770                          CoinFactorizationDouble * COIN_RESTRICT region)
771{
772  assert (number==4);
773  int iRow0 = thisIndex[0];
774  int iRow1 = thisIndex[1];
775  CoinFactorizationDouble regionValue0 = region[iRow0];
776  CoinFactorizationDouble regionValue1 = region[iRow1];
777  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
778  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
779  iRow0 = thisIndex[2];
780  iRow1 = thisIndex[3];
781  regionValue0 = region[iRow0];
782  regionValue1 = region[iRow1];
783  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
784  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
785}
786void CoinAbcScatterUpdate5(int number,CoinFactorizationDouble pivotValue,
787                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
788                          const int *  COIN_RESTRICT thisIndex,
789                          CoinFactorizationDouble * COIN_RESTRICT region)
790{
791  assert (number==5);
792  int iRow0 = thisIndex[0];
793  int iRow1 = thisIndex[1];
794  CoinFactorizationDouble regionValue0 = region[iRow0];
795  CoinFactorizationDouble regionValue1 = region[iRow1];
796  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
797  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
798  iRow0 = thisIndex[2];
799  iRow1 = thisIndex[3];
800  regionValue0 = region[iRow0];
801  regionValue1 = region[iRow1];
802  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
803  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
804  iRow0 = thisIndex[4];
805  regionValue0 = region[iRow0];
806  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
807}
808void CoinAbcScatterUpdate6(int number,CoinFactorizationDouble pivotValue,
809                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
810                          const int *  COIN_RESTRICT thisIndex,
811                          CoinFactorizationDouble * COIN_RESTRICT region)
812{
813  assert (number==6);
814  int iRow0 = thisIndex[0];
815  int iRow1 = thisIndex[1];
816  CoinFactorizationDouble regionValue0 = region[iRow0];
817  CoinFactorizationDouble regionValue1 = region[iRow1];
818  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
819  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
820  iRow0 = thisIndex[2];
821  iRow1 = thisIndex[3];
822  regionValue0 = region[iRow0];
823  regionValue1 = region[iRow1];
824  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
825  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
826  iRow0 = thisIndex[4];
827  iRow1 = thisIndex[5];
828  regionValue0 = region[iRow0];
829  regionValue1 = region[iRow1];
830  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
831  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
832}
833void CoinAbcScatterUpdate7(int number,CoinFactorizationDouble pivotValue,
834                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
835                          const int *  COIN_RESTRICT thisIndex,
836                          CoinFactorizationDouble * COIN_RESTRICT region)
837{
838  assert (number==7);
839  int iRow0 = thisIndex[0];
840  int iRow1 = thisIndex[1];
841  CoinFactorizationDouble regionValue0 = region[iRow0];
842  CoinFactorizationDouble regionValue1 = region[iRow1];
843  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
844  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
845  iRow0 = thisIndex[2];
846  iRow1 = thisIndex[3];
847  regionValue0 = region[iRow0];
848  regionValue1 = region[iRow1];
849  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
850  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
851  iRow0 = thisIndex[4];
852  iRow1 = thisIndex[5];
853  regionValue0 = region[iRow0];
854  regionValue1 = region[iRow1];
855  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
856  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
857  iRow0 = thisIndex[6];
858  regionValue0 = region[iRow0];
859  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
860}
861void CoinAbcScatterUpdate8(int number,CoinFactorizationDouble pivotValue,
862                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
863                          const int *  COIN_RESTRICT thisIndex,
864                          CoinFactorizationDouble * COIN_RESTRICT region)
865{
866  assert (number==8);
867  int iRow0 = thisIndex[0];
868  int iRow1 = thisIndex[1];
869  CoinFactorizationDouble regionValue0 = region[iRow0];
870  CoinFactorizationDouble regionValue1 = region[iRow1];
871  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
872  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
873  iRow0 = thisIndex[2];
874  iRow1 = thisIndex[3];
875  regionValue0 = region[iRow0];
876  regionValue1 = region[iRow1];
877  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
878  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
879  iRow0 = thisIndex[4];
880  iRow1 = thisIndex[5];
881  regionValue0 = region[iRow0];
882  regionValue1 = region[iRow1];
883  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
884  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
885  iRow0 = thisIndex[6];
886  iRow1 = thisIndex[7];
887  regionValue0 = region[iRow0];
888  regionValue1 = region[iRow1];
889  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
890  region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
891}
892void CoinAbcScatterUpdate4N(int number,CoinFactorizationDouble pivotValue,
893                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
894                          const int *  COIN_RESTRICT thisIndex,
895                          CoinFactorizationDouble * COIN_RESTRICT region)
896{
897  assert ((number&3)==0);
898  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
899    CoinSimplexInt iRow0 = thisIndex[j];
900    CoinSimplexInt iRow1 = thisIndex[j-1];
901    CoinFactorizationDouble regionValue0 = region[iRow0];
902    CoinFactorizationDouble regionValue1 = region[iRow1];
903    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
904    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
905    iRow0 = thisIndex[j-2];
906    iRow1 = thisIndex[j-3];
907    regionValue0 = region[iRow0];
908    regionValue1 = region[iRow1];
909    region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
910    region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
911  }
912}
913void CoinAbcScatterUpdate4NPlus1(int number,CoinFactorizationDouble pivotValue,
914                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
915                          const int *  COIN_RESTRICT thisIndex,
916                          CoinFactorizationDouble * COIN_RESTRICT region)
917{
918  assert ((number&3)==1);
919  int iRow0 = thisIndex[number-1];
920  CoinFactorizationDouble regionValue0 = region[iRow0];
921  region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
922  number -= 1;
923  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
924    CoinSimplexInt iRow0 = thisIndex[j];
925    CoinSimplexInt iRow1 = thisIndex[j-1];
926    CoinFactorizationDouble regionValue0 = region[iRow0];
927    CoinFactorizationDouble regionValue1 = region[iRow1];
928    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
929    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
930    iRow0 = thisIndex[j-2];
931    iRow1 = thisIndex[j-3];
932    regionValue0 = region[iRow0];
933    regionValue1 = region[iRow1];
934    region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
935    region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
936  }
937}
938void CoinAbcScatterUpdate4NPlus2(int number,CoinFactorizationDouble pivotValue,
939                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
940                          const int *  COIN_RESTRICT thisIndex,
941                          CoinFactorizationDouble * COIN_RESTRICT region)
942{
943  assert ((number&3)==2);
944  int iRow0 = thisIndex[number-1];
945  int iRow1 = thisIndex[number-2];
946  CoinFactorizationDouble regionValue0 = region[iRow0];
947  CoinFactorizationDouble regionValue1 = region[iRow1];
948  region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
949  region[iRow1] = regionValue1 - thisElement[number-2] * pivotValue;
950  number -= 2;
951  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
952    CoinSimplexInt iRow0 = thisIndex[j];
953    CoinSimplexInt iRow1 = thisIndex[j-1];
954    CoinFactorizationDouble regionValue0 = region[iRow0];
955    CoinFactorizationDouble regionValue1 = region[iRow1];
956    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
957    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
958    iRow0 = thisIndex[j-2];
959    iRow1 = thisIndex[j-3];
960    regionValue0 = region[iRow0];
961    regionValue1 = region[iRow1];
962    region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
963    region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
964  }
965}
966void CoinAbcScatterUpdate4NPlus3(int number,CoinFactorizationDouble pivotValue,
967                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
968                          const int *  COIN_RESTRICT thisIndex,
969                          CoinFactorizationDouble * COIN_RESTRICT region)
970{
971  assert ((number&3)==3);
972  int iRow0 = thisIndex[number-1];
973  int iRow1 = thisIndex[number-2];
974  CoinFactorizationDouble regionValue0 = region[iRow0];
975  CoinFactorizationDouble regionValue1 = region[iRow1];
976  region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
977  region[iRow1] = regionValue1 - thisElement[number-2] * pivotValue;
978  iRow0 = thisIndex[number-3];
979  regionValue0 = region[iRow0];
980  region[iRow0] = regionValue0 - thisElement[number-3] * pivotValue;
981  number -= 3;
982  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
983    CoinSimplexInt iRow0 = thisIndex[j];
984    CoinSimplexInt iRow1 = thisIndex[j-1];
985    CoinFactorizationDouble regionValue0 = region[iRow0];
986    CoinFactorizationDouble regionValue1 = region[iRow1];
987    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
988    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
989    iRow0 = thisIndex[j-2];
990    iRow1 = thisIndex[j-3];
991    regionValue0 = region[iRow0];
992    regionValue1 = region[iRow1];
993    region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
994    region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
995  }
996}
997void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble /*multiplier*/,
998                           const CoinFactorizationDouble *  COIN_RESTRICT element,
999                           CoinFactorizationDouble *  COIN_RESTRICT /*region*/)
1000{
1001#ifndef NDEBUG
1002  assert (numberIn==0);
1003#endif
1004}
1005// start alternatives
1006#if 0
1007void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
1008                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1009                          CoinFactorizationDouble * COIN_RESTRICT region)
1010{
1011#ifndef NDEBUG
1012  assert (numberIn==1);
1013#endif
1014  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1015  element++;
1016  int iColumn0=thisColumn[0];
1017  CoinFactorizationDouble value0=region[iColumn0];
1018  value0 += multiplier*element[0];
1019  region[iColumn0]=value0;
1020}
1021void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
1022                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1023                          CoinFactorizationDouble * COIN_RESTRICT region)
1024{
1025#ifndef NDEBUG
1026  assert (numberIn==2);
1027#endif
1028  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1029#if NEW_CHUNK_SIZE==2
1030  int nFull=2&(~(NEW_CHUNK_SIZE-1));
1031  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1032    coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1033    int iColumn0=thisColumn[0];
1034    int iColumn1=thisColumn[1];
1035    CoinFactorizationDouble value0=region[iColumn0];
1036    CoinFactorizationDouble value1=region[iColumn1];
1037    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1038    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1039    region[iColumn0]=value0;
1040    region[iColumn1]=value1;
1041    element+=NEW_CHUNK_SIZE_INCREMENT;
1042    thisColumn = reinterpret_cast<const int *>(element);
1043  }
1044#endif
1045#if NEW_CHUNK_SIZE==4
1046  element++;
1047  int iColumn0=thisColumn[0];
1048  int iColumn1=thisColumn[1];
1049  CoinFactorizationDouble value0=region[iColumn0];
1050  CoinFactorizationDouble value1=region[iColumn1];
1051  value0 += multiplier*element[0];
1052  value1 += multiplier*element[1];
1053  region[iColumn0]=value0;
1054  region[iColumn1]=value1;
1055#endif
1056}
1057void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
1058                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1059                          CoinFactorizationDouble * COIN_RESTRICT region)
1060{
1061#ifndef NDEBUG
1062  assert (numberIn==3);
1063#endif
1064  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1065#if NEW_CHUNK_SIZE==2
1066  int nFull=3&(~(NEW_CHUNK_SIZE-1));
1067  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1068    coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1069    int iColumn0=thisColumn[0];
1070    int iColumn1=thisColumn[1];
1071    CoinFactorizationDouble value0=region[iColumn0];
1072    CoinFactorizationDouble value1=region[iColumn1];
1073    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1074    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1075    region[iColumn0]=value0;
1076    region[iColumn1]=value1;
1077    element+=NEW_CHUNK_SIZE_INCREMENT;
1078    thisColumn = reinterpret_cast<const int *>(element);
1079  }
1080#endif
1081#if NEW_CHUNK_SIZE==2
1082  element++;
1083  int iColumn0=thisColumn[0];
1084  CoinFactorizationDouble value0=region[iColumn0];
1085  value0 += multiplier*element[0];
1086  region[iColumn0]=value0;
1087#else
1088  element+=2;
1089  int iColumn0=thisColumn[0];
1090  int iColumn1=thisColumn[1];
1091  int iColumn2=thisColumn[2];
1092  CoinFactorizationDouble value0=region[iColumn0];
1093  CoinFactorizationDouble value1=region[iColumn1];
1094  CoinFactorizationDouble value2=region[iColumn2];
1095  value0 += multiplier*element[0];
1096  value1 += multiplier*element[1];
1097  value2 += multiplier*element[2];
1098  region[iColumn0]=value0;
1099  region[iColumn1]=value1;
1100  region[iColumn2]=value2;
1101#endif
1102}
1103void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
1104                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1105                          CoinFactorizationDouble * COIN_RESTRICT region)
1106{
1107#ifndef NDEBUG
1108  assert (numberIn==4);
1109#endif
1110  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1111  int nFull=4&(~(NEW_CHUNK_SIZE-1));
1112  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1113    //coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1114#if NEW_CHUNK_SIZE==2
1115    int iColumn0=thisColumn[0];
1116    int iColumn1=thisColumn[1];
1117    CoinFactorizationDouble value0=region[iColumn0];
1118    CoinFactorizationDouble value1=region[iColumn1];
1119    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1120    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1121    region[iColumn0]=value0;
1122    region[iColumn1]=value1;
1123#elif NEW_CHUNK_SIZE==4
1124    int iColumn0=thisColumn[0];
1125    int iColumn1=thisColumn[1];
1126    int iColumn2=thisColumn[2];
1127    int iColumn3=thisColumn[3];
1128    CoinFactorizationDouble value0=region[iColumn0];
1129    CoinFactorizationDouble value1=region[iColumn1];
1130    CoinFactorizationDouble value2=region[iColumn2];
1131    CoinFactorizationDouble value3=region[iColumn3];
1132    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1133    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1134    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1135    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1136    region[iColumn0]=value0;
1137    region[iColumn1]=value1;
1138    region[iColumn2]=value2;
1139    region[iColumn3]=value3;
1140#else
1141    abort();
1142#endif
1143    element+=NEW_CHUNK_SIZE_INCREMENT;
1144    thisColumn = reinterpret_cast<const int *>(element);
1145  }
1146}
1147void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
1148                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1149                          CoinFactorizationDouble * COIN_RESTRICT region)
1150{
1151#ifndef NDEBUG
1152  assert (numberIn==5);
1153#endif
1154  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1155  int nFull=5&(~(NEW_CHUNK_SIZE-1));
1156  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1157    coin_prefetch_const(element+6);
1158#if NEW_CHUNK_SIZE==2
1159    int iColumn0=thisColumn[0];
1160    int iColumn1=thisColumn[1];
1161    CoinFactorizationDouble value0=region[iColumn0];
1162    CoinFactorizationDouble value1=region[iColumn1];
1163    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1164    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1165    region[iColumn0]=value0;
1166    region[iColumn1]=value1;
1167#elif NEW_CHUNK_SIZE==4
1168    int iColumn0=thisColumn[0];
1169    int iColumn1=thisColumn[1];
1170    int iColumn2=thisColumn[2];
1171    int iColumn3=thisColumn[3];
1172    CoinFactorizationDouble value0=region[iColumn0];
1173    CoinFactorizationDouble value1=region[iColumn1];
1174    CoinFactorizationDouble value2=region[iColumn2];
1175    CoinFactorizationDouble value3=region[iColumn3];
1176    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1177    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1178    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1179    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1180    region[iColumn0]=value0;
1181    region[iColumn1]=value1;
1182    region[iColumn2]=value2;
1183    region[iColumn3]=value3;
1184#else
1185    abort();
1186#endif
1187    element+=NEW_CHUNK_SIZE_INCREMENT;
1188    thisColumn = reinterpret_cast<const int *>(element);
1189  }
1190  element++;
1191  int iColumn0=thisColumn[0];
1192  CoinFactorizationDouble value0=region[iColumn0];
1193  value0 += multiplier*element[0];
1194  region[iColumn0]=value0;
1195}
1196void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
1197                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1198                          CoinFactorizationDouble * COIN_RESTRICT region)
1199{
1200#ifndef NDEBUG
1201  assert (numberIn==6);
1202#endif
1203  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1204  int nFull=6&(~(NEW_CHUNK_SIZE-1));
1205  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1206    coin_prefetch_const(element+6);
1207#if NEW_CHUNK_SIZE==2
1208    int iColumn0=thisColumn[0];
1209    int iColumn1=thisColumn[1];
1210    CoinFactorizationDouble value0=region[iColumn0];
1211    CoinFactorizationDouble value1=region[iColumn1];
1212    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1213    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1214    region[iColumn0]=value0;
1215    region[iColumn1]=value1;
1216#elif NEW_CHUNK_SIZE==4
1217    int iColumn0=thisColumn[0];
1218    int iColumn1=thisColumn[1];
1219    int iColumn2=thisColumn[2];
1220    int iColumn3=thisColumn[3];
1221    CoinFactorizationDouble value0=region[iColumn0];
1222    CoinFactorizationDouble value1=region[iColumn1];
1223    CoinFactorizationDouble value2=region[iColumn2];
1224    CoinFactorizationDouble value3=region[iColumn3];
1225    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1226    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1227    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1228    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1229    region[iColumn0]=value0;
1230    region[iColumn1]=value1;
1231    region[iColumn2]=value2;
1232    region[iColumn3]=value3;
1233#else
1234    abort();
1235#endif
1236    element+=NEW_CHUNK_SIZE_INCREMENT;
1237    thisColumn = reinterpret_cast<const int *>(element);
1238  }
1239#if NEW_CHUNK_SIZE==4
1240  element++;
1241  int iColumn0=thisColumn[0];
1242  int iColumn1=thisColumn[1];
1243  CoinFactorizationDouble value0=region[iColumn0];
1244  CoinFactorizationDouble value1=region[iColumn1];
1245  value0 += multiplier*element[0];
1246  value1 += multiplier*element[1];
1247  region[iColumn0]=value0;
1248  region[iColumn1]=value1;
1249#endif
1250}
1251void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
1252                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1253                          CoinFactorizationDouble * COIN_RESTRICT region)
1254{
1255#ifndef NDEBUG
1256  assert (numberIn==7);
1257#endif
1258  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1259  int nFull=7&(~(NEW_CHUNK_SIZE-1));
1260  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1261    coin_prefetch_const(element+6);
1262#if NEW_CHUNK_SIZE==2
1263    int iColumn0=thisColumn[0];
1264    int iColumn1=thisColumn[1];
1265    CoinFactorizationDouble value0=region[iColumn0];
1266    CoinFactorizationDouble value1=region[iColumn1];
1267    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1268    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1269    region[iColumn0]=value0;
1270    region[iColumn1]=value1;
1271#elif NEW_CHUNK_SIZE==4
1272    int iColumn0=thisColumn[0];
1273    int iColumn1=thisColumn[1];
1274    int iColumn2=thisColumn[2];
1275    int iColumn3=thisColumn[3];
1276    CoinFactorizationDouble value0=region[iColumn0];
1277    CoinFactorizationDouble value1=region[iColumn1];
1278    CoinFactorizationDouble value2=region[iColumn2];
1279    CoinFactorizationDouble value3=region[iColumn3];
1280    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1281    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1282    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1283    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1284    region[iColumn0]=value0;
1285    region[iColumn1]=value1;
1286    region[iColumn2]=value2;
1287    region[iColumn3]=value3;
1288#else
1289    abort();
1290#endif
1291    element+=NEW_CHUNK_SIZE_INCREMENT;
1292    thisColumn = reinterpret_cast<const int *>(element);
1293  }
1294#if NEW_CHUNK_SIZE==2
1295  element++;
1296  int iColumn0=thisColumn[0];
1297  CoinFactorizationDouble value0=region[iColumn0];
1298  value0 += multiplier*element[0];
1299  region[iColumn0]=value0;
1300#else
1301  element+=2;
1302  int iColumn0=thisColumn[0];
1303  int iColumn1=thisColumn[1];
1304  int iColumn2=thisColumn[2];
1305  CoinFactorizationDouble value0=region[iColumn0];
1306  CoinFactorizationDouble value1=region[iColumn1];
1307  CoinFactorizationDouble value2=region[iColumn2];
1308  value0 += multiplier*element[0];
1309  value1 += multiplier*element[1];
1310  value2 += multiplier*element[2];
1311  region[iColumn0]=value0;
1312  region[iColumn1]=value1;
1313  region[iColumn2]=value2;
1314#endif
1315}
1316void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
1317                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1318                          CoinFactorizationDouble * COIN_RESTRICT region)
1319{
1320#ifndef NDEBUG
1321  assert (numberIn==8);
1322#endif
1323  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1324  int nFull=8&(~(NEW_CHUNK_SIZE-1));
1325  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1326    coin_prefetch_const(element+6);
1327#if NEW_CHUNK_SIZE==2
1328    int iColumn0=thisColumn[0];
1329    int iColumn1=thisColumn[1];
1330    CoinFactorizationDouble value0=region[iColumn0];
1331    CoinFactorizationDouble value1=region[iColumn1];
1332    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1333    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1334    region[iColumn0]=value0;
1335    region[iColumn1]=value1;
1336#elif NEW_CHUNK_SIZE==4
1337    int iColumn0=thisColumn[0];
1338    int iColumn1=thisColumn[1];
1339    int iColumn2=thisColumn[2];
1340    int iColumn3=thisColumn[3];
1341    CoinFactorizationDouble value0=region[iColumn0];
1342    CoinFactorizationDouble value1=region[iColumn1];
1343    CoinFactorizationDouble value2=region[iColumn2];
1344    CoinFactorizationDouble value3=region[iColumn3];
1345    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1346    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1347    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1348    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1349    region[iColumn0]=value0;
1350    region[iColumn1]=value1;
1351    region[iColumn2]=value2;
1352    region[iColumn3]=value3;
1353#else
1354    abort();
1355#endif
1356    element+=NEW_CHUNK_SIZE_INCREMENT;
1357    thisColumn = reinterpret_cast<const int *>(element);
1358  }
1359}
1360void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
1361                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1362                                 CoinFactorizationDouble * COIN_RESTRICT region)
1363{
1364  assert ((numberIn&3)==0);
1365  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1366  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1367  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1368    coin_prefetch_const(element+6);
1369#if NEW_CHUNK_SIZE==2
1370    int iColumn0=thisColumn[0];
1371    int iColumn1=thisColumn[1];
1372    CoinFactorizationDouble value0=region[iColumn0];
1373    CoinFactorizationDouble value1=region[iColumn1];
1374    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1375    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1376    region[iColumn0]=value0;
1377    region[iColumn1]=value1;
1378#elif NEW_CHUNK_SIZE==4
1379    int iColumn0=thisColumn[0];
1380    int iColumn1=thisColumn[1];
1381    int iColumn2=thisColumn[2];
1382    int iColumn3=thisColumn[3];
1383    CoinFactorizationDouble value0=region[iColumn0];
1384    CoinFactorizationDouble value1=region[iColumn1];
1385    CoinFactorizationDouble value2=region[iColumn2];
1386    CoinFactorizationDouble value3=region[iColumn3];
1387    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1388    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1389    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1390    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1391    region[iColumn0]=value0;
1392    region[iColumn1]=value1;
1393    region[iColumn2]=value2;
1394    region[iColumn3]=value3;
1395#else
1396    abort();
1397#endif
1398    element+=NEW_CHUNK_SIZE_INCREMENT;
1399    thisColumn = reinterpret_cast<const int *>(element);
1400  }
1401}
1402void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
1403                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1404                                 CoinFactorizationDouble * COIN_RESTRICT region)
1405{
1406  assert ((numberIn&3)==1);
1407  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1408  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1409  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1410    coin_prefetch_const(element+6);
1411#if NEW_CHUNK_SIZE==2
1412    int iColumn0=thisColumn[0];
1413    int iColumn1=thisColumn[1];
1414    CoinFactorizationDouble value0=region[iColumn0];
1415    CoinFactorizationDouble value1=region[iColumn1];
1416    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1417    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1418    region[iColumn0]=value0;
1419    region[iColumn1]=value1;
1420#elif NEW_CHUNK_SIZE==4
1421    int iColumn0=thisColumn[0];
1422    int iColumn1=thisColumn[1];
1423    int iColumn2=thisColumn[2];
1424    int iColumn3=thisColumn[3];
1425    CoinFactorizationDouble value0=region[iColumn0];
1426    CoinFactorizationDouble value1=region[iColumn1];
1427    CoinFactorizationDouble value2=region[iColumn2];
1428    CoinFactorizationDouble value3=region[iColumn3];
1429    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1430    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1431    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1432    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1433    region[iColumn0]=value0;
1434    region[iColumn1]=value1;
1435    region[iColumn2]=value2;
1436    region[iColumn3]=value3;
1437#else
1438    abort();
1439#endif
1440    element+=NEW_CHUNK_SIZE_INCREMENT;
1441    thisColumn = reinterpret_cast<const int *>(element);
1442  }
1443  element++;
1444  int iColumn0=thisColumn[0];
1445  CoinFactorizationDouble value0=region[iColumn0];
1446  value0 += multiplier*element[0];
1447  region[iColumn0]=value0;
1448}
1449void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
1450                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1451                                 CoinFactorizationDouble * COIN_RESTRICT region)
1452{
1453  assert ((numberIn&3)==2);
1454  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1455  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1456  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1457    coin_prefetch_const(element+6);
1458#if NEW_CHUNK_SIZE==2
1459    int iColumn0=thisColumn[0];
1460    int iColumn1=thisColumn[1];
1461    CoinFactorizationDouble value0=region[iColumn0];
1462    CoinFactorizationDouble value1=region[iColumn1];
1463    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1464    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1465    region[iColumn0]=value0;
1466    region[iColumn1]=value1;
1467#elif NEW_CHUNK_SIZE==4
1468    int iColumn0=thisColumn[0];
1469    int iColumn1=thisColumn[1];
1470    int iColumn2=thisColumn[2];
1471    int iColumn3=thisColumn[3];
1472    CoinFactorizationDouble value0=region[iColumn0];
1473    CoinFactorizationDouble value1=region[iColumn1];
1474    CoinFactorizationDouble value2=region[iColumn2];
1475    CoinFactorizationDouble value3=region[iColumn3];
1476    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1477    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1478    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1479    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1480    region[iColumn0]=value0;
1481    region[iColumn1]=value1;
1482    region[iColumn2]=value2;
1483    region[iColumn3]=value3;
1484#else
1485    abort();
1486#endif
1487    element+=NEW_CHUNK_SIZE_INCREMENT;
1488    thisColumn = reinterpret_cast<const int *>(element);
1489  }
1490#if NEW_CHUNK_SIZE==4
1491  element++;
1492  int iColumn0=thisColumn[0];
1493  int iColumn1=thisColumn[1];
1494  CoinFactorizationDouble value0=region[iColumn0];
1495  CoinFactorizationDouble value1=region[iColumn1];
1496  value0 += multiplier*element[0];
1497  value1 += multiplier*element[1];
1498  region[iColumn0]=value0;
1499  region[iColumn1]=value1;
1500#endif
1501}
1502void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
1503                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1504                                 CoinFactorizationDouble * COIN_RESTRICT region)
1505{
1506  assert ((numberIn&3)==3);
1507  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1508  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1509  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1510    coin_prefetch_const(element+6);
1511#if NEW_CHUNK_SIZE==2
1512    int iColumn0=thisColumn[0];
1513    int iColumn1=thisColumn[1];
1514    CoinFactorizationDouble value0=region[iColumn0];
1515    CoinFactorizationDouble value1=region[iColumn1];
1516    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1517    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1518    region[iColumn0]=value0;
1519    region[iColumn1]=value1;
1520#elif NEW_CHUNK_SIZE==4
1521    int iColumn0=thisColumn[0];
1522    int iColumn1=thisColumn[1];
1523    int iColumn2=thisColumn[2];
1524    int iColumn3=thisColumn[3];
1525    CoinFactorizationDouble value0=region[iColumn0];
1526    CoinFactorizationDouble value1=region[iColumn1];
1527    CoinFactorizationDouble value2=region[iColumn2];
1528    CoinFactorizationDouble value3=region[iColumn3];
1529    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1530    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1531    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1532    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1533    region[iColumn0]=value0;
1534    region[iColumn1]=value1;
1535    region[iColumn2]=value2;
1536    region[iColumn3]=value3;
1537#else
1538    abort();
1539#endif
1540    element+=NEW_CHUNK_SIZE_INCREMENT;
1541    thisColumn = reinterpret_cast<const int *>(element);
1542  }
1543#if NEW_CHUNK_SIZE==2
1544  element++;
1545  int iColumn0=thisColumn[0];
1546  CoinFactorizationDouble value0=region[iColumn0];
1547  value0 += multiplier*element[0];
1548  region[iColumn0]=value0;
1549#else
1550  element+=2;
1551  int iColumn0=thisColumn[0];
1552  int iColumn1=thisColumn[1];
1553  int iColumn2=thisColumn[2];
1554  CoinFactorizationDouble value0=region[iColumn0];
1555  CoinFactorizationDouble value1=region[iColumn1];
1556  CoinFactorizationDouble value2=region[iColumn2];
1557  value0 += multiplier*element[0];
1558  value1 += multiplier*element[1];
1559  value2 += multiplier*element[2];
1560  region[iColumn0]=value0;
1561  region[iColumn1]=value1;
1562  region[iColumn2]=value2;
1563#endif
1564}
1565#else
1566// alternative
1567#define CACHE_LINE_SIZE 16
1568#define OPERATION +=
1569#define functionName(zname) CoinAbc##zname
1570#define ABC_CREATE_SCATTER_FUNCTION 1
1571#include "CoinAbcHelperFunctions.hpp"
1572#undef OPERATION
1573#undef functionName
1574#define OPERATION -=
1575#define functionName(zname) CoinAbc##zname##Subtract
1576#include "CoinAbcHelperFunctions.hpp"
1577#undef OPERATION
1578#undef functionName
1579#define OPERATION +=
1580#define functionName(zname) CoinAbc##zname##Add
1581#include "CoinAbcHelperFunctions.hpp"
1582scatterUpdate AbcScatterLow[]={
1583  &CoinAbcScatterUpdate0,
1584  &CoinAbcScatterUpdate1,
1585  &CoinAbcScatterUpdate2,
1586  &CoinAbcScatterUpdate3,
1587  &CoinAbcScatterUpdate4,
1588  &CoinAbcScatterUpdate5,
1589  &CoinAbcScatterUpdate6,
1590  &CoinAbcScatterUpdate7,
1591  &CoinAbcScatterUpdate8};
1592scatterUpdate AbcScatterHigh[]={
1593  &CoinAbcScatterUpdate4N,
1594  &CoinAbcScatterUpdate4NPlus1,
1595  &CoinAbcScatterUpdate4NPlus2,
1596  &CoinAbcScatterUpdate4NPlus3};
1597scatterUpdate AbcScatterLowSubtract[]={
1598  &CoinAbcScatterUpdate0,
1599  &CoinAbcScatterUpdate1Subtract,
1600  &CoinAbcScatterUpdate2Subtract,
1601  &CoinAbcScatterUpdate3Subtract,
1602  &CoinAbcScatterUpdate4Subtract,
1603  &CoinAbcScatterUpdate5Subtract,
1604  &CoinAbcScatterUpdate6Subtract,
1605  &CoinAbcScatterUpdate7Subtract,
1606  &CoinAbcScatterUpdate8Subtract};
1607scatterUpdate AbcScatterHighSubtract[]={
1608  &CoinAbcScatterUpdate4NSubtract,
1609  &CoinAbcScatterUpdate4NPlus1Subtract,
1610  &CoinAbcScatterUpdate4NPlus2Subtract,
1611  &CoinAbcScatterUpdate4NPlus3Subtract};
1612scatterUpdate AbcScatterLowAdd[]={
1613  &CoinAbcScatterUpdate0,
1614  &CoinAbcScatterUpdate1Add,
1615  &CoinAbcScatterUpdate2Add,
1616  &CoinAbcScatterUpdate3Add,
1617  &CoinAbcScatterUpdate4Add,
1618  &CoinAbcScatterUpdate5Add,
1619  &CoinAbcScatterUpdate6Add,
1620  &CoinAbcScatterUpdate7Add,
1621  &CoinAbcScatterUpdate8Add};
1622scatterUpdate AbcScatterHighAdd[]={
1623  &CoinAbcScatterUpdate4NAdd,
1624  &CoinAbcScatterUpdate4NPlus1Add,
1625  &CoinAbcScatterUpdate4NPlus2Add,
1626  &CoinAbcScatterUpdate4NPlus3Add};
1627#endif
1628#include "CoinPragma.hpp"
1629
1630#include <math.h>
1631
1632#include <cfloat>
1633#include <cassert>
1634#include <string>
1635#include <stdio.h>
1636#include <iostream>
1637#include "CoinAbcCommonFactorization.hpp"
1638#if 1
1639#if AVX2==1
1640#include "emmintrin.h"
1641#elif AVX2==2
1642#include <immintrin.h>
1643#elif AVX2==3
1644#include "avx2intrin.h"
1645#endif
1646#endif
1647// cilk seems a bit fragile
1648#define CILK_FRAGILE 0
1649#if CILK_FRAGILE>1
1650#undef cilk_spawn
1651#undef cilk_sync
1652#define cilk_spawn
1653#define cilk_sync
1654#define ONWARD 0
1655#elif CILK_FRAGILE==1
1656#define ONWARD 0
1657#else
1658#define ONWARD 1
1659#endif
1660#define BLOCKING1 8 // factorization strip
1661#define BLOCKING2 8 // dgemm recursive
1662#define BLOCKING3 32 // dgemm parallel
1663void CoinAbcDgemm(int m, int n, int k, double * COIN_RESTRICT a,int lda,
1664                          double * COIN_RESTRICT b,double * COIN_RESTRICT c
1665#if ABC_PARALLEL==2
1666                          ,int parallelMode
1667#endif
1668)
1669{
1670  assert ((m&(BLOCKING8-1))==0&&(n&(BLOCKING8-1))==0&&(k&(BLOCKING8-1))==0);
1671  /* entry for column j and row i (when multiple of BLOCKING8)
1672     is at aBlocked+j*m+i*BLOCKING8
1673  */
1674  // k is always smallish
1675  if (m<=BLOCKING8&&n<=BLOCKING8) {
1676    assert (m==BLOCKING8&&n==BLOCKING8&&k==BLOCKING8);
1677    double * COIN_RESTRICT aBase2=a;
1678    double * COIN_RESTRICT bBase2=b;
1679    double * COIN_RESTRICT cBase2=c;
1680    for (int j=0;j<BLOCKING8;j++) {
1681      double * COIN_RESTRICT aBase=aBase2;
1682#if AVX2!=2
1683#if 1
1684      double c0=cBase2[0];
1685      double c1=cBase2[1];
1686      double c2=cBase2[2];
1687      double c3=cBase2[3];
1688      double c4=cBase2[4];
1689      double c5=cBase2[5];
1690      double c6=cBase2[6];
1691      double c7=cBase2[7];
1692      for (int l=0;l<BLOCKING8;l++) {
1693        double bValue = bBase2[l];
1694        if (bValue) {
1695          c0 -= bValue*aBase[0];
1696          c1 -= bValue*aBase[1];
1697          c2 -= bValue*aBase[2];
1698          c3 -= bValue*aBase[3];
1699          c4 -= bValue*aBase[4];
1700          c5 -= bValue*aBase[5];
1701          c6 -= bValue*aBase[6];
1702          c7 -= bValue*aBase[7];
1703        }
1704        aBase += BLOCKING8;
1705      }
1706      cBase2[0]=c0;
1707      cBase2[1]=c1;
1708      cBase2[2]=c2;
1709      cBase2[3]=c3;
1710      cBase2[4]=c4;
1711      cBase2[5]=c5;
1712      cBase2[6]=c6;
1713      cBase2[7]=c7;
1714#else
1715      for (int l=0;l<BLOCKING8;l++) {
1716        double bValue = bBase2[l];
1717        if (bValue) {
1718          for (int i=0;i<BLOCKING8;i++) {
1719            cBase2[i] -= bValue*aBase[i];
1720          }
1721        }
1722        aBase += BLOCKING8;
1723      }
1724#endif
1725#else
1726      //__m256d c0=_mm256_load_pd(cBase2);
1727      __m256d c0=*reinterpret_cast<__m256d *>(cBase2);
1728      //__m256d c1=_mm256_load_pd(cBase2+4);
1729      __m256d c1=*reinterpret_cast<__m256d *>(cBase2+4);
1730      for (int l=0;l<BLOCKING8;l++) {
1731        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1732        __m256d bb = static_cast<__m256d> (__builtin_ia32_vbroadcastsd256 (bBase2+l));
1733        //__m256d a0 = _mm256_load_pd(aBase);
1734        __m256d a0=*reinterpret_cast<__m256d *>(aBase);
1735        //__m256d a1 = _mm256_load_pd(aBase+4);
1736        __m256d a1=*reinterpret_cast<__m256d *>(aBase+4);
1737        c0 -= bb*a0;
1738        c1 -= bb*a1;
1739        aBase += BLOCKING8;
1740      }
1741      //_mm256_store_pd (cBase2, c0);
1742      *reinterpret_cast<__m256d *>(cBase2)=c0;
1743      //_mm256_store_pd (cBase2+4, c1);
1744      *reinterpret_cast<__m256d *>(cBase2+4)=c1;
1745#endif
1746      bBase2 += BLOCKING8;
1747      cBase2 += BLOCKING8;
1748    }
1749  } else if (m>n) {
1750    // make sure mNew1 multiple of BLOCKING8
1751#if BLOCKING8==8
1752    int mNew1=((m+15)>>4)<<3;
1753#elif BLOCKING8==4
1754    int mNew1=((m+7)>>3)<<2;
1755#elif BLOCKING8==2
1756    int mNew1=((m+3)>>2)<<1;
1757#else
1758    abort();
1759#endif
1760    assert (mNew1>0&&m-mNew1>0);
1761#if ABC_PARALLEL==2
1762    if (mNew1<=BLOCKING3||!parallelMode) {
1763#endif
1764      //printf("splitMa mNew1 %d\n",mNew1);
1765      CoinAbcDgemm(mNew1,n,k,a,lda,b,c
1766#if ABC_PARALLEL==2
1767                          ,0
1768#endif
1769);
1770      //printf("splitMb mNew1 %d\n",mNew1);
1771      CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8
1772#if ABC_PARALLEL==2
1773                          ,0
1774#endif
1775);
1776#if ABC_PARALLEL==2
1777    } else {
1778      //printf("splitMa mNew1 %d\n",mNew1);
1779      cilk_spawn CoinAbcDgemm(mNew1,n,k,a,lda,b,c,ONWARD);
1780      //printf("splitMb mNew1 %d\n",mNew1);
1781      CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8,ONWARD);
1782      cilk_sync;
1783    }
1784#endif
1785  } else {
1786    // make sure nNew1 multiple of BLOCKING8
1787#if BLOCKING8==8
1788    int nNew1=((n+15)>>4)<<3;
1789#elif BLOCKING8==4
1790    int nNew1=((n+7)>>3)<<2;
1791#elif BLOCKING8==2
1792    int nNew1=((n+3)>>2)<<1;
1793#else
1794    abort();
1795#endif
1796    assert (nNew1>0&&n-nNew1>0);
1797#if ABC_PARALLEL==2
1798    if (nNew1<=BLOCKING3||!parallelMode) {
1799#endif
1800      //printf("splitNa nNew1 %d\n",nNew1);
1801      CoinAbcDgemm(m,nNew1,k,a,lda,b,c
1802#if ABC_PARALLEL==2
1803                          ,0
1804#endif
1805                    );
1806      //printf("splitNb nNew1 %d\n",nNew1);
1807      CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1
1808#if ABC_PARALLEL==2
1809                          ,0
1810#endif
1811                    );
1812#if ABC_PARALLEL==2
1813    } else {
1814      //printf("splitNa nNew1 %d\n",nNew1);
1815      cilk_spawn CoinAbcDgemm(m,nNew1,k,a,lda,b,c,ONWARD);
1816      //printf("splitNb nNew1 %d\n",nNew1);
1817      CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1,ONWARD);
1818      cilk_sync;
1819    }
1820#endif
1821  }
1822}
1823#ifdef ABC_LONG_FACTORIZATION
1824// Start long double version
1825void CoinAbcDgemm(int m, int n, int k, long double * COIN_RESTRICT a,int lda,
1826                          long double * COIN_RESTRICT b,long double * COIN_RESTRICT c
1827#if ABC_PARALLEL==2
1828                          ,int parallelMode
1829#endif
1830)
1831{
1832  assert ((m&(BLOCKING8-1))==0&&(n&(BLOCKING8-1))==0&&(k&(BLOCKING8-1))==0);
1833  /* entry for column j and row i (when multiple of BLOCKING8)
1834     is at aBlocked+j*m+i*BLOCKING8
1835  */
1836  // k is always smallish
1837  if (m<=BLOCKING8&&n<=BLOCKING8) {
1838    assert (m==BLOCKING8&&n==BLOCKING8&&k==BLOCKING8);
1839    long double * COIN_RESTRICT aBase2=a;
1840    long double * COIN_RESTRICT bBase2=b;
1841    long double * COIN_RESTRICT cBase2=c;
1842    for (int j=0;j<BLOCKING8;j++) {
1843      long double * COIN_RESTRICT aBase=aBase2;
1844#if AVX2!=2
1845#if 1
1846      long double c0=cBase2[0];
1847      long double c1=cBase2[1];
1848      long double c2=cBase2[2];
1849      long double c3=cBase2[3];
1850      long double c4=cBase2[4];
1851      long double c5=cBase2[5];
1852      long double c6=cBase2[6];
1853      long double c7=cBase2[7];
1854      for (int l=0;l<BLOCKING8;l++) {
1855        long double bValue = bBase2[l];
1856        if (bValue) {
1857          c0 -= bValue*aBase[0];
1858          c1 -= bValue*aBase[1];
1859          c2 -= bValue*aBase[2];
1860          c3 -= bValue*aBase[3];
1861          c4 -= bValue*aBase[4];
1862          c5 -= bValue*aBase[5];
1863          c6 -= bValue*aBase[6];
1864          c7 -= bValue*aBase[7];
1865        }
1866        aBase += BLOCKING8;
1867      }
1868      cBase2[0]=c0;
1869      cBase2[1]=c1;
1870      cBase2[2]=c2;
1871      cBase2[3]=c3;
1872      cBase2[4]=c4;
1873      cBase2[5]=c5;
1874      cBase2[6]=c6;
1875      cBase2[7]=c7;
1876#else
1877      for (int l=0;l<BLOCKING8;l++) {
1878        long double bValue = bBase2[l];
1879        if (bValue) {
1880          for (int i=0;i<BLOCKING8;i++) {
1881            cBase2[i] -= bValue*aBase[i];
1882          }
1883        }
1884        aBase += BLOCKING8;
1885      }
1886#endif
1887#else
1888      //__m256d c0=_mm256_load_pd(cBase2);
1889      __m256d c0=*reinterpret_cast<__m256d *>(cBase2);
1890      //__m256d c1=_mm256_load_pd(cBase2+4);
1891      __m256d c1=*reinterpret_cast<__m256d *>(cBase2+4);
1892      for (int l=0;l<BLOCKING8;l++) {
1893        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1894        __m256d bb = static_cast<__m256d> (__builtin_ia32_vbroadcastsd256 (bBase2+l));
1895        //__m256d a0 = _mm256_load_pd(aBase);
1896        __m256d a0=*reinterpret_cast<__m256d *>(aBase);
1897        //__m256d a1 = _mm256_load_pd(aBase+4);
1898        __m256d a1=*reinterpret_cast<__m256d *>(aBase+4);
1899        c0 -= bb*a0;
1900        c1 -= bb*a1;
1901        aBase += BLOCKING8;
1902      }
1903      //_mm256_store_pd (cBase2, c0);
1904      *reinterpret_cast<__m256d *>(cBase2)=c0;
1905      //_mm256_store_pd (cBase2+4, c1);
1906      *reinterpret_cast<__m256d *>(cBase2+4)=c1;
1907#endif
1908      bBase2 += BLOCKING8;
1909      cBase2 += BLOCKING8;
1910    }
1911  } else if (m>n) {
1912    // make sure mNew1 multiple of BLOCKING8
1913#if BLOCKING8==8
1914    int mNew1=((m+15)>>4)<<3;
1915#elif BLOCKING8==4
1916    int mNew1=((m+7)>>3)<<2;
1917#elif BLOCKING8==2
1918    int mNew1=((m+3)>>2)<<1;
1919#else
1920    abort();
1921#endif
1922    assert (mNew1>0&&m-mNew1>0);
1923#if ABC_PARALLEL==2
1924    if (mNew1<=BLOCKING3||!parallelMode) {
1925#endif
1926      //printf("splitMa mNew1 %d\n",mNew1);
1927      CoinAbcDgemm(mNew1,n,k,a,lda,b,c
1928#if ABC_PARALLEL==2
1929                          ,0
1930#endif
1931);
1932      //printf("splitMb mNew1 %d\n",mNew1);
1933      CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8
1934#if ABC_PARALLEL==2
1935                          ,0
1936#endif
1937);
1938#if ABC_PARALLEL==2
1939    } else {
1940      //printf("splitMa mNew1 %d\n",mNew1);
1941      cilk_spawn CoinAbcDgemm(mNew1,n,k,a,lda,b,c,ONWARD);
1942      //printf("splitMb mNew1 %d\n",mNew1);
1943      CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8,ONWARD);
1944      cilk_sync;
1945    }
1946#endif
1947  } else {
1948    // make sure nNew1 multiple of BLOCKING8
1949#if BLOCKING8==8
1950    int nNew1=((n+15)>>4)<<3;
1951#elif BLOCKING8==4
1952    int nNew1=((n+7)>>3)<<2;
1953#elif BLOCKING8==2
1954    int nNew1=((n+3)>>2)<<1;
1955#else
1956    abort();
1957#endif
1958    assert (nNew1>0&&n-nNew1>0);
1959#if ABC_PARALLEL==2
1960    if (nNew1<=BLOCKING3||!parallelMode) {
1961#endif
1962      //printf("splitNa nNew1 %d\n",nNew1);
1963      CoinAbcDgemm(m,nNew1,k,a,lda,b,c
1964#if ABC_PARALLEL==2
1965                          ,0
1966#endif
1967                    );
1968      //printf("splitNb nNew1 %d\n",nNew1);
1969      CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1
1970#if ABC_PARALLEL==2
1971                          ,0
1972#endif
1973                    );
1974#if ABC_PARALLEL==2
1975    } else {
1976      //printf("splitNa nNew1 %d\n",nNew1);
1977      cilk_spawn CoinAbcDgemm(m,nNew1,k,a,lda,b,c,ONWARD);
1978      //printf("splitNb nNew1 %d\n",nNew1);
1979      CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1,ONWARD);
1980      cilk_sync;
1981    }
1982#endif
1983  }
1984}
1985#endif
1986// From Intel site
1987// get AVX intrinsics 
1988#include <immintrin.h> 
1989// get CPUID capability 
1990//#include <intrin.h> 
1991#define cpuid(func,ax,bx,cx,dx)\
1992        __asm__ __volatile__ ("cpuid":\
1993        "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func)); 
1994// written for clarity, not conciseness 
1995#define OSXSAVEFlag (1UL<<27) 
1996#define AVXFlag     ((1UL<<28)|OSXSAVEFlag) 
1997#define VAESFlag    ((1UL<<25)|AVXFlag|OSXSAVEFlag) 
1998#define FMAFlag     ((1UL<<12)|AVXFlag|OSXSAVEFlag) 
1999#define CLMULFlag   ((1UL<< 1)|AVXFlag|OSXSAVEFlag) 
2000   
2001bool DetectFeature(unsigned int feature) 
2002{ 
2003  int CPUInfo[4]; //, InfoType=1, ECX = 1; 
2004  //__cpuidex(CPUInfo, 1, 1);       // read the desired CPUID format 
2005  cpuid(1,CPUInfo[0],CPUInfo[1],CPUInfo[2],CPUInfo[3]);
2006  unsigned int ECX = CPUInfo[2];  // the output of CPUID in the ECX register.   
2007  if ((ECX & feature) != feature) // Missing feature   
2008    return false;   
2009#if 0
2010  long int val = _xgetbv(0);       // read XFEATURE_ENABLED_MASK register 
2011  if ((val&6) != 6)               // check OS has enabled both XMM and YMM support. 
2012    return false;   
2013#endif
2014  return true; 
2015} 
Note: See TracBrowser for help on using the repository browser.