source: trunk/Clp/src/CoinAbcHelperFunctions.cpp @ 2385

Last change on this file since 2385 was 2385, checked in by unxusr, 3 months ago

formatting

  • Property svn:keywords set to Id
File size: 66.3 KB
Line 
1/* $Id: CoinAbcHelperFunctions.cpp 2385 2019-01-06 19:43:06Z unxusr $ */
2// Copyright (C) 2003, International Business Machines
3// Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
4// This code is licensed under the terms of the Eclipse Public License (EPL).
5
6#include <cfloat>
7#include <cstdlib>
8#include <cmath>
9#include "CoinHelperFunctions.hpp"
10#include "CoinAbcHelperFunctions.hpp"
11#include "CoinTypes.hpp"
12#include "CoinFinite.hpp"
13#include "CoinAbcCommon.hpp"
14//#define AVX2 1
15#if AVX2 == 1
16#define set_const_v2df(bb, b)                         \
17  {                                                   \
18    double *temp = reinterpret_cast< double * >(&bb); \
19    temp[0] = b;                                      \
20    temp[1] = b;                                      \
21  }
22#endif
23#if INLINE_SCATTER == 0
24void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
25  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
26  const int *COIN_RESTRICT thisIndex,
27  CoinFactorizationDouble *COIN_RESTRICT region)
28{
29#if UNROLL_SCATTER == 0
30  for (CoinBigIndex j = number - 1; j >= 0; j--) {
31    CoinSimplexInt iRow = thisIndex[j];
32    CoinFactorizationDouble regionValue = region[iRow];
33    CoinFactorizationDouble value = thisElement[j];
34    assert(value);
35    region[iRow] = regionValue - value * pivotValue;
36  }
37#elif UNROLL_SCATTER == 1
38  if ((number & 1) != 0) {
39    number--;
40    CoinSimplexInt iRow = thisIndex[number];
41    CoinFactorizationDouble regionValue = region[iRow];
42    CoinFactorizationDouble value = thisElement[number];
43    region[iRow] = regionValue - value * pivotValue;
44  }
45  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
46    CoinSimplexInt iRow0 = thisIndex[j];
47    CoinSimplexInt iRow1 = thisIndex[j - 1];
48    CoinFactorizationDouble regionValue0 = region[iRow0];
49    CoinFactorizationDouble regionValue1 = region[iRow1];
50    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
51    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
52  }
53#elif UNROLL_SCATTER == 2
54  if ((number & 1) != 0) {
55    number--;
56    CoinSimplexInt iRow = thisIndex[number];
57    CoinFactorizationDouble regionValue = region[iRow];
58    CoinFactorizationDouble value = thisElement[number];
59    region[iRow] = regionValue - value * pivotValue;
60  }
61  if ((number & 2) != 0) {
62    CoinSimplexInt iRow0 = thisIndex[number - 1];
63    CoinFactorizationDouble regionValue0 = region[iRow0];
64    CoinFactorizationDouble value0 = thisElement[number - 1];
65    CoinSimplexInt iRow1 = thisIndex[number - 2];
66    CoinFactorizationDouble regionValue1 = region[iRow1];
67    CoinFactorizationDouble value1 = thisElement[number - 2];
68    region[iRow0] = regionValue0 - value0 * pivotValue;
69    region[iRow1] = regionValue1 - value1 * pivotValue;
70    number -= 2;
71  }
72  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
73    CoinSimplexInt iRow0 = thisIndex[j];
74    CoinSimplexInt iRow1 = thisIndex[j - 1];
75    CoinFactorizationDouble regionValue0 = region[iRow0];
76    CoinFactorizationDouble regionValue1 = region[iRow1];
77    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
78    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
79    CoinSimplexInt iRow2 = thisIndex[j - 2];
80    CoinSimplexInt iRow3 = thisIndex[j - 3];
81    CoinFactorizationDouble regionValue2 = region[iRow2];
82    CoinFactorizationDouble regionValue3 = region[iRow3];
83    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
84    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
85  }
86#elif UNROLL_SCATTER == 3
87  CoinSimplexInt iRow0;
88  CoinSimplexInt iRow1;
89  CoinFactorizationDouble regionValue0;
90  CoinFactorizationDouble regionValue1;
91  switch (static_cast< unsigned int >(number)) {
92  case 0:
93    break;
94  case 1:
95    iRow0 = thisIndex[0];
96    regionValue0 = region[iRow0];
97    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
98    break;
99  case 2:
100    iRow0 = thisIndex[0];
101    iRow1 = thisIndex[1];
102    regionValue0 = region[iRow0];
103    regionValue1 = region[iRow1];
104    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
105    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
106    break;
107  case 3:
108    iRow0 = thisIndex[0];
109    iRow1 = thisIndex[1];
110    regionValue0 = region[iRow0];
111    regionValue1 = region[iRow1];
112    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
113    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
114    iRow0 = thisIndex[2];
115    regionValue0 = region[iRow0];
116    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
117    break;
118  case 4:
119    iRow0 = thisIndex[0];
120    iRow1 = thisIndex[1];
121    regionValue0 = region[iRow0];
122    regionValue1 = region[iRow1];
123    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
124    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
125    iRow0 = thisIndex[2];
126    iRow1 = thisIndex[3];
127    regionValue0 = region[iRow0];
128    regionValue1 = region[iRow1];
129    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
130    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
131    break;
132  case 5:
133    iRow0 = thisIndex[0];
134    iRow1 = thisIndex[1];
135    regionValue0 = region[iRow0];
136    regionValue1 = region[iRow1];
137    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
138    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
139    iRow0 = thisIndex[2];
140    iRow1 = thisIndex[3];
141    regionValue0 = region[iRow0];
142    regionValue1 = region[iRow1];
143    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
144    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
145    iRow0 = thisIndex[4];
146    regionValue0 = region[iRow0];
147    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
148    break;
149  case 6:
150    iRow0 = thisIndex[0];
151    iRow1 = thisIndex[1];
152    regionValue0 = region[iRow0];
153    regionValue1 = region[iRow1];
154    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
155    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
156    iRow0 = thisIndex[2];
157    iRow1 = thisIndex[3];
158    regionValue0 = region[iRow0];
159    regionValue1 = region[iRow1];
160    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
161    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
162    iRow0 = thisIndex[4];
163    iRow1 = thisIndex[5];
164    regionValue0 = region[iRow0];
165    regionValue1 = region[iRow1];
166    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
167    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
168    break;
169  case 7:
170    iRow0 = thisIndex[0];
171    iRow1 = thisIndex[1];
172    regionValue0 = region[iRow0];
173    regionValue1 = region[iRow1];
174    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
175    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
176    iRow0 = thisIndex[2];
177    iRow1 = thisIndex[3];
178    regionValue0 = region[iRow0];
179    regionValue1 = region[iRow1];
180    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
181    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
182    iRow0 = thisIndex[4];
183    iRow1 = thisIndex[5];
184    regionValue0 = region[iRow0];
185    regionValue1 = region[iRow1];
186    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
187    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
188    iRow0 = thisIndex[6];
189    regionValue0 = region[iRow0];
190    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
191    break;
192  case 8:
193    iRow0 = thisIndex[0];
194    iRow1 = thisIndex[1];
195    regionValue0 = region[iRow0];
196    regionValue1 = region[iRow1];
197    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
198    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
199    iRow0 = thisIndex[2];
200    iRow1 = thisIndex[3];
201    regionValue0 = region[iRow0];
202    regionValue1 = region[iRow1];
203    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
204    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
205    iRow0 = thisIndex[4];
206    iRow1 = thisIndex[5];
207    regionValue0 = region[iRow0];
208    regionValue1 = region[iRow1];
209    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
210    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
211    iRow0 = thisIndex[6];
212    iRow1 = thisIndex[7];
213    regionValue0 = region[iRow0];
214    regionValue1 = region[iRow1];
215    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
216    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
217    break;
218  default:
219    if ((number & 1) != 0) {
220      number--;
221      CoinSimplexInt iRow = thisIndex[number];
222      CoinFactorizationDouble regionValue = region[iRow];
223      CoinFactorizationDouble value = thisElement[number];
224      region[iRow] = regionValue - value * pivotValue;
225    }
226    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
227      CoinSimplexInt iRow0 = thisIndex[j];
228      CoinSimplexInt iRow1 = thisIndex[j - 1];
229      CoinFactorizationDouble regionValue0 = region[iRow0];
230      CoinFactorizationDouble regionValue1 = region[iRow1];
231      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
232      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
233    }
234    break;
235  }
236#endif
237}
238#endif
239#if INLINE_GATHER == 0
240CoinFactorizationDouble
241CoinAbcGatherUpdate(CoinSimplexInt number,
242  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
243  const int *COIN_RESTRICT thisIndex,
244  CoinFactorizationDouble *COIN_RESTRICT region)
245{
246#if UNROLL_GATHER == 0
247  CoinFactorizationDouble pivotValue = 0.0;
248  for (CoinBigIndex j = 0; j < number; j++) {
249    CoinFactorizationDouble value = thisElement[j];
250    CoinSimplexInt jRow = thisIndex[j];
251    value *= region[jRow];
252    pivotValue -= value;
253  }
254  return pivotValue;
255#else
256#error code
257#endif
258}
259#endif
260#if INLINE_MULTIPLY_ADD == 0
261void CoinAbcMultiplyIndexed(int number,
262  const CoinFactorizationDouble *COIN_RESTRICT multiplier,
263  const int *COIN_RESTRICT thisIndex,
264  CoinSimplexDouble *COIN_RESTRICT region)
265{
266#ifndef INTEL_COMPILER
267// was #pragma simd
268#endif
269#if UNROLL_MULTIPLY_ADD == 0
270  for (CoinSimplexInt i = 0; i < number; i++) {
271    CoinSimplexInt iRow = thisIndex[i];
272    CoinSimplexDouble value = region[iRow];
273    value *= multiplier[iRow];
274    region[iRow] = value;
275  }
276#else
277#error code
278#endif
279}
280void CoinAbcMultiplyIndexed(int number,
281  const long double *COIN_RESTRICT multiplier,
282  const int *COIN_RESTRICT thisIndex,
283  long double *COIN_RESTRICT region)
284{
285#ifndef INTEL_COMPILER
286// was #pragma simd
287#endif
288#if UNROLL_MULTIPLY_ADD == 0
289  for (CoinSimplexInt i = 0; i < number; i++) {
290    CoinSimplexInt iRow = thisIndex[i];
291    long double value = region[iRow];
292    value *= multiplier[iRow];
293    region[iRow] = value;
294  }
295#else
296#error code
297#endif
298}
299#endif
300double
301CoinAbcMaximumAbsElement(const double *region, int sizeIn)
302{
303  int size = sizeIn;
304  double maxValue = 0.0;
305  //printf("a\n");
306#ifndef INTEL_COMPILER
307// was #pragma simd
308#endif
309  /*cilk_*/ for (int i = 0; i < size; i++)
310    maxValue = CoinMax(maxValue, fabs(region[i]));
311  return maxValue;
312}
313void CoinAbcMinMaxAbsElement(const double *region, int sizeIn, double &minimum, double &maximum)
314{
315  double minValue = minimum;
316  double maxValue = maximum;
317  int size = sizeIn;
318  //printf("b\n");
319#ifndef INTEL_COMPILER
320// was #pragma simd
321#endif
322  /*cilk_*/ for (int i = 0; i < size; i++) {
323    double value = fabs(region[i]);
324    minValue = CoinMin(value, minValue);
325    maxValue = CoinMax(value, maxValue);
326  }
327  minimum = minValue;
328  maximum = maxValue;
329}
330void CoinAbcMinMaxAbsNormalValues(const double *region, int sizeIn, double &minimum, double &maximum)
331{
332  int size = sizeIn;
333  double minValue = minimum;
334  double maxValue = maximum;
335#define NORMAL_LOW_VALUE 1.0e-8
336#define NORMAL_HIGH_VALUE 1.0e8
337  //printf("c\n");
338#ifndef INTEL_COMPILER
339// was #pragma simd
340#endif
341  /*cilk_*/ for (int i = 0; i < size; i++) {
342    double value = fabs(region[i]);
343    if (value > NORMAL_LOW_VALUE && value < NORMAL_HIGH_VALUE) {
344      minValue = CoinMin(value, minValue);
345      maxValue = CoinMax(value, maxValue);
346    }
347  }
348  minimum = minValue;
349  maximum = maxValue;
350}
351void CoinAbcScale(double *region, double multiplier, int sizeIn)
352{
353  int size = sizeIn;
354  // used printf("d\n");
355#ifndef INTEL_COMPILER
356// was #pragma simd
357#endif
358#pragma cilk grainsize = CILK_FOR_GRAINSIZE
359  cilk_for(int i = 0; i < size; i++)
360  {
361    region[i] *= multiplier;
362  }
363}
364void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int sizeIn)
365{
366  int size = sizeIn;
367  // used printf("e\n");
368#ifndef INTEL_COMPILER
369// was #pragma simd
370#endif
371#pragma cilk grainsize = CILK_FOR_GRAINSIZE
372  cilk_for(int i = 0; i < size; i++)
373  {
374    double value = fabs(region[i]);
375    if (value > killIfLessThanThis) {
376      if (value != COIN_DBL_MAX) {
377        value *= multiplier;
378        if (value > killIfLessThanThis)
379          region[i] *= multiplier;
380        else
381          region[i] = 0.0;
382      }
383    } else {
384      region[i] = 0.0;
385    }
386  }
387}
388// maximum fabs(region[i]) and then region[i]*=multiplier
389double
390CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int sizeIn)
391{
392  double maxValue = 0.0;
393  int size = sizeIn;
394  //printf("f\n");
395#ifndef INTEL_COMPILER
396// was #pragma simd
397#endif
398  /*cilk_*/ for (int i = 0; i < size; i++) {
399    double value = fabs(region[i]);
400    region[i] *= multiplier;
401    maxValue = CoinMax(value, maxValue);
402  }
403  return maxValue;
404}
405void CoinAbcSetElements(double *region, int sizeIn, double value)
406{
407  int size = sizeIn;
408  printf("g\n");
409#ifndef INTEL_COMPILER
410// was #pragma simd
411#endif
412#pragma cilk grainsize = CILK_FOR_GRAINSIZE
413  cilk_for(int i = 0; i < size; i++)
414    region[i]
415    = value;
416}
417void CoinAbcMultiplyAdd(const double *region1, int sizeIn, double multiplier1,
418  double *regionChanged, double multiplier2)
419{
420  //printf("h\n");
421  int size = sizeIn;
422  if (multiplier1 == 1.0) {
423    if (multiplier2 == 1.0) {
424#ifndef INTEL_COMPILER
425// was #pragma simd
426#endif
427      /*cilk_*/ for (int i = 0; i < size; i++)
428        regionChanged[i] = region1[i] + regionChanged[i];
429    } else if (multiplier2 == -1.0) {
430#ifndef INTEL_COMPILER
431// was #pragma simd
432#endif
433      /*cilk_*/ for (int i = 0; i < size; i++)
434        regionChanged[i] = region1[i] - regionChanged[i];
435    } else if (multiplier2 == 0.0) {
436#ifndef INTEL_COMPILER
437// was #pragma simd
438#endif
439      /*cilk_*/ for (int i = 0; i < size; i++)
440        regionChanged[i] = region1[i];
441    } else {
442#ifndef INTEL_COMPILER
443// was #pragma simd
444#endif
445      /*cilk_*/ for (int i = 0; i < size; i++)
446        regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
447    }
448  } else if (multiplier1 == -1.0) {
449    if (multiplier2 == 1.0) {
450#ifndef INTEL_COMPILER
451// was #pragma simd
452#endif
453      /*cilk_*/ for (int i = 0; i < size; i++)
454        regionChanged[i] = -region1[i] + regionChanged[i];
455    } else if (multiplier2 == -1.0) {
456#ifndef INTEL_COMPILER
457// was #pragma simd
458#endif
459      /*cilk_*/ for (int i = 0; i < size; i++)
460        regionChanged[i] = -region1[i] - regionChanged[i];
461    } else if (multiplier2 == 0.0) {
462#ifndef INTEL_COMPILER
463// was #pragma simd
464#endif
465      /*cilk_*/ for (int i = 0; i < size; i++)
466        regionChanged[i] = -region1[i];
467    } else {
468#ifndef INTEL_COMPILER
469// was #pragma simd
470#endif
471      /*cilk_*/ for (int i = 0; i < size; i++)
472        regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
473    }
474  } else if (multiplier1 == 0.0) {
475    if (multiplier2 == 1.0) {
476      // nothing to do
477    } else if (multiplier2 == -1.0) {
478#ifndef INTEL_COMPILER
479// was #pragma simd
480#endif
481      /*cilk_*/ for (int i = 0; i < size; i++)
482        regionChanged[i] = -regionChanged[i];
483    } else if (multiplier2 == 0.0) {
484#ifndef INTEL_COMPILER
485// was #pragma simd
486#endif
487      /*cilk_*/ for (int i = 0; i < size; i++)
488        regionChanged[i] = 0.0;
489    } else {
490#ifndef INTEL_COMPILER
491// was #pragma simd
492#endif
493      /*cilk_*/ for (int i = 0; i < size; i++)
494        regionChanged[i] = multiplier2 * regionChanged[i];
495    }
496  } else {
497    if (multiplier2 == 1.0) {
498#ifndef INTEL_COMPILER
499// was #pragma simd
500#endif
501      /*cilk_*/ for (int i = 0; i < size; i++)
502        regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
503    } else if (multiplier2 == -1.0) {
504#ifndef INTEL_COMPILER
505// was #pragma simd
506#endif
507      /*cilk_*/ for (int i = 0; i < size; i++)
508        regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
509    } else if (multiplier2 == 0.0) {
510#ifndef INTEL_COMPILER
511// was #pragma simd
512#endif
513      /*cilk_*/ for (int i = 0; i < size; i++)
514        regionChanged[i] = multiplier1 * region1[i];
515    } else {
516#ifndef INTEL_COMPILER
517// was #pragma simd
518#endif
519      /*cilk_*/ for (int i = 0; i < size; i++)
520        regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
521    }
522  }
523}
524double
525CoinAbcInnerProduct(const double *region1, int sizeIn, const double *region2)
526{
527  int size = sizeIn;
528  //printf("i\n");
529  double value = 0.0;
530#ifndef INTEL_COMPILER
531// was #pragma simd
532#endif
533  /*cilk_*/ for (int i = 0; i < size; i++)
534    value += region1[i] * region2[i];
535  return value;
536}
537void CoinAbcGetNorms(const double *region, int sizeIn, double &norm1, double &norm2)
538{
539  int size = sizeIn;
540  //printf("j\n");
541  norm1 = 0.0;
542  norm2 = 0.0;
543#ifndef INTEL_COMPILER
544// was #pragma simd
545#endif
546  /*cilk_*/ for (int i = 0; i < size; i++) {
547    norm2 += region[i] * region[i];
548    norm1 = CoinMax(norm1, fabs(region[i]));
549  }
550}
551// regionTo[index[i]]=regionFrom[i]
552void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int numberIn)
553{
554  int number = numberIn;
555  // used printf("k\n");
556#ifndef INTEL_COMPILER
557// was #pragma simd
558#endif
559#pragma cilk grainsize = CILK_FOR_GRAINSIZE
560  cilk_for(int i = 0; i < number; i++)
561  {
562    int k = index[i];
563    regionTo[k] = regionFrom[i];
564  }
565}
566// regionTo[i]=regionFrom[index[i]]
567void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int numberIn)
568{
569  int number = numberIn;
570  // used printf("l\n");
571#ifndef INTEL_COMPILER
572// was #pragma simd
573#endif
574#pragma cilk grainsize = CILK_FOR_GRAINSIZE
575  cilk_for(int i = 0; i < number; i++)
576  {
577    int k = index[i];
578    regionTo[i] = regionFrom[k];
579  }
580}
581// regionTo[index[i]]=0.0
582void CoinAbcScatterZeroTo(double *regionTo, const int *index, int numberIn)
583{
584  int number = numberIn;
585  // used printf("m\n");
586#ifndef INTEL_COMPILER
587// was #pragma simd
588#endif
589#pragma cilk grainsize = CILK_FOR_GRAINSIZE
590  cilk_for(int i = 0; i < number; i++)
591  {
592    int k = index[i];
593    regionTo[k] = 0.0;
594  }
595}
596// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
597void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
598  const int *indexList, const int *indexScatter, int numberIn)
599{
600  int number = numberIn;
601  //printf("n\n");
602#ifndef INTEL_COMPILER
603// was #pragma simd
604#endif
605  /*cilk_*/ for (int i = 0; i < number; i++) {
606    int j = indexList[i];
607    double value = regionFrom[j];
608    int k = indexScatter[j];
609    regionTo[k] = value;
610  }
611}
612void CoinAbcInverseSqrts(double *array, int nIn)
613{
614  int n = nIn;
615  // used printf("o\n");
616#ifndef INTEL_COMPILER
617// was #pragma simd
618#endif
619#pragma cilk grainsize = CILK_FOR_GRAINSIZE
620  cilk_for(int i = 0; i < n; i++)
621    array[i]
622    = 1.0 / sqrt(array[i]);
623}
624void CoinAbcReciprocal(double *array, int nIn, const double *input)
625{
626  int n = nIn;
627  // used printf("p\n");
628#ifndef INTEL_COMPILER
629// was #pragma simd
630#endif
631#pragma cilk grainsize = CILK_FOR_GRAINSIZE
632  cilk_for(int i = 0; i < n; i++)
633    array[i]
634    = 1.0 / input[i];
635}
636void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size)
637{
638  memcpy(array, arrayFrom, size * sizeof(double));
639}
640void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size)
641{
642  memcpy(array, arrayFrom, size * sizeof(int));
643}
644void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size)
645{
646  memcpy(array, arrayFrom, size * sizeof(unsigned char));
647}
648void CoinAbcMemset0Long(double *array, int size)
649{
650  memset(array, 0, size * sizeof(double));
651}
652void CoinAbcMemset0Long(int *array, int size)
653{
654  memset(array, 0, size * sizeof(int));
655}
656void CoinAbcMemset0Long(unsigned char *array, int size)
657{
658  memset(array, 0, size * sizeof(unsigned char));
659}
660void CoinAbcMemmove(double *array, const double *arrayFrom, int size)
661{
662  memmove(array, arrayFrom, size * sizeof(double));
663}
664void CoinAbcMemmove(int *array, const int *arrayFrom, int size)
665{
666  memmove(array, arrayFrom, size * sizeof(int));
667}
668void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size)
669{
670  memmove(array, arrayFrom, size * sizeof(unsigned char));
671}
672// This moves down and zeroes out end
673void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size)
674{
675  assert(arrayFrom - array >= 0);
676  memmove(array, arrayFrom, size * sizeof(double));
677  double *endArray = array + size;
678  double *endArrayFrom = arrayFrom + size;
679  double *startZero = CoinMax(arrayFrom, endArray);
680  memset(startZero, 0, (endArrayFrom - startZero) * sizeof(double));
681}
682// This compacts several sections and zeroes out end
683int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths)
684{
685  int totalLength = alreadyDone;
686  for (int i = 0; i < numberSections; i++) {
687    totalLength += lengths[i];
688  }
689  for (int i = 0; i < numberSections; i++) {
690    int start = starts[i];
691    int length = lengths[i];
692    int end = start + length;
693    memmove(array + alreadyDone, array + start, length * sizeof(double));
694    if (end > totalLength) {
695      start = CoinMax(start, totalLength);
696      memset(array + start, 0, (end - start) * sizeof(double));
697    }
698    alreadyDone += length;
699  }
700  return totalLength;
701}
702// This compacts several sections
703int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths)
704{
705  for (int i = 0; i < numberSections; i++) {
706    memmove(array + alreadyDone, array + starts[i], lengths[i] * sizeof(int));
707    alreadyDone += lengths[i];
708  }
709  return alreadyDone;
710}
711void CoinAbcScatterUpdate0(int number, CoinFactorizationDouble /*pivotValue*/,
712  const CoinFactorizationDouble *COIN_RESTRICT /*thisElement*/,
713  const int *COIN_RESTRICT /*thisIndex*/,
714  CoinFactorizationDouble *COIN_RESTRICT /*region*/)
715{
716  assert(number == 0);
717}
718void CoinAbcScatterUpdate1(int number, CoinFactorizationDouble pivotValue,
719  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
720  const int *COIN_RESTRICT thisIndex,
721  CoinFactorizationDouble *COIN_RESTRICT region)
722{
723  assert(number == 1);
724  int iRow0 = thisIndex[0];
725  CoinFactorizationDouble regionValue0 = region[iRow0];
726  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
727}
728void CoinAbcScatterUpdate2(int number, CoinFactorizationDouble pivotValue,
729  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
730  const int *COIN_RESTRICT thisIndex,
731  CoinFactorizationDouble *COIN_RESTRICT region)
732{
733  assert(number == 2);
734  int iRow0 = thisIndex[0];
735  int iRow1 = thisIndex[1];
736  CoinFactorizationDouble regionValue0 = region[iRow0];
737  CoinFactorizationDouble regionValue1 = region[iRow1];
738  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
739  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
740}
741void CoinAbcScatterUpdate3(int number, CoinFactorizationDouble pivotValue,
742  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
743  const int *COIN_RESTRICT thisIndex,
744  CoinFactorizationDouble *COIN_RESTRICT region)
745{
746  assert(number == 3);
747  int iRow0 = thisIndex[0];
748  int iRow1 = thisIndex[1];
749  CoinFactorizationDouble regionValue0 = region[iRow0];
750  CoinFactorizationDouble regionValue1 = region[iRow1];
751  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
752  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
753  iRow0 = thisIndex[2];
754  regionValue0 = region[iRow0];
755  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
756}
757void CoinAbcScatterUpdate4(int number, CoinFactorizationDouble pivotValue,
758  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
759  const int *COIN_RESTRICT thisIndex,
760  CoinFactorizationDouble *COIN_RESTRICT region)
761{
762  assert(number == 4);
763  int iRow0 = thisIndex[0];
764  int iRow1 = thisIndex[1];
765  CoinFactorizationDouble regionValue0 = region[iRow0];
766  CoinFactorizationDouble regionValue1 = region[iRow1];
767  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
768  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
769  iRow0 = thisIndex[2];
770  iRow1 = thisIndex[3];
771  regionValue0 = region[iRow0];
772  regionValue1 = region[iRow1];
773  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
774  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
775}
776void CoinAbcScatterUpdate5(int number, CoinFactorizationDouble pivotValue,
777  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
778  const int *COIN_RESTRICT thisIndex,
779  CoinFactorizationDouble *COIN_RESTRICT region)
780{
781  assert(number == 5);
782  int iRow0 = thisIndex[0];
783  int iRow1 = thisIndex[1];
784  CoinFactorizationDouble regionValue0 = region[iRow0];
785  CoinFactorizationDouble regionValue1 = region[iRow1];
786  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
787  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
788  iRow0 = thisIndex[2];
789  iRow1 = thisIndex[3];
790  regionValue0 = region[iRow0];
791  regionValue1 = region[iRow1];
792  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
793  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
794  iRow0 = thisIndex[4];
795  regionValue0 = region[iRow0];
796  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
797}
798void CoinAbcScatterUpdate6(int number, CoinFactorizationDouble pivotValue,
799  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
800  const int *COIN_RESTRICT thisIndex,
801  CoinFactorizationDouble *COIN_RESTRICT region)
802{
803  assert(number == 6);
804  int iRow0 = thisIndex[0];
805  int iRow1 = thisIndex[1];
806  CoinFactorizationDouble regionValue0 = region[iRow0];
807  CoinFactorizationDouble regionValue1 = region[iRow1];
808  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
809  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
810  iRow0 = thisIndex[2];
811  iRow1 = thisIndex[3];
812  regionValue0 = region[iRow0];
813  regionValue1 = region[iRow1];
814  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
815  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
816  iRow0 = thisIndex[4];
817  iRow1 = thisIndex[5];
818  regionValue0 = region[iRow0];
819  regionValue1 = region[iRow1];
820  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
821  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
822}
823void CoinAbcScatterUpdate7(int number, CoinFactorizationDouble pivotValue,
824  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
825  const int *COIN_RESTRICT thisIndex,
826  CoinFactorizationDouble *COIN_RESTRICT region)
827{
828  assert(number == 7);
829  int iRow0 = thisIndex[0];
830  int iRow1 = thisIndex[1];
831  CoinFactorizationDouble regionValue0 = region[iRow0];
832  CoinFactorizationDouble regionValue1 = region[iRow1];
833  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
834  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
835  iRow0 = thisIndex[2];
836  iRow1 = thisIndex[3];
837  regionValue0 = region[iRow0];
838  regionValue1 = region[iRow1];
839  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
840  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
841  iRow0 = thisIndex[4];
842  iRow1 = thisIndex[5];
843  regionValue0 = region[iRow0];
844  regionValue1 = region[iRow1];
845  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
846  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
847  iRow0 = thisIndex[6];
848  regionValue0 = region[iRow0];
849  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
850}
851void CoinAbcScatterUpdate8(int number, CoinFactorizationDouble pivotValue,
852  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
853  const int *COIN_RESTRICT thisIndex,
854  CoinFactorizationDouble *COIN_RESTRICT region)
855{
856  assert(number == 8);
857  int iRow0 = thisIndex[0];
858  int iRow1 = thisIndex[1];
859  CoinFactorizationDouble regionValue0 = region[iRow0];
860  CoinFactorizationDouble regionValue1 = region[iRow1];
861  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
862  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
863  iRow0 = thisIndex[2];
864  iRow1 = thisIndex[3];
865  regionValue0 = region[iRow0];
866  regionValue1 = region[iRow1];
867  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
868  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
869  iRow0 = thisIndex[4];
870  iRow1 = thisIndex[5];
871  regionValue0 = region[iRow0];
872  regionValue1 = region[iRow1];
873  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
874  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
875  iRow0 = thisIndex[6];
876  iRow1 = thisIndex[7];
877  regionValue0 = region[iRow0];
878  regionValue1 = region[iRow1];
879  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
880  region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
881}
882void CoinAbcScatterUpdate4N(int number, CoinFactorizationDouble pivotValue,
883  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
884  const int *COIN_RESTRICT thisIndex,
885  CoinFactorizationDouble *COIN_RESTRICT region)
886{
887  assert((number & 3) == 0);
888  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
889    CoinSimplexInt iRow0 = thisIndex[j];
890    CoinSimplexInt iRow1 = thisIndex[j - 1];
891    CoinFactorizationDouble regionValue0 = region[iRow0];
892    CoinFactorizationDouble regionValue1 = region[iRow1];
893    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
894    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
895    iRow0 = thisIndex[j - 2];
896    iRow1 = thisIndex[j - 3];
897    regionValue0 = region[iRow0];
898    regionValue1 = region[iRow1];
899    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
900    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
901  }
902}
903void CoinAbcScatterUpdate4NPlus1(int number, CoinFactorizationDouble pivotValue,
904  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
905  const int *COIN_RESTRICT thisIndex,
906  CoinFactorizationDouble *COIN_RESTRICT region)
907{
908  assert((number & 3) == 1);
909  int iRow0 = thisIndex[number - 1];
910  CoinFactorizationDouble regionValue0 = region[iRow0];
911  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
912  number -= 1;
913  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
914    CoinSimplexInt iRow0 = thisIndex[j];
915    CoinSimplexInt iRow1 = thisIndex[j - 1];
916    CoinFactorizationDouble regionValue0 = region[iRow0];
917    CoinFactorizationDouble regionValue1 = region[iRow1];
918    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
919    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
920    iRow0 = thisIndex[j - 2];
921    iRow1 = thisIndex[j - 3];
922    regionValue0 = region[iRow0];
923    regionValue1 = region[iRow1];
924    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
925    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
926  }
927}
928void CoinAbcScatterUpdate4NPlus2(int number, CoinFactorizationDouble pivotValue,
929  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
930  const int *COIN_RESTRICT thisIndex,
931  CoinFactorizationDouble *COIN_RESTRICT region)
932{
933  assert((number & 3) == 2);
934  int iRow0 = thisIndex[number - 1];
935  int iRow1 = thisIndex[number - 2];
936  CoinFactorizationDouble regionValue0 = region[iRow0];
937  CoinFactorizationDouble regionValue1 = region[iRow1];
938  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
939  region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
940  number -= 2;
941  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
942    CoinSimplexInt iRow0 = thisIndex[j];
943    CoinSimplexInt iRow1 = thisIndex[j - 1];
944    CoinFactorizationDouble regionValue0 = region[iRow0];
945    CoinFactorizationDouble regionValue1 = region[iRow1];
946    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
947    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
948    iRow0 = thisIndex[j - 2];
949    iRow1 = thisIndex[j - 3];
950    regionValue0 = region[iRow0];
951    regionValue1 = region[iRow1];
952    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
953    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
954  }
955}
956void CoinAbcScatterUpdate4NPlus3(int number, CoinFactorizationDouble pivotValue,
957  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
958  const int *COIN_RESTRICT thisIndex,
959  CoinFactorizationDouble *COIN_RESTRICT region)
960{
961  assert((number & 3) == 3);
962  int iRow0 = thisIndex[number - 1];
963  int iRow1 = thisIndex[number - 2];
964  CoinFactorizationDouble regionValue0 = region[iRow0];
965  CoinFactorizationDouble regionValue1 = region[iRow1];
966  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
967  region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
968  iRow0 = thisIndex[number - 3];
969  regionValue0 = region[iRow0];
970  region[iRow0] = regionValue0 - thisElement[number - 3] * pivotValue;
971  number -= 3;
972  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
973    CoinSimplexInt iRow0 = thisIndex[j];
974    CoinSimplexInt iRow1 = thisIndex[j - 1];
975    CoinFactorizationDouble regionValue0 = region[iRow0];
976    CoinFactorizationDouble regionValue1 = region[iRow1];
977    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
978    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
979    iRow0 = thisIndex[j - 2];
980    iRow1 = thisIndex[j - 3];
981    regionValue0 = region[iRow0];
982    regionValue1 = region[iRow1];
983    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
984    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
985  }
986}
987void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble /*multiplier*/,
988  const CoinFactorizationDouble *COIN_RESTRICT element,
989  CoinFactorizationDouble *COIN_RESTRICT /*region*/)
990{
991#ifndef NDEBUG
992  assert(numberIn == 0);
993#endif
994}
995// start alternatives
996#if 0
997void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
998                          const CoinFactorizationDouble *  COIN_RESTRICT element,
999                          CoinFactorizationDouble * COIN_RESTRICT region)
1000{
1001#ifndef NDEBUG
1002  assert (numberIn==1);
1003#endif
1004  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1005  element++;
1006  int iColumn0=thisColumn[0];
1007  CoinFactorizationDouble value0=region[iColumn0];
1008  value0 += multiplier*element[0];
1009  region[iColumn0]=value0;
1010}
1011void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
1012                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1013                          CoinFactorizationDouble * COIN_RESTRICT region)
1014{
1015#ifndef NDEBUG
1016  assert (numberIn==2);
1017#endif
1018  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1019#if NEW_CHUNK_SIZE == 2
1020  int nFull=2&(~(NEW_CHUNK_SIZE-1));
1021  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1022    coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1023    int iColumn0=thisColumn[0];
1024    int iColumn1=thisColumn[1];
1025    CoinFactorizationDouble value0=region[iColumn0];
1026    CoinFactorizationDouble value1=region[iColumn1];
1027    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1028    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1029    region[iColumn0]=value0;
1030    region[iColumn1]=value1;
1031    element+=NEW_CHUNK_SIZE_INCREMENT;
1032    thisColumn = reinterpret_cast<const int *>(element);
1033  }
1034#endif
1035#if NEW_CHUNK_SIZE == 4
1036  element++;
1037  int iColumn0=thisColumn[0];
1038  int iColumn1=thisColumn[1];
1039  CoinFactorizationDouble value0=region[iColumn0];
1040  CoinFactorizationDouble value1=region[iColumn1];
1041  value0 += multiplier*element[0];
1042  value1 += multiplier*element[1];
1043  region[iColumn0]=value0;
1044  region[iColumn1]=value1;
1045#endif
1046}
1047void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
1048                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1049                          CoinFactorizationDouble * COIN_RESTRICT region)
1050{
1051#ifndef NDEBUG
1052  assert (numberIn==3);
1053#endif
1054  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1055#if NEW_CHUNK_SIZE == 2
1056  int nFull=3&(~(NEW_CHUNK_SIZE-1));
1057  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1058    coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1059    int iColumn0=thisColumn[0];
1060    int iColumn1=thisColumn[1];
1061    CoinFactorizationDouble value0=region[iColumn0];
1062    CoinFactorizationDouble value1=region[iColumn1];
1063    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1064    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1065    region[iColumn0]=value0;
1066    region[iColumn1]=value1;
1067    element+=NEW_CHUNK_SIZE_INCREMENT;
1068    thisColumn = reinterpret_cast<const int *>(element);
1069  }
1070#endif
1071#if NEW_CHUNK_SIZE == 2
1072  element++;
1073  int iColumn0=thisColumn[0];
1074  CoinFactorizationDouble value0=region[iColumn0];
1075  value0 += multiplier*element[0];
1076  region[iColumn0]=value0;
1077#else
1078  element+=2;
1079  int iColumn0=thisColumn[0];
1080  int iColumn1=thisColumn[1];
1081  int iColumn2=thisColumn[2];
1082  CoinFactorizationDouble value0=region[iColumn0];
1083  CoinFactorizationDouble value1=region[iColumn1];
1084  CoinFactorizationDouble value2=region[iColumn2];
1085  value0 += multiplier*element[0];
1086  value1 += multiplier*element[1];
1087  value2 += multiplier*element[2];
1088  region[iColumn0]=value0;
1089  region[iColumn1]=value1;
1090  region[iColumn2]=value2;
1091#endif
1092}
1093void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
1094                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1095                          CoinFactorizationDouble * COIN_RESTRICT region)
1096{
1097#ifndef NDEBUG
1098  assert (numberIn==4);
1099#endif
1100  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1101  int nFull=4&(~(NEW_CHUNK_SIZE-1));
1102  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1103    //coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
1104#if NEW_CHUNK_SIZE == 2
1105    int iColumn0=thisColumn[0];
1106    int iColumn1=thisColumn[1];
1107    CoinFactorizationDouble value0=region[iColumn0];
1108    CoinFactorizationDouble value1=region[iColumn1];
1109    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1110    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1111    region[iColumn0]=value0;
1112    region[iColumn1]=value1;
1113#elif NEW_CHUNK_SIZE == 4
1114    int iColumn0=thisColumn[0];
1115    int iColumn1=thisColumn[1];
1116    int iColumn2=thisColumn[2];
1117    int iColumn3=thisColumn[3];
1118    CoinFactorizationDouble value0=region[iColumn0];
1119    CoinFactorizationDouble value1=region[iColumn1];
1120    CoinFactorizationDouble value2=region[iColumn2];
1121    CoinFactorizationDouble value3=region[iColumn3];
1122    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1123    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1124    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1125    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1126    region[iColumn0]=value0;
1127    region[iColumn1]=value1;
1128    region[iColumn2]=value2;
1129    region[iColumn3]=value3;
1130#else
1131    abort();
1132#endif
1133    element+=NEW_CHUNK_SIZE_INCREMENT;
1134    thisColumn = reinterpret_cast<const int *>(element);
1135  }
1136}
1137void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
1138                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1139                          CoinFactorizationDouble * COIN_RESTRICT region)
1140{
1141#ifndef NDEBUG
1142  assert (numberIn==5);
1143#endif
1144  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1145  int nFull=5&(~(NEW_CHUNK_SIZE-1));
1146  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1147    coin_prefetch_const(element+6);
1148#if NEW_CHUNK_SIZE == 2
1149    int iColumn0=thisColumn[0];
1150    int iColumn1=thisColumn[1];
1151    CoinFactorizationDouble value0=region[iColumn0];
1152    CoinFactorizationDouble value1=region[iColumn1];
1153    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1154    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1155    region[iColumn0]=value0;
1156    region[iColumn1]=value1;
1157#elif NEW_CHUNK_SIZE == 4
1158    int iColumn0=thisColumn[0];
1159    int iColumn1=thisColumn[1];
1160    int iColumn2=thisColumn[2];
1161    int iColumn3=thisColumn[3];
1162    CoinFactorizationDouble value0=region[iColumn0];
1163    CoinFactorizationDouble value1=region[iColumn1];
1164    CoinFactorizationDouble value2=region[iColumn2];
1165    CoinFactorizationDouble value3=region[iColumn3];
1166    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1167    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1168    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1169    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1170    region[iColumn0]=value0;
1171    region[iColumn1]=value1;
1172    region[iColumn2]=value2;
1173    region[iColumn3]=value3;
1174#else
1175    abort();
1176#endif
1177    element+=NEW_CHUNK_SIZE_INCREMENT;
1178    thisColumn = reinterpret_cast<const int *>(element);
1179  }
1180  element++;
1181  int iColumn0=thisColumn[0];
1182  CoinFactorizationDouble value0=region[iColumn0];
1183  value0 += multiplier*element[0];
1184  region[iColumn0]=value0;
1185}
1186void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
1187                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1188                          CoinFactorizationDouble * COIN_RESTRICT region)
1189{
1190#ifndef NDEBUG
1191  assert (numberIn==6);
1192#endif
1193  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1194  int nFull=6&(~(NEW_CHUNK_SIZE-1));
1195  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1196    coin_prefetch_const(element+6);
1197#if NEW_CHUNK_SIZE == 2
1198    int iColumn0=thisColumn[0];
1199    int iColumn1=thisColumn[1];
1200    CoinFactorizationDouble value0=region[iColumn0];
1201    CoinFactorizationDouble value1=region[iColumn1];
1202    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1203    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1204    region[iColumn0]=value0;
1205    region[iColumn1]=value1;
1206#elif NEW_CHUNK_SIZE == 4
1207    int iColumn0=thisColumn[0];
1208    int iColumn1=thisColumn[1];
1209    int iColumn2=thisColumn[2];
1210    int iColumn3=thisColumn[3];
1211    CoinFactorizationDouble value0=region[iColumn0];
1212    CoinFactorizationDouble value1=region[iColumn1];
1213    CoinFactorizationDouble value2=region[iColumn2];
1214    CoinFactorizationDouble value3=region[iColumn3];
1215    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1216    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1217    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1218    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1219    region[iColumn0]=value0;
1220    region[iColumn1]=value1;
1221    region[iColumn2]=value2;
1222    region[iColumn3]=value3;
1223#else
1224    abort();
1225#endif
1226    element+=NEW_CHUNK_SIZE_INCREMENT;
1227    thisColumn = reinterpret_cast<const int *>(element);
1228  }
1229#if NEW_CHUNK_SIZE == 4
1230  element++;
1231  int iColumn0=thisColumn[0];
1232  int iColumn1=thisColumn[1];
1233  CoinFactorizationDouble value0=region[iColumn0];
1234  CoinFactorizationDouble value1=region[iColumn1];
1235  value0 += multiplier*element[0];
1236  value1 += multiplier*element[1];
1237  region[iColumn0]=value0;
1238  region[iColumn1]=value1;
1239#endif
1240}
1241void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
1242                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1243                          CoinFactorizationDouble * COIN_RESTRICT region)
1244{
1245#ifndef NDEBUG
1246  assert (numberIn==7);
1247#endif
1248  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1249  int nFull=7&(~(NEW_CHUNK_SIZE-1));
1250  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1251    coin_prefetch_const(element+6);
1252#if NEW_CHUNK_SIZE == 2
1253    int iColumn0=thisColumn[0];
1254    int iColumn1=thisColumn[1];
1255    CoinFactorizationDouble value0=region[iColumn0];
1256    CoinFactorizationDouble value1=region[iColumn1];
1257    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1258    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1259    region[iColumn0]=value0;
1260    region[iColumn1]=value1;
1261#elif NEW_CHUNK_SIZE == 4
1262    int iColumn0=thisColumn[0];
1263    int iColumn1=thisColumn[1];
1264    int iColumn2=thisColumn[2];
1265    int iColumn3=thisColumn[3];
1266    CoinFactorizationDouble value0=region[iColumn0];
1267    CoinFactorizationDouble value1=region[iColumn1];
1268    CoinFactorizationDouble value2=region[iColumn2];
1269    CoinFactorizationDouble value3=region[iColumn3];
1270    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1271    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1272    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1273    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1274    region[iColumn0]=value0;
1275    region[iColumn1]=value1;
1276    region[iColumn2]=value2;
1277    region[iColumn3]=value3;
1278#else
1279    abort();
1280#endif
1281    element+=NEW_CHUNK_SIZE_INCREMENT;
1282    thisColumn = reinterpret_cast<const int *>(element);
1283  }
1284#if NEW_CHUNK_SIZE == 2
1285  element++;
1286  int iColumn0=thisColumn[0];
1287  CoinFactorizationDouble value0=region[iColumn0];
1288  value0 += multiplier*element[0];
1289  region[iColumn0]=value0;
1290#else
1291  element+=2;
1292  int iColumn0=thisColumn[0];
1293  int iColumn1=thisColumn[1];
1294  int iColumn2=thisColumn[2];
1295  CoinFactorizationDouble value0=region[iColumn0];
1296  CoinFactorizationDouble value1=region[iColumn1];
1297  CoinFactorizationDouble value2=region[iColumn2];
1298  value0 += multiplier*element[0];
1299  value1 += multiplier*element[1];
1300  value2 += multiplier*element[2];
1301  region[iColumn0]=value0;
1302  region[iColumn1]=value1;
1303  region[iColumn2]=value2;
1304#endif
1305}
1306void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
1307                          const CoinFactorizationDouble *  COIN_RESTRICT element,
1308                          CoinFactorizationDouble * COIN_RESTRICT region)
1309{
1310#ifndef NDEBUG
1311  assert (numberIn==8);
1312#endif
1313  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1314  int nFull=8&(~(NEW_CHUNK_SIZE-1));
1315  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1316    coin_prefetch_const(element+6);
1317#if NEW_CHUNK_SIZE == 2
1318    int iColumn0=thisColumn[0];
1319    int iColumn1=thisColumn[1];
1320    CoinFactorizationDouble value0=region[iColumn0];
1321    CoinFactorizationDouble value1=region[iColumn1];
1322    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1323    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1324    region[iColumn0]=value0;
1325    region[iColumn1]=value1;
1326#elif NEW_CHUNK_SIZE == 4
1327    int iColumn0=thisColumn[0];
1328    int iColumn1=thisColumn[1];
1329    int iColumn2=thisColumn[2];
1330    int iColumn3=thisColumn[3];
1331    CoinFactorizationDouble value0=region[iColumn0];
1332    CoinFactorizationDouble value1=region[iColumn1];
1333    CoinFactorizationDouble value2=region[iColumn2];
1334    CoinFactorizationDouble value3=region[iColumn3];
1335    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1336    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1337    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1338    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1339    region[iColumn0]=value0;
1340    region[iColumn1]=value1;
1341    region[iColumn2]=value2;
1342    region[iColumn3]=value3;
1343#else
1344    abort();
1345#endif
1346    element+=NEW_CHUNK_SIZE_INCREMENT;
1347    thisColumn = reinterpret_cast<const int *>(element);
1348  }
1349}
1350void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
1351                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1352                                 CoinFactorizationDouble * COIN_RESTRICT region)
1353{
1354  assert ((numberIn&3)==0);
1355  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1356  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1357  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1358    coin_prefetch_const(element+6);
1359#if NEW_CHUNK_SIZE == 2
1360    int iColumn0=thisColumn[0];
1361    int iColumn1=thisColumn[1];
1362    CoinFactorizationDouble value0=region[iColumn0];
1363    CoinFactorizationDouble value1=region[iColumn1];
1364    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1365    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1366    region[iColumn0]=value0;
1367    region[iColumn1]=value1;
1368#elif NEW_CHUNK_SIZE == 4
1369    int iColumn0=thisColumn[0];
1370    int iColumn1=thisColumn[1];
1371    int iColumn2=thisColumn[2];
1372    int iColumn3=thisColumn[3];
1373    CoinFactorizationDouble value0=region[iColumn0];
1374    CoinFactorizationDouble value1=region[iColumn1];
1375    CoinFactorizationDouble value2=region[iColumn2];
1376    CoinFactorizationDouble value3=region[iColumn3];
1377    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1378    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1379    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1380    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1381    region[iColumn0]=value0;
1382    region[iColumn1]=value1;
1383    region[iColumn2]=value2;
1384    region[iColumn3]=value3;
1385#else
1386    abort();
1387#endif
1388    element+=NEW_CHUNK_SIZE_INCREMENT;
1389    thisColumn = reinterpret_cast<const int *>(element);
1390  }
1391}
1392void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
1393                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1394                                 CoinFactorizationDouble * COIN_RESTRICT region)
1395{
1396  assert ((numberIn&3)==1);
1397  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1398  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1399  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1400    coin_prefetch_const(element+6);
1401#if NEW_CHUNK_SIZE == 2
1402    int iColumn0=thisColumn[0];
1403    int iColumn1=thisColumn[1];
1404    CoinFactorizationDouble value0=region[iColumn0];
1405    CoinFactorizationDouble value1=region[iColumn1];
1406    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1407    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1408    region[iColumn0]=value0;
1409    region[iColumn1]=value1;
1410#elif NEW_CHUNK_SIZE == 4
1411    int iColumn0=thisColumn[0];
1412    int iColumn1=thisColumn[1];
1413    int iColumn2=thisColumn[2];
1414    int iColumn3=thisColumn[3];
1415    CoinFactorizationDouble value0=region[iColumn0];
1416    CoinFactorizationDouble value1=region[iColumn1];
1417    CoinFactorizationDouble value2=region[iColumn2];
1418    CoinFactorizationDouble value3=region[iColumn3];
1419    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1420    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1421    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1422    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1423    region[iColumn0]=value0;
1424    region[iColumn1]=value1;
1425    region[iColumn2]=value2;
1426    region[iColumn3]=value3;
1427#else
1428    abort();
1429#endif
1430    element+=NEW_CHUNK_SIZE_INCREMENT;
1431    thisColumn = reinterpret_cast<const int *>(element);
1432  }
1433  element++;
1434  int iColumn0=thisColumn[0];
1435  CoinFactorizationDouble value0=region[iColumn0];
1436  value0 += multiplier*element[0];
1437  region[iColumn0]=value0;
1438}
1439void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
1440                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1441                                 CoinFactorizationDouble * COIN_RESTRICT region)
1442{
1443  assert ((numberIn&3)==2);
1444  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1445  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1446  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1447    coin_prefetch_const(element+6);
1448#if NEW_CHUNK_SIZE == 2
1449    int iColumn0=thisColumn[0];
1450    int iColumn1=thisColumn[1];
1451    CoinFactorizationDouble value0=region[iColumn0];
1452    CoinFactorizationDouble value1=region[iColumn1];
1453    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1454    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1455    region[iColumn0]=value0;
1456    region[iColumn1]=value1;
1457#elif NEW_CHUNK_SIZE == 4
1458    int iColumn0=thisColumn[0];
1459    int iColumn1=thisColumn[1];
1460    int iColumn2=thisColumn[2];
1461    int iColumn3=thisColumn[3];
1462    CoinFactorizationDouble value0=region[iColumn0];
1463    CoinFactorizationDouble value1=region[iColumn1];
1464    CoinFactorizationDouble value2=region[iColumn2];
1465    CoinFactorizationDouble value3=region[iColumn3];
1466    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1467    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1468    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1469    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1470    region[iColumn0]=value0;
1471    region[iColumn1]=value1;
1472    region[iColumn2]=value2;
1473    region[iColumn3]=value3;
1474#else
1475    abort();
1476#endif
1477    element+=NEW_CHUNK_SIZE_INCREMENT;
1478    thisColumn = reinterpret_cast<const int *>(element);
1479  }
1480#if NEW_CHUNK_SIZE == 4
1481  element++;
1482  int iColumn0=thisColumn[0];
1483  int iColumn1=thisColumn[1];
1484  CoinFactorizationDouble value0=region[iColumn0];
1485  CoinFactorizationDouble value1=region[iColumn1];
1486  value0 += multiplier*element[0];
1487  value1 += multiplier*element[1];
1488  region[iColumn0]=value0;
1489  region[iColumn1]=value1;
1490#endif
1491}
1492void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
1493                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1494                                 CoinFactorizationDouble * COIN_RESTRICT region)
1495{
1496  assert ((numberIn&3)==3);
1497  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
1498  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1499  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1500    coin_prefetch_const(element+6);
1501#if NEW_CHUNK_SIZE == 2
1502    int iColumn0=thisColumn[0];
1503    int iColumn1=thisColumn[1];
1504    CoinFactorizationDouble value0=region[iColumn0];
1505    CoinFactorizationDouble value1=region[iColumn1];
1506    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1507    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1508    region[iColumn0]=value0;
1509    region[iColumn1]=value1;
1510#elif NEW_CHUNK_SIZE == 4
1511    int iColumn0=thisColumn[0];
1512    int iColumn1=thisColumn[1];
1513    int iColumn2=thisColumn[2];
1514    int iColumn3=thisColumn[3];
1515    CoinFactorizationDouble value0=region[iColumn0];
1516    CoinFactorizationDouble value1=region[iColumn1];
1517    CoinFactorizationDouble value2=region[iColumn2];
1518    CoinFactorizationDouble value3=region[iColumn3];
1519    value0 += multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
1520    value1 += multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
1521    value2 += multiplier*element[2+NEW_CHUNK_SIZE_OFFSET];
1522    value3 += multiplier*element[3+NEW_CHUNK_SIZE_OFFSET];
1523    region[iColumn0]=value0;
1524    region[iColumn1]=value1;
1525    region[iColumn2]=value2;
1526    region[iColumn3]=value3;
1527#else
1528    abort();
1529#endif
1530    element+=NEW_CHUNK_SIZE_INCREMENT;
1531    thisColumn = reinterpret_cast<const int *>(element);
1532  }
1533#if NEW_CHUNK_SIZE == 2
1534  element++;
1535  int iColumn0=thisColumn[0];
1536  CoinFactorizationDouble value0=region[iColumn0];
1537  value0 += multiplier*element[0];
1538  region[iColumn0]=value0;
1539#else
1540  element+=2;
1541  int iColumn0=thisColumn[0];
1542  int iColumn1=thisColumn[1];
1543  int iColumn2=thisColumn[2];
1544  CoinFactorizationDouble value0=region[iColumn0];
1545  CoinFactorizationDouble value1=region[iColumn1];
1546  CoinFactorizationDouble value2=region[iColumn2];
1547  value0 += multiplier*element[0];
1548  value1 += multiplier*element[1];
1549  value2 += multiplier*element[2];
1550  region[iColumn0]=value0;
1551  region[iColumn1]=value1;
1552  region[iColumn2]=value2;
1553#endif
1554}
1555#else
1556// alternative
1557#define CACHE_LINE_SIZE 16
1558#define OPERATION +=
1559#define functionName(zname) CoinAbc##zname
1560#define ABC_CREATE_SCATTER_FUNCTION 1
1561#include "CoinAbcHelperFunctions.hpp"
1562#undef OPERATION
1563#undef functionName
1564#define OPERATION -=
1565#define functionName(zname) CoinAbc##zname##Subtract
1566#include "CoinAbcHelperFunctions.hpp"
1567#undef OPERATION
1568#undef functionName
1569#define OPERATION +=
1570#define functionName(zname) CoinAbc##zname##Add
1571#include "CoinAbcHelperFunctions.hpp"
1572scatterUpdate AbcScatterLow[] = {
1573  &CoinAbcScatterUpdate0,
1574  &CoinAbcScatterUpdate1,
1575  &CoinAbcScatterUpdate2,
1576  &CoinAbcScatterUpdate3,
1577  &CoinAbcScatterUpdate4,
1578  &CoinAbcScatterUpdate5,
1579  &CoinAbcScatterUpdate6,
1580  &CoinAbcScatterUpdate7,
1581  &CoinAbcScatterUpdate8
1582};
1583scatterUpdate AbcScatterHigh[] = {
1584  &CoinAbcScatterUpdate4N,
1585  &CoinAbcScatterUpdate4NPlus1,
1586  &CoinAbcScatterUpdate4NPlus2,
1587  &CoinAbcScatterUpdate4NPlus3
1588};
1589scatterUpdate AbcScatterLowSubtract[] = {
1590  &CoinAbcScatterUpdate0,
1591  &CoinAbcScatterUpdate1Subtract,
1592  &CoinAbcScatterUpdate2Subtract,
1593  &CoinAbcScatterUpdate3Subtract,
1594  &CoinAbcScatterUpdate4Subtract,
1595  &CoinAbcScatterUpdate5Subtract,
1596  &CoinAbcScatterUpdate6Subtract,
1597  &CoinAbcScatterUpdate7Subtract,
1598  &CoinAbcScatterUpdate8Subtract
1599};
1600scatterUpdate AbcScatterHighSubtract[] = {
1601  &CoinAbcScatterUpdate4NSubtract,
1602  &CoinAbcScatterUpdate4NPlus1Subtract,
1603  &CoinAbcScatterUpdate4NPlus2Subtract,
1604  &CoinAbcScatterUpdate4NPlus3Subtract
1605};
1606scatterUpdate AbcScatterLowAdd[] = {
1607  &CoinAbcScatterUpdate0,
1608  &CoinAbcScatterUpdate1Add,
1609  &CoinAbcScatterUpdate2Add,
1610  &CoinAbcScatterUpdate3Add,
1611  &CoinAbcScatterUpdate4Add,
1612  &CoinAbcScatterUpdate5Add,
1613  &CoinAbcScatterUpdate6Add,
1614  &CoinAbcScatterUpdate7Add,
1615  &CoinAbcScatterUpdate8Add
1616};
1617scatterUpdate AbcScatterHighAdd[] = {
1618  &CoinAbcScatterUpdate4NAdd,
1619  &CoinAbcScatterUpdate4NPlus1Add,
1620  &CoinAbcScatterUpdate4NPlus2Add,
1621  &CoinAbcScatterUpdate4NPlus3Add
1622};
1623#endif
1624#include "CoinPragma.hpp"
1625
1626#include <math.h>
1627
1628#include <cfloat>
1629#include <cassert>
1630#include <string>
1631#include <stdio.h>
1632#include <iostream>
1633#include "CoinAbcCommonFactorization.hpp"
1634#if 1
1635#if AVX2 == 1
1636#include "emmintrin.h"
1637#elif AVX2 == 2
1638#include <immintrin.h>
1639#elif AVX2 == 3
1640#include "avx2intrin.h"
1641#endif
1642#endif
1643// cilk seems a bit fragile
1644#define CILK_FRAGILE 0
1645#if CILK_FRAGILE > 1
1646#undef cilk_spawn
1647#undef cilk_sync
1648#define cilk_spawn
1649#define cilk_sync
1650#define ONWARD 0
1651#elif CILK_FRAGILE == 1
1652#define ONWARD 0
1653#else
1654#define ONWARD 1
1655#endif
1656#define BLOCKING1 8 // factorization strip
1657#define BLOCKING2 8 // dgemm recursive
1658#define BLOCKING3 32 // dgemm parallel
1659void CoinAbcDgemm(int m, int n, int k, double *COIN_RESTRICT a, int lda,
1660  double *COIN_RESTRICT b, double *COIN_RESTRICT c
1661#if ABC_PARALLEL == 2
1662  ,
1663  int parallelMode
1664#endif
1665)
1666{
1667  assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1668  /* entry for column j and row i (when multiple of BLOCKING8)
1669     is at aBlocked+j*m+i*BLOCKING8
1670  */
1671  // k is always smallish
1672  if (m <= BLOCKING8 && n <= BLOCKING8) {
1673    assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1674    double *COIN_RESTRICT aBase2 = a;
1675    double *COIN_RESTRICT bBase2 = b;
1676    double *COIN_RESTRICT cBase2 = c;
1677    for (int j = 0; j < BLOCKING8; j++) {
1678      double *COIN_RESTRICT aBase = aBase2;
1679#if AVX2 != 2
1680#if 1
1681      double c0 = cBase2[0];
1682      double c1 = cBase2[1];
1683      double c2 = cBase2[2];
1684      double c3 = cBase2[3];
1685      double c4 = cBase2[4];
1686      double c5 = cBase2[5];
1687      double c6 = cBase2[6];
1688      double c7 = cBase2[7];
1689      for (int l = 0; l < BLOCKING8; l++) {
1690        double bValue = bBase2[l];
1691        if (bValue) {
1692          c0 -= bValue * aBase[0];
1693          c1 -= bValue * aBase[1];
1694          c2 -= bValue * aBase[2];
1695          c3 -= bValue * aBase[3];
1696          c4 -= bValue * aBase[4];
1697          c5 -= bValue * aBase[5];
1698          c6 -= bValue * aBase[6];
1699          c7 -= bValue * aBase[7];
1700        }
1701        aBase += BLOCKING8;
1702      }
1703      cBase2[0] = c0;
1704      cBase2[1] = c1;
1705      cBase2[2] = c2;
1706      cBase2[3] = c3;
1707      cBase2[4] = c4;
1708      cBase2[5] = c5;
1709      cBase2[6] = c6;
1710      cBase2[7] = c7;
1711#else
1712      for (int l = 0; l < BLOCKING8; l++) {
1713        double bValue = bBase2[l];
1714        if (bValue) {
1715          for (int i = 0; i < BLOCKING8; i++) {
1716            cBase2[i] -= bValue * aBase[i];
1717          }
1718        }
1719        aBase += BLOCKING8;
1720      }
1721#endif
1722#else
1723      //__m256d c0=_mm256_load_pd(cBase2);
1724      __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1725      //__m256d c1=_mm256_load_pd(cBase2+4);
1726      __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1727      for (int l = 0; l < BLOCKING8; l++) {
1728        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1729        __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1730        //__m256d a0 = _mm256_load_pd(aBase);
1731        __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1732        //__m256d a1 = _mm256_load_pd(aBase+4);
1733        __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1734        c0 -= bb * a0;
1735        c1 -= bb * a1;
1736        aBase += BLOCKING8;
1737      }
1738      //_mm256_store_pd (cBase2, c0);
1739      *reinterpret_cast< __m256d * >(cBase2) = c0;
1740      //_mm256_store_pd (cBase2+4, c1);
1741      *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1742#endif
1743      bBase2 += BLOCKING8;
1744      cBase2 += BLOCKING8;
1745    }
1746  } else if (m > n) {
1747    // make sure mNew1 multiple of BLOCKING8
1748#if BLOCKING8 == 8
1749    int mNew1 = ((m + 15) >> 4) << 3;
1750#elif BLOCKING8 == 4
1751    int mNew1 = ((m + 7) >> 3) << 2;
1752#elif BLOCKING8 == 2
1753    int mNew1 = ((m + 3) >> 2) << 1;
1754#else
1755    abort();
1756#endif
1757    assert(mNew1 > 0 && m - mNew1 > 0);
1758#if ABC_PARALLEL == 2
1759    if (mNew1 <= BLOCKING3 || !parallelMode) {
1760#endif
1761      //printf("splitMa mNew1 %d\n",mNew1);
1762      CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1763#if ABC_PARALLEL == 2
1764        ,
1765        0
1766#endif
1767      );
1768      //printf("splitMb mNew1 %d\n",mNew1);
1769      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1770#if ABC_PARALLEL == 2
1771        ,
1772        0
1773#endif
1774      );
1775#if ABC_PARALLEL == 2
1776    } else {
1777      //printf("splitMa mNew1 %d\n",mNew1);
1778      cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1779      //printf("splitMb mNew1 %d\n",mNew1);
1780      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1781      cilk_sync;
1782    }
1783#endif
1784  } else {
1785    // make sure nNew1 multiple of BLOCKING8
1786#if BLOCKING8 == 8
1787    int nNew1 = ((n + 15) >> 4) << 3;
1788#elif BLOCKING8 == 4
1789    int nNew1 = ((n + 7) >> 3) << 2;
1790#elif BLOCKING8 == 2
1791    int nNew1 = ((n + 3) >> 2) << 1;
1792#else
1793    abort();
1794#endif
1795    assert(nNew1 > 0 && n - nNew1 > 0);
1796#if ABC_PARALLEL == 2
1797    if (nNew1 <= BLOCKING3 || !parallelMode) {
1798#endif
1799      //printf("splitNa nNew1 %d\n",nNew1);
1800      CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1801#if ABC_PARALLEL == 2
1802        ,
1803        0
1804#endif
1805      );
1806      //printf("splitNb nNew1 %d\n",nNew1);
1807      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1808#if ABC_PARALLEL == 2
1809        ,
1810        0
1811#endif
1812      );
1813#if ABC_PARALLEL == 2
1814    } else {
1815      //printf("splitNa nNew1 %d\n",nNew1);
1816      cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1817      //printf("splitNb nNew1 %d\n",nNew1);
1818      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1819      cilk_sync;
1820    }
1821#endif
1822  }
1823}
1824#ifdef ABC_LONG_FACTORIZATION
1825// Start long double version
1826void CoinAbcDgemm(int m, int n, int k, long double *COIN_RESTRICT a, int lda,
1827  long double *COIN_RESTRICT b, long double *COIN_RESTRICT c
1828#if ABC_PARALLEL == 2
1829  ,
1830  int parallelMode
1831#endif
1832)
1833{
1834  assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
1835  /* entry for column j and row i (when multiple of BLOCKING8)
1836     is at aBlocked+j*m+i*BLOCKING8
1837  */
1838  // k is always smallish
1839  if (m <= BLOCKING8 && n <= BLOCKING8) {
1840    assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
1841    long double *COIN_RESTRICT aBase2 = a;
1842    long double *COIN_RESTRICT bBase2 = b;
1843    long double *COIN_RESTRICT cBase2 = c;
1844    for (int j = 0; j < BLOCKING8; j++) {
1845      long double *COIN_RESTRICT aBase = aBase2;
1846#if AVX2 != 2
1847#if 1
1848      long double c0 = cBase2[0];
1849      long double c1 = cBase2[1];
1850      long double c2 = cBase2[2];
1851      long double c3 = cBase2[3];
1852      long double c4 = cBase2[4];
1853      long double c5 = cBase2[5];
1854      long double c6 = cBase2[6];
1855      long double c7 = cBase2[7];
1856      for (int l = 0; l < BLOCKING8; l++) {
1857        long double bValue = bBase2[l];
1858        if (bValue) {
1859          c0 -= bValue * aBase[0];
1860          c1 -= bValue * aBase[1];
1861          c2 -= bValue * aBase[2];
1862          c3 -= bValue * aBase[3];
1863          c4 -= bValue * aBase[4];
1864          c5 -= bValue * aBase[5];
1865          c6 -= bValue * aBase[6];
1866          c7 -= bValue * aBase[7];
1867        }
1868        aBase += BLOCKING8;
1869      }
1870      cBase2[0] = c0;
1871      cBase2[1] = c1;
1872      cBase2[2] = c2;
1873      cBase2[3] = c3;
1874      cBase2[4] = c4;
1875      cBase2[5] = c5;
1876      cBase2[6] = c6;
1877      cBase2[7] = c7;
1878#else
1879      for (int l = 0; l < BLOCKING8; l++) {
1880        long double bValue = bBase2[l];
1881        if (bValue) {
1882          for (int i = 0; i < BLOCKING8; i++) {
1883            cBase2[i] -= bValue * aBase[i];
1884          }
1885        }
1886        aBase += BLOCKING8;
1887      }
1888#endif
1889#else
1890      //__m256d c0=_mm256_load_pd(cBase2);
1891      __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
1892      //__m256d c1=_mm256_load_pd(cBase2+4);
1893      __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
1894      for (int l = 0; l < BLOCKING8; l++) {
1895        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
1896        __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
1897        //__m256d a0 = _mm256_load_pd(aBase);
1898        __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
1899        //__m256d a1 = _mm256_load_pd(aBase+4);
1900        __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
1901        c0 -= bb * a0;
1902        c1 -= bb * a1;
1903        aBase += BLOCKING8;
1904      }
1905      //_mm256_store_pd (cBase2, c0);
1906      *reinterpret_cast< __m256d * >(cBase2) = c0;
1907      //_mm256_store_pd (cBase2+4, c1);
1908      *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
1909#endif
1910      bBase2 += BLOCKING8;
1911      cBase2 += BLOCKING8;
1912    }
1913  } else if (m > n) {
1914    // make sure mNew1 multiple of BLOCKING8
1915#if BLOCKING8 == 8
1916    int mNew1 = ((m + 15) >> 4) << 3;
1917#elif BLOCKING8 == 4
1918    int mNew1 = ((m + 7) >> 3) << 2;
1919#elif BLOCKING8 == 2
1920    int mNew1 = ((m + 3) >> 2) << 1;
1921#else
1922    abort();
1923#endif
1924    assert(mNew1 > 0 && m - mNew1 > 0);
1925#if ABC_PARALLEL == 2
1926    if (mNew1 <= BLOCKING3 || !parallelMode) {
1927#endif
1928      //printf("splitMa mNew1 %d\n",mNew1);
1929      CoinAbcDgemm(mNew1, n, k, a, lda, b, c
1930#if ABC_PARALLEL == 2
1931        ,
1932        0
1933#endif
1934      );
1935      //printf("splitMb mNew1 %d\n",mNew1);
1936      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
1937#if ABC_PARALLEL == 2
1938        ,
1939        0
1940#endif
1941      );
1942#if ABC_PARALLEL == 2
1943    } else {
1944      //printf("splitMa mNew1 %d\n",mNew1);
1945      cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
1946      //printf("splitMb mNew1 %d\n",mNew1);
1947      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
1948      cilk_sync;
1949    }
1950#endif
1951  } else {
1952    // make sure nNew1 multiple of BLOCKING8
1953#if BLOCKING8 == 8
1954    int nNew1 = ((n + 15) >> 4) << 3;
1955#elif BLOCKING8 == 4
1956    int nNew1 = ((n + 7) >> 3) << 2;
1957#elif BLOCKING8 == 2
1958    int nNew1 = ((n + 3) >> 2) << 1;
1959#else
1960    abort();
1961#endif
1962    assert(nNew1 > 0 && n - nNew1 > 0);
1963#if ABC_PARALLEL == 2
1964    if (nNew1 <= BLOCKING3 || !parallelMode) {
1965#endif
1966      //printf("splitNa nNew1 %d\n",nNew1);
1967      CoinAbcDgemm(m, nNew1, k, a, lda, b, c
1968#if ABC_PARALLEL == 2
1969        ,
1970        0
1971#endif
1972      );
1973      //printf("splitNb nNew1 %d\n",nNew1);
1974      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
1975#if ABC_PARALLEL == 2
1976        ,
1977        0
1978#endif
1979      );
1980#if ABC_PARALLEL == 2
1981    } else {
1982      //printf("splitNa nNew1 %d\n",nNew1);
1983      cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
1984      //printf("splitNb nNew1 %d\n",nNew1);
1985      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
1986      cilk_sync;
1987    }
1988#endif
1989  }
1990}
1991#endif
1992#if 0
1993// From Intel site
1994// get AVX intrinsics
1995#include <immintrin.h> 
1996// get CPUID capability 
1997//#include <intrin.h>
1998#define cpuid(func, ax, bx, cx, dx)                             \
1999  __asm__ __volatile__("cpuid"                                  \
2000                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
2001                       : "a"(func)); 
2002// written for clarity, not conciseness
2003#define OSXSAVEFlag (1UL << 27)
2004#define AVXFlag ((1UL << 28) | OSXSAVEFlag)
2005#define VAESFlag ((1UL << 25) | AVXFlag | OSXSAVEFlag)
2006#define FMAFlag ((1UL << 12) | AVXFlag | OSXSAVEFlag)
2007#define CLMULFlag ((1UL << 1) | AVXFlag | OSXSAVEFlag) 
2008   
2009bool DetectFeature(unsigned int feature) 
2010
2011  int CPUInfo[4]; //, InfoType=1, ECX = 1; 
2012  //__cpuidex(CPUInfo, 1, 1);       // read the desired CPUID format 
2013  cpuid(1,CPUInfo[0],CPUInfo[1],CPUInfo[2],CPUInfo[3]);
2014  unsigned int ECX = CPUInfo[2];  // the output of CPUID in the ECX register.   
2015  if ((ECX & feature) != feature) // Missing feature   
2016    return false;
2017#if 0
2018  long int val = _xgetbv(0);       // read XFEATURE_ENABLED_MASK register 
2019  if ((val&6) != 6)               // check OS has enabled both XMM and YMM support. 
2020    return false;
2021#endif
2022  return true; 
2023}
2024#endif
2025
2026/* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
2027*/
Note: See TracBrowser for help on using the repository browser.