source: stable/1.17/Clp/src/CoinAbcHelperFunctions.hpp @ 2439

Last change on this file since 2439 was 2385, checked in by unxusr, 10 months ago

formatting

  • Property svn:keywords set to Id
File size: 59.5 KB
Line 
1/* $Id: CoinAbcHelperFunctions.hpp 2385 2019-01-06 19:43:06Z stefan $ */
2// Copyright (C) 2003, International Business Machines
3// Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
4// This code is licensed under the terms of the Eclipse Public License (EPL).
5
6#ifndef CoinAbcHelperFunctions_H
7#define CoinAbcHelperFunctions_H
8
9#include "ClpConfig.h"
10#ifdef HAVE_CMATH
11#include <cmath>
12#else
13#ifdef HAVE_MATH_H
14#include <math.h>
15#else
16#include <cmath>
17#endif
18#endif
19#include "CoinAbcCommon.hpp"
20#ifndef abc_assert
21#define abc_assert(condition)                               \
22  {                                                         \
23    if (!condition) {                                       \
24      printf("abc_assert in %s at line %d - %s is false\n", \
25        __FILE__, __LINE__, __STRING(condition));           \
26      abort();                                              \
27    }                                                       \
28  }
29#endif
30// cilk_for granularity.
31#define CILK_FOR_GRAINSIZE 128
32//#define AVX2 2
33#if AVX2 == 1
34#include "emmintrin.h"
35#elif AVX2 == 2
36#include <immintrin.h>
37#elif AVX2 == 3
38#include "avx2intrin.h"
39#endif
40//#define __AVX__ 1
41//#define __AVX2__ 1
42/**
43    Note (JJF) I have added some operations on arrays even though they may
44    duplicate CoinDenseVector.
45
46*/
47
48#define UNROLL_SCATTER 2
49#define INLINE_SCATTER 1
50#if INLINE_SCATTER == 0
51void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
52  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
53  const int *COIN_RESTRICT thisIndex,
54  CoinFactorizationDouble *COIN_RESTRICT region);
55#else
56void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
57  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
58  const int *COIN_RESTRICT thisIndex,
59  CoinFactorizationDouble *COIN_RESTRICT region)
60{
61#if UNROLL_SCATTER == 0
62  for (CoinBigIndex j = number - 1; j >= 0; j--) {
63    CoinSimplexInt iRow = thisIndex[j];
64    CoinFactorizationDouble regionValue = region[iRow];
65    CoinFactorizationDouble value = thisElement[j];
66    assert(value);
67    region[iRow] = regionValue - value * pivotValue;
68  }
69#elif UNROLL_SCATTER == 1
70  if ((number & 1) != 0) {
71    number--;
72    CoinSimplexInt iRow = thisIndex[number];
73    CoinFactorizationDouble regionValue = region[iRow];
74    CoinFactorizationDouble value = thisElement[number];
75    region[iRow] = regionValue - value * pivotValue;
76  }
77  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
78    CoinSimplexInt iRow0 = thisIndex[j];
79    CoinSimplexInt iRow1 = thisIndex[j - 1];
80    CoinFactorizationDouble regionValue0 = region[iRow0];
81    CoinFactorizationDouble regionValue1 = region[iRow1];
82    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
83    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
84  }
85#elif UNROLL_SCATTER == 2
86  if ((number & 1) != 0) {
87    number--;
88    CoinSimplexInt iRow = thisIndex[number];
89    CoinFactorizationDouble regionValue = region[iRow];
90    CoinFactorizationDouble value = thisElement[number];
91    region[iRow] = regionValue - value * pivotValue;
92  }
93  if ((number & 2) != 0) {
94    CoinSimplexInt iRow0 = thisIndex[number - 1];
95    CoinFactorizationDouble regionValue0 = region[iRow0];
96    CoinFactorizationDouble value0 = thisElement[number - 1];
97    CoinSimplexInt iRow1 = thisIndex[number - 2];
98    CoinFactorizationDouble regionValue1 = region[iRow1];
99    CoinFactorizationDouble value1 = thisElement[number - 2];
100    region[iRow0] = regionValue0 - value0 * pivotValue;
101    region[iRow1] = regionValue1 - value1 * pivotValue;
102    number -= 2;
103  }
104#pragma cilk grainsize = CILK_FOR_GRAINSIZE
105  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
106  {
107    CoinSimplexInt iRow0 = thisIndex[j];
108    CoinSimplexInt iRow1 = thisIndex[j - 1];
109    CoinFactorizationDouble regionValue0 = region[iRow0];
110    CoinFactorizationDouble regionValue1 = region[iRow1];
111    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
112    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
113    CoinSimplexInt iRow2 = thisIndex[j - 2];
114    CoinSimplexInt iRow3 = thisIndex[j - 3];
115    CoinFactorizationDouble regionValue2 = region[iRow2];
116    CoinFactorizationDouble regionValue3 = region[iRow3];
117    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
118    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
119  }
120#elif UNROLL_SCATTER == 3
121  CoinSimplexInt iRow0;
122  CoinSimplexInt iRow1;
123  CoinFactorizationDouble regionValue0;
124  CoinFactorizationDouble regionValue1;
125  switch (static_cast< unsigned int >(number)) {
126  case 0:
127    break;
128  case 1:
129    iRow0 = thisIndex[0];
130    regionValue0 = region[iRow0];
131    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
132    break;
133  case 2:
134    iRow0 = thisIndex[0];
135    iRow1 = thisIndex[1];
136    regionValue0 = region[iRow0];
137    regionValue1 = region[iRow1];
138    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
139    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
140    break;
141  case 3:
142    iRow0 = thisIndex[0];
143    iRow1 = thisIndex[1];
144    regionValue0 = region[iRow0];
145    regionValue1 = region[iRow1];
146    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
147    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
148    iRow0 = thisIndex[2];
149    regionValue0 = region[iRow0];
150    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
151    break;
152  case 4:
153    iRow0 = thisIndex[0];
154    iRow1 = thisIndex[1];
155    regionValue0 = region[iRow0];
156    regionValue1 = region[iRow1];
157    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
158    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
159    iRow0 = thisIndex[2];
160    iRow1 = thisIndex[3];
161    regionValue0 = region[iRow0];
162    regionValue1 = region[iRow1];
163    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
164    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
165    break;
166  case 5:
167    iRow0 = thisIndex[0];
168    iRow1 = thisIndex[1];
169    regionValue0 = region[iRow0];
170    regionValue1 = region[iRow1];
171    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
172    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
173    iRow0 = thisIndex[2];
174    iRow1 = thisIndex[3];
175    regionValue0 = region[iRow0];
176    regionValue1 = region[iRow1];
177    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
178    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
179    iRow0 = thisIndex[4];
180    regionValue0 = region[iRow0];
181    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
182    break;
183  case 6:
184    iRow0 = thisIndex[0];
185    iRow1 = thisIndex[1];
186    regionValue0 = region[iRow0];
187    regionValue1 = region[iRow1];
188    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
189    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
190    iRow0 = thisIndex[2];
191    iRow1 = thisIndex[3];
192    regionValue0 = region[iRow0];
193    regionValue1 = region[iRow1];
194    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
195    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
196    iRow0 = thisIndex[4];
197    iRow1 = thisIndex[5];
198    regionValue0 = region[iRow0];
199    regionValue1 = region[iRow1];
200    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
201    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
202    break;
203  case 7:
204    iRow0 = thisIndex[0];
205    iRow1 = thisIndex[1];
206    regionValue0 = region[iRow0];
207    regionValue1 = region[iRow1];
208    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
209    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
210    iRow0 = thisIndex[2];
211    iRow1 = thisIndex[3];
212    regionValue0 = region[iRow0];
213    regionValue1 = region[iRow1];
214    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
215    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
216    iRow0 = thisIndex[4];
217    iRow1 = thisIndex[5];
218    regionValue0 = region[iRow0];
219    regionValue1 = region[iRow1];
220    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
221    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
222    iRow0 = thisIndex[6];
223    regionValue0 = region[iRow0];
224    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
225    break;
226  case 8:
227    iRow0 = thisIndex[0];
228    iRow1 = thisIndex[1];
229    regionValue0 = region[iRow0];
230    regionValue1 = region[iRow1];
231    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
232    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
233    iRow0 = thisIndex[2];
234    iRow1 = thisIndex[3];
235    regionValue0 = region[iRow0];
236    regionValue1 = region[iRow1];
237    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
238    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
239    iRow0 = thisIndex[4];
240    iRow1 = thisIndex[5];
241    regionValue0 = region[iRow0];
242    regionValue1 = region[iRow1];
243    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
244    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
245    iRow0 = thisIndex[6];
246    iRow1 = thisIndex[7];
247    regionValue0 = region[iRow0];
248    regionValue1 = region[iRow1];
249    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
250    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
251    break;
252  default:
253    if ((number & 1) != 0) {
254      number--;
255      CoinSimplexInt iRow = thisIndex[number];
256      CoinFactorizationDouble regionValue = region[iRow];
257      CoinFactorizationDouble value = thisElement[number];
258      region[iRow] = regionValue - value * pivotValue;
259    }
260    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
261      CoinSimplexInt iRow0 = thisIndex[j];
262      CoinSimplexInt iRow1 = thisIndex[j - 1];
263      CoinFactorizationDouble regionValue0 = region[iRow0];
264      CoinFactorizationDouble regionValue1 = region[iRow1];
265      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
266      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
267    }
268    break;
269  }
270#endif
271}
272void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
273  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
274  CoinFactorizationDouble *COIN_RESTRICT region)
275{
276#if UNROLL_SCATTER == 0
277  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
278  for (CoinBigIndex j = number - 1; j >= 0; j--) {
279    CoinSimplexInt iRow = thisIndex[j];
280    CoinFactorizationDouble regionValue = region[iRow];
281    CoinFactorizationDouble value = thisElement[j];
282    assert(value);
283    region[iRow] = regionValue - value * pivotValue;
284  }
285#elif UNROLL_SCATTER == 1
286  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
287  if ((number & 1) != 0) {
288    number--;
289    CoinSimplexInt iRow = thisIndex[number];
290    CoinFactorizationDouble regionValue = region[iRow];
291    CoinFactorizationDouble value = thisElement[number];
292    region[iRow] = regionValue - value * pivotValue;
293  }
294  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
295    CoinSimplexInt iRow0 = thisIndex[j];
296    CoinSimplexInt iRow1 = thisIndex[j - 1];
297    CoinFactorizationDouble regionValue0 = region[iRow0];
298    CoinFactorizationDouble regionValue1 = region[iRow1];
299    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
300    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
301  }
302#elif UNROLL_SCATTER == 2
303  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
304  if ((number & 1) != 0) {
305    number--;
306    CoinSimplexInt iRow = thisIndex[number];
307    CoinFactorizationDouble regionValue = region[iRow];
308    CoinFactorizationDouble value = thisElement[number];
309    region[iRow] = regionValue - value * pivotValue;
310  }
311  if ((number & 2) != 0) {
312    CoinSimplexInt iRow0 = thisIndex[number - 1];
313    CoinFactorizationDouble regionValue0 = region[iRow0];
314    CoinFactorizationDouble value0 = thisElement[number - 1];
315    CoinSimplexInt iRow1 = thisIndex[number - 2];
316    CoinFactorizationDouble regionValue1 = region[iRow1];
317    CoinFactorizationDouble value1 = thisElement[number - 2];
318    region[iRow0] = regionValue0 - value0 * pivotValue;
319    region[iRow1] = regionValue1 - value1 * pivotValue;
320    number -= 2;
321  }
322#if AVX2 == 22
323  CoinFactorizationDouble temp[4] __attribute__((aligned(32)));
324  __m256d pv = _mm256_broadcast_sd(&pivotValue);
325  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
326    __m256d elements = _mm256_loadu_pd(thisElement + j - 3);
327    CoinSimplexInt iRow0 = thisIndex[j - 3];
328    CoinSimplexInt iRow1 = thisIndex[j - 2];
329    CoinSimplexInt iRow2 = thisIndex[j - 1];
330    CoinSimplexInt iRow3 = thisIndex[j - 0];
331    temp[0] = region[iRow0];
332    temp[1] = region[iRow1];
333    temp[2] = region[iRow2];
334    temp[3] = region[iRow3];
335    __m256d t0 = _mm256_load_pd(temp);
336    t0 -= pv * elements;
337    _mm256_store_pd(temp, t0);
338    region[iRow0] = temp[0];
339    region[iRow1] = temp[1];
340    region[iRow2] = temp[2];
341    region[iRow3] = temp[3];
342  }
343#else
344#pragma cilk grainsize = CILK_FOR_GRAINSIZE
345  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
346  {
347    CoinSimplexInt iRow0 = thisIndex[j];
348    CoinSimplexInt iRow1 = thisIndex[j - 1];
349    CoinFactorizationDouble regionValue0 = region[iRow0];
350    CoinFactorizationDouble regionValue1 = region[iRow1];
351    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
352    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
353    CoinSimplexInt iRow2 = thisIndex[j - 2];
354    CoinSimplexInt iRow3 = thisIndex[j - 3];
355    CoinFactorizationDouble regionValue2 = region[iRow2];
356    CoinFactorizationDouble regionValue3 = region[iRow3];
357    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
358    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
359  }
360#endif
361#elif UNROLL_SCATTER == 3
362  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
363  CoinSimplexInt iRow0;
364  CoinSimplexInt iRow1;
365  CoinFactorizationDouble regionValue0;
366  CoinFactorizationDouble regionValue1;
367  switch (static_cast< unsigned int >(number)) {
368  case 0:
369    break;
370  case 1:
371    iRow0 = thisIndex[0];
372    regionValue0 = region[iRow0];
373    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
374    break;
375  case 2:
376    iRow0 = thisIndex[0];
377    iRow1 = thisIndex[1];
378    regionValue0 = region[iRow0];
379    regionValue1 = region[iRow1];
380    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
381    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
382    break;
383  case 3:
384    iRow0 = thisIndex[0];
385    iRow1 = thisIndex[1];
386    regionValue0 = region[iRow0];
387    regionValue1 = region[iRow1];
388    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
389    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
390    iRow0 = thisIndex[2];
391    regionValue0 = region[iRow0];
392    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
393    break;
394  case 4:
395    iRow0 = thisIndex[0];
396    iRow1 = thisIndex[1];
397    regionValue0 = region[iRow0];
398    regionValue1 = region[iRow1];
399    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
400    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
401    iRow0 = thisIndex[2];
402    iRow1 = thisIndex[3];
403    regionValue0 = region[iRow0];
404    regionValue1 = region[iRow1];
405    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
406    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
407    break;
408  case 5:
409    iRow0 = thisIndex[0];
410    iRow1 = thisIndex[1];
411    regionValue0 = region[iRow0];
412    regionValue1 = region[iRow1];
413    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
414    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
415    iRow0 = thisIndex[2];
416    iRow1 = thisIndex[3];
417    regionValue0 = region[iRow0];
418    regionValue1 = region[iRow1];
419    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
420    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
421    iRow0 = thisIndex[4];
422    regionValue0 = region[iRow0];
423    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
424    break;
425  case 6:
426    iRow0 = thisIndex[0];
427    iRow1 = thisIndex[1];
428    regionValue0 = region[iRow0];
429    regionValue1 = region[iRow1];
430    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
431    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
432    iRow0 = thisIndex[2];
433    iRow1 = thisIndex[3];
434    regionValue0 = region[iRow0];
435    regionValue1 = region[iRow1];
436    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
437    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
438    iRow0 = thisIndex[4];
439    iRow1 = thisIndex[5];
440    regionValue0 = region[iRow0];
441    regionValue1 = region[iRow1];
442    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
443    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
444    break;
445  case 7:
446    iRow0 = thisIndex[0];
447    iRow1 = thisIndex[1];
448    regionValue0 = region[iRow0];
449    regionValue1 = region[iRow1];
450    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
451    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
452    iRow0 = thisIndex[2];
453    iRow1 = thisIndex[3];
454    regionValue0 = region[iRow0];
455    regionValue1 = region[iRow1];
456    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
457    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
458    iRow0 = thisIndex[4];
459    iRow1 = thisIndex[5];
460    regionValue0 = region[iRow0];
461    regionValue1 = region[iRow1];
462    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
463    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
464    iRow0 = thisIndex[6];
465    regionValue0 = region[iRow0];
466    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
467    break;
468  case 8:
469    iRow0 = thisIndex[0];
470    iRow1 = thisIndex[1];
471    regionValue0 = region[iRow0];
472    regionValue1 = region[iRow1];
473    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
474    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
475    iRow0 = thisIndex[2];
476    iRow1 = thisIndex[3];
477    regionValue0 = region[iRow0];
478    regionValue1 = region[iRow1];
479    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
480    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
481    iRow0 = thisIndex[4];
482    iRow1 = thisIndex[5];
483    regionValue0 = region[iRow0];
484    regionValue1 = region[iRow1];
485    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
486    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
487    iRow0 = thisIndex[6];
488    iRow1 = thisIndex[7];
489    regionValue0 = region[iRow0];
490    regionValue1 = region[iRow1];
491    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
492    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
493    break;
494  default:
495    if ((number & 1) != 0) {
496      number--;
497      CoinSimplexInt iRow = thisIndex[number];
498      CoinFactorizationDouble regionValue = region[iRow];
499      CoinFactorizationDouble value = thisElement[number];
500      region[iRow] = regionValue - value * pivotValue;
501    }
502    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
503      CoinSimplexInt iRow0 = thisIndex[j];
504      CoinSimplexInt iRow1 = thisIndex[j - 1];
505      CoinFactorizationDouble regionValue0 = region[iRow0];
506      CoinFactorizationDouble regionValue1 = region[iRow1];
507      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
508      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
509    }
510    break;
511  }
512#endif
513}
514#endif
515//#define COIN_PREFETCH
516#ifdef COIN_PREFETCH
517#if 1
518#define coin_prefetch(mem)              \
519  __asm__ __volatile__("prefetchnta %0" \
520                       :                \
521                       : "m"(*(reinterpret_cast< char * >(mem))))
522#define coin_prefetch_const(mem)        \
523  __asm__ __volatile__("prefetchnta %0" \
524                       :                \
525                       : "m"(*(reinterpret_cast< const char * >(mem))))
526#else
527#define coin_prefetch(mem)           \
528  __asm__ __volatile__("prefetch %0" \
529                       :             \
530                       : "m"(*(reinterpret_cast< char * >(mem))))
531#define coin_prefetch_const(mem)     \
532  __asm__ __volatile__("prefetch %0" \
533                       :             \
534                       : "m"(*(reinterpret_cast< const char * >(mem))))
535#endif
536#else
537// dummy
538#define coin_prefetch(mem)
539#define coin_prefetch_const(mem)
540#endif
541#define NEW_CHUNK_SIZE 4
542#define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE + NEW_CHUNK_SIZE / 2);
543#define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE / 2)
544// leaf, pure, nothrow and hot give warnings
545// fastcall and sseregparm give wrong results
546//#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
547#define SCATTER_ATTRIBUTE
548typedef void (*scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE;
549typedef struct {
550  scatterUpdate functionPointer;
551  CoinBigIndex offset;
552  int number;
553} scatterStruct;
554void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
555  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
556  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
557void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
558  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
559  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
560void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
561  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
562  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
563void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
564  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
565  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
566void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
567  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
568  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
569void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
570  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
571  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
572void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
573  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
574  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
575void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
576  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
577  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
578void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
579  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
580  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
581void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
582  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
583  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
584void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
585  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
586  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
587void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
588  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
589  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
590void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
591  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
592  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
593void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
594  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
595  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
596void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
597  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
598  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
599void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
600  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
601  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
602void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
603  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
604  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
605void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
606  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
607  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
608void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
609  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
610  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
611void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
612  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
613  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
614void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
615  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
616  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
617void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
618  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
619  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
620void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
621  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
622  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
623void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
624  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
625  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
626void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
627  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
628  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
629void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
630  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
631  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
632void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
633  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
634  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
635void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
636  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
637  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
638void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
639  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
640  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
641void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
642  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
643  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
644void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
645  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
646  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
647void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
648  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
649  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
650void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
651  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
652  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
653void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
654  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
655  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
656void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
657  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
658  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
659void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
660  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
661  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
662void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
663  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
664  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
665#if INLINE_SCATTER == 0
666void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
667  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
668  const int *COIN_RESTRICT thisIndex,
669  CoinFactorizationDouble *COIN_RESTRICT region,
670  double *COIN_RESTRICT work);
671#else
672#if 0
673void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
674                                            const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
675                                            const int *  COIN_RESTRICT thisIndex,
676                                            CoinFactorizationDouble * COIN_RESTRICT region,
677                                            double * COIN_RESTRICT /*work*/)
678{
679#if UNROLL_SCATTER == 0
680  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
681    CoinSimplexInt iRow = thisIndex[j];
682    CoinFactorizationDouble regionValue = region[iRow];
683    CoinFactorizationDouble value = thisElement[j];
684    assert (value);
685    region[iRow] = regionValue - value * pivotValue;
686  }
687#elif UNROLL_SCATTER == 1
688  if ((number&1)!=0) {
689    CoinSimplexInt iRow = thisIndex[0];
690    thisIndex++;
691    CoinFactorizationDouble regionValue = region[iRow];
692    CoinFactorizationDouble value = thisElement[0];
693    thisElement++;
694    region[iRow] = regionValue - value * pivotValue;
695  }
696  number = number>>1;
697  CoinFactorizationDouble work2[4];
698  for ( ; number !=0; number-- ) {
699    CoinSimplexInt iRow0 = thisIndex[0];
700    CoinSimplexInt iRow1 = thisIndex[1];
701    work2[0] = region[iRow0];
702    work2[1] = region[iRow1];
703#if 0
704    work2[2] = region[iRow0];
705    work2[3] = region[iRow1];
706    //__v4df b = __builtin_ia32_maskloadpd256(work2);
707    __v4df b = __builtin_ia32_loadupd256(work2);
708    //__v4df b = _mm256_load_pd(work2);
709#endif
710    work2[0] -= thisElement[0] * pivotValue;
711    work2[1] -= thisElement[1] * pivotValue;
712    region[iRow0] = work2[0];
713    region[iRow1] = work2[1];
714    thisIndex+=2;
715    thisElement+=2;
716  }
717#endif
718}
719#endif
720#endif
721#define UNROLL_GATHER 0
722#define INLINE_GATHER 1
723#if INLINE_GATHER == 0
724CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
725  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
726  const int *COIN_RESTRICT thisIndex,
727  CoinFactorizationDouble *COIN_RESTRICT region);
728#else
729CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
730  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
731  const int *COIN_RESTRICT thisIndex,
732  CoinFactorizationDouble *COIN_RESTRICT region)
733{
734#if UNROLL_GATHER == 0
735  CoinFactorizationDouble pivotValue = 0.0;
736  for (CoinBigIndex j = 0; j < number; j++) {
737    CoinFactorizationDouble value = thisElement[j];
738    CoinSimplexInt jRow = thisIndex[j];
739    value *= region[jRow];
740    pivotValue -= value;
741  }
742  return pivotValue;
743#else
744#error code
745#endif
746}
747#endif
748#define UNROLL_MULTIPLY_INDEXED 0
749#define INLINE_MULTIPLY_INDEXED 0
750#if INLINE_MULTIPLY_INDEXED == 0
751void CoinAbcMultiplyIndexed(int number,
752  const double *COIN_RESTRICT multiplier,
753  const int *COIN_RESTRICT thisIndex,
754  CoinFactorizationDouble *COIN_RESTRICT region);
755void CoinAbcMultiplyIndexed(int number,
756  const long double *COIN_RESTRICT multiplier,
757  const int *COIN_RESTRICT thisIndex,
758  long double *COIN_RESTRICT region);
759#else
760void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
761  const double *COIN_RESTRICT multiplier,
762  const int *COIN_RESTRICT thisIndex,
763  CoinFactorizationDouble *COIN_RESTRICT region)
764{
765}
766#endif
767double CoinAbcMaximumAbsElement(const double *region, int size);
768void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum);
769void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum);
770void CoinAbcScale(double *region, double multiplier, int size);
771void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size);
772/// maximum fabs(region[i]) and then region[i]*=multiplier
773double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size);
774void CoinAbcSetElements(double *region, int size, double value);
775void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1,
776  double *regionChanged, double multiplier2);
777double CoinAbcInnerProduct(const double *region1, int size, const double *region2);
778void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2);
779/// regionTo[index[i]]=regionFrom[i]
780void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number);
781/// regionTo[i]=regionFrom[index[i]]
782void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number);
783/// regionTo[index[i]]=0.0
784void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number);
785/// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
786void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
787  const int *indexList, const int *indexScatter, int number);
788/// array[i]=1.0/sqrt(array[i])
789void CoinAbcInverseSqrts(double *array, int n);
790void CoinAbcReciprocal(double *array, int n, const double *input);
791void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size);
792void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size);
793void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size);
794void CoinAbcMemset0Long(double *array, int size);
795void CoinAbcMemset0Long(int *array, int size);
796void CoinAbcMemset0Long(unsigned char *array, int size);
797void CoinAbcMemmove(double *array, const double *arrayFrom, int size);
798void CoinAbcMemmove(int *array, const int *arrayFrom, int size);
799void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size);
800/// This moves down and zeroes out end
801void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size);
802/// This compacts several sections and zeroes out end (returns number)
803int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths);
804/// This compacts several sections (returns number)
805int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths);
806#endif
807#if ABC_CREATE_SCATTER_FUNCTION
808SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
809  const CoinFactorizationDouble *COIN_RESTRICT element,
810  CoinFactorizationDouble *COIN_RESTRICT region)
811{
812#ifndef NDEBUG
813  assert(numberIn == 1);
814#endif
815  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 1);
816  int iColumn0 = thisColumn[0];
817  double value0 = region[iColumn0];
818  value0 OPERATION multiplier *element[0];
819  region[iColumn0] = value0;
820}
821SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
822  const CoinFactorizationDouble *COIN_RESTRICT element,
823  CoinFactorizationDouble *COIN_RESTRICT region)
824{
825#ifndef NDEBUG
826  assert(numberIn == 2);
827#endif
828  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 2);
829#if NEW_CHUNK_SIZE == 2
830  int nFull = 2 & (~(NEW_CHUNK_SIZE - 1));
831  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
832    coin_prefetch(element + NEW_CHUNK_SIZE_INCREMENT);
833    int iColumn0 = thisColumn[0];
834    int iColumn1 = thisColumn[1];
835    CoinFactorizationDouble value0 = region[iColumn0];
836    CoinFactorizationDouble value1 = region[iColumn1];
837    value0 OPERATION multiplier *element[0 + NEW_CHUNK_SIZE_OFFSET];
838    value1 OPERATION multiplier *element[1 + NEW_CHUNK_SIZE_OFFSET];
839    region[iColumn0] = value0;
840    region[iColumn1] = value1;
841    element += NEW_CHUNK_SIZE_INCREMENT;
842    thisColumn = reinterpret_cast< const int * >(element);
843  }
844#endif
845#if NEW_CHUNK_SIZE == 4
846  int iColumn0 = thisColumn[0];
847  int iColumn1 = thisColumn[1];
848  CoinFactorizationDouble value0 = region[iColumn0];
849  CoinFactorizationDouble value1 = region[iColumn1];
850  value0 OPERATION multiplier *element[0];
851  value1 OPERATION multiplier *element[1];
852  region[iColumn0] = value0;
853  region[iColumn1] = value1;
854#endif
855}
856SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
857  const CoinFactorizationDouble *COIN_RESTRICT element,
858  CoinFactorizationDouble *COIN_RESTRICT region)
859{
860#ifndef NDEBUG
861  assert(numberIn == 3);
862#endif
863  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 3);
864#if AVX2 == 1
865  double temp[2];
866#endif
867#if NEW_CHUNK_SIZE == 2
868  int nFull = 3 & (~(NEW_CHUNK_SIZE - 1));
869  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
870    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
871    int iColumn0 = thisColumn[0];
872    int iColumn1 = thisColumn[1];
873    CoinFactorizationDouble value0 = region[iColumn0];
874    CoinFactorizationDouble value1 = region[iColumn1];
875    value0 OPERATION multiplier *element[0];
876    value1 OPERATION multiplier *element[1];
877    region[iColumn0] = value0;
878    region[iColumn1] = value1;
879    element += NEW_CHUNK_SIZE;
880    thisColumn + = NEW_CHUNK_SIZE;
881  }
882#endif
883#if NEW_CHUNK_SIZE == 2
884  int iColumn0 = thisColumn[0];
885  double value0 = region[iColumn0];
886  value0 OPERATION multiplier *element[0];
887  region[iColumn0] = value0;
888#else
889  int iColumn0 = thisColumn[0];
890  int iColumn1 = thisColumn[1];
891  int iColumn2 = thisColumn[2];
892#if AVX2 == 1
893  __v2df bb;
894  double value2 = region[iColumn2];
895  value2 OPERATION multiplier *element[2];
896  set_const_v2df(bb, multiplier);
897  temp[0] = region[iColumn0];
898  temp[1] = region[iColumn1];
899  region[iColumn2] = value2;
900  __v2df v0 = __builtin_ia32_loadupd(temp);
901  __v2df a = __builtin_ia32_loadupd(element);
902  a *= bb;
903  v0 OPERATION a;
904  __builtin_ia32_storeupd(temp, v0);
905  region[iColumn0] = temp[0];
906  region[iColumn1] = temp[1];
907#else
908  double value0 = region[iColumn0];
909  double value1 = region[iColumn1];
910  double value2 = region[iColumn2];
911  value0 OPERATION multiplier *element[0];
912  value1 OPERATION multiplier *element[1];
913  value2 OPERATION multiplier *element[2];
914  region[iColumn0] = value0;
915  region[iColumn1] = value1;
916  region[iColumn2] = value2;
917#endif
918#endif
919}
920SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
921  const CoinFactorizationDouble *COIN_RESTRICT element,
922  CoinFactorizationDouble *COIN_RESTRICT region)
923{
924#ifndef NDEBUG
925  assert(numberIn == 4);
926#endif
927  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 4);
928  int nFull = 4 & (~(NEW_CHUNK_SIZE - 1));
929#if AVX2 == 1
930  double temp[4];
931#endif
932  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
933    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
934#if NEW_CHUNK_SIZE == 2
935    int iColumn0 = thisColumn[0];
936    int iColumn1 = thisColumn[1];
937    double value0 = region[iColumn0];
938    double value1 = region[iColumn1];
939    value0 OPERATION multiplier *element[0];
940    value1 OPERATION multiplier *element[1];
941    region[iColumn0] = value0;
942    region[iColumn1] = value1;
943#elif NEW_CHUNK_SIZE == 4
944    int iColumn0 = thisColumn[0];
945    int iColumn1 = thisColumn[1];
946    int iColumn2 = thisColumn[2];
947    int iColumn3 = thisColumn[3];
948#if AVX2 == 1
949    __v2df bb;
950    set_const_v2df(bb, multiplier);
951    temp[0] = region[iColumn0];
952    temp[1] = region[iColumn1];
953    temp[2] = region[iColumn2];
954    temp[3] = region[iColumn3];
955    __v2df v0 = __builtin_ia32_loadupd(temp);
956    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
957    __v2df a = __builtin_ia32_loadupd(element);
958    a *= bb;
959    v0 OPERATION a;
960    a = __builtin_ia32_loadupd(element + 2);
961    a *= bb;
962    v1 OPERATION a;
963    __builtin_ia32_storeupd(temp, v0);
964    __builtin_ia32_storeupd(temp + 2, v1);
965    region[iColumn0] = temp[0];
966    region[iColumn1] = temp[1];
967    region[iColumn2] = temp[2];
968    region[iColumn3] = temp[3];
969#else
970    double value0 = region[iColumn0];
971    double value1 = region[iColumn1];
972    double value2 = region[iColumn2];
973    double value3 = region[iColumn3];
974    value0 OPERATION multiplier *element[0];
975    value1 OPERATION multiplier *element[1];
976    value2 OPERATION multiplier *element[2];
977    value3 OPERATION multiplier *element[3];
978    region[iColumn0] = value0;
979    region[iColumn1] = value1;
980    region[iColumn2] = value2;
981    region[iColumn3] = value3;
982#endif
983#else
984    abort();
985#endif
986    element += NEW_CHUNK_SIZE;
987    thisColumn += NEW_CHUNK_SIZE;
988  }
989}
990SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
991  const CoinFactorizationDouble *COIN_RESTRICT element,
992  CoinFactorizationDouble *COIN_RESTRICT region)
993{
994#ifndef NDEBUG
995  assert(numberIn == 5);
996#endif
997  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 5);
998  int nFull = 5 & (~(NEW_CHUNK_SIZE - 1));
999#if AVX2 == 1
1000  double temp[4];
1001#endif
1002  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1003    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
1004#if NEW_CHUNK_SIZE == 2
1005    int iColumn0 = thisColumn[0];
1006    int iColumn1 = thisColumn[1];
1007    double value0 = region[iColumn0];
1008    double value1 = region[iColumn1];
1009    value0 OPERATION multiplier *element[0];
1010    value1 OPERATION multiplier *element[1];
1011    region[iColumn0] = value0;
1012    region[iColumn1] = value1;
1013#elif NEW_CHUNK_SIZE == 4
1014    int iColumn0 = thisColumn[0];
1015    int iColumn1 = thisColumn[1];
1016    int iColumn2 = thisColumn[2];
1017    int iColumn3 = thisColumn[3];
1018#if AVX2 == 1
1019    __v2df bb;
1020    set_const_v2df(bb, multiplier);
1021    temp[0] = region[iColumn0];
1022    temp[1] = region[iColumn1];
1023    temp[2] = region[iColumn2];
1024    temp[3] = region[iColumn3];
1025    __v2df v0 = __builtin_ia32_loadupd(temp);
1026    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1027    __v2df a = __builtin_ia32_loadupd(element);
1028    a *= bb;
1029    v0 OPERATION a;
1030    a = __builtin_ia32_loadupd(element + 2);
1031    a *= bb;
1032    v1 OPERATION a;
1033    __builtin_ia32_storeupd(temp, v0);
1034    __builtin_ia32_storeupd(temp + 2, v1);
1035    region[iColumn0] = temp[0];
1036    region[iColumn1] = temp[1];
1037    region[iColumn2] = temp[2];
1038    region[iColumn3] = temp[3];
1039#else
1040    double value0 = region[iColumn0];
1041    double value1 = region[iColumn1];
1042    double value2 = region[iColumn2];
1043    double value3 = region[iColumn3];
1044    value0 OPERATION multiplier *element[0];
1045    value1 OPERATION multiplier *element[1];
1046    value2 OPERATION multiplier *element[2];
1047    value3 OPERATION multiplier *element[3];
1048    region[iColumn0] = value0;
1049    region[iColumn1] = value1;
1050    region[iColumn2] = value2;
1051    region[iColumn3] = value3;
1052#endif
1053#else
1054    abort();
1055#endif
1056    element += NEW_CHUNK_SIZE;
1057    thisColumn += NEW_CHUNK_SIZE;
1058  }
1059  int iColumn0 = thisColumn[0];
1060  double value0 = region[iColumn0];
1061  value0 OPERATION multiplier *element[0];
1062  region[iColumn0] = value0;
1063}
1064SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
1065  const CoinFactorizationDouble *COIN_RESTRICT element,
1066  CoinFactorizationDouble *COIN_RESTRICT region)
1067{
1068#ifndef NDEBUG
1069  assert(numberIn == 6);
1070#endif
1071  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 6);
1072  int nFull = 6 & (~(NEW_CHUNK_SIZE - 1));
1073#if AVX2 == 1
1074  double temp[4];
1075#endif
1076  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1077    coin_prefetch_const(element + 6);
1078#if NEW_CHUNK_SIZE == 2
1079    int iColumn0 = thisColumn[0];
1080    int iColumn1 = thisColumn[1];
1081    double value0 = region[iColumn0];
1082    double value1 = region[iColumn1];
1083    value0 OPERATION multiplier *element[0];
1084    value1 OPERATION multiplier *element[1];
1085    region[iColumn0] = value0;
1086    region[iColumn1] = value1;
1087#elif NEW_CHUNK_SIZE == 4
1088    int iColumn0 = thisColumn[0];
1089    int iColumn1 = thisColumn[1];
1090    int iColumn2 = thisColumn[2];
1091    int iColumn3 = thisColumn[3];
1092#if AVX2 == 1
1093    __v2df bb;
1094    set_const_v2df(bb, multiplier);
1095    temp[0] = region[iColumn0];
1096    temp[1] = region[iColumn1];
1097    temp[2] = region[iColumn2];
1098    temp[3] = region[iColumn3];
1099    __v2df v0 = __builtin_ia32_loadupd(temp);
1100    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1101    __v2df a = __builtin_ia32_loadupd(element);
1102    a *= bb;
1103    v0 OPERATION a;
1104    a = __builtin_ia32_loadupd(element + 2);
1105    a *= bb;
1106    v1 OPERATION a;
1107    __builtin_ia32_storeupd(temp, v0);
1108    __builtin_ia32_storeupd(temp + 2, v1);
1109    region[iColumn0] = temp[0];
1110    region[iColumn1] = temp[1];
1111    region[iColumn2] = temp[2];
1112    region[iColumn3] = temp[3];
1113#else
1114    double value0 = region[iColumn0];
1115    double value1 = region[iColumn1];
1116    double value2 = region[iColumn2];
1117    double value3 = region[iColumn3];
1118    value0 OPERATION multiplier *element[0];
1119    value1 OPERATION multiplier *element[1];
1120    value2 OPERATION multiplier *element[2];
1121    value3 OPERATION multiplier *element[3];
1122    region[iColumn0] = value0;
1123    region[iColumn1] = value1;
1124    region[iColumn2] = value2;
1125    region[iColumn3] = value3;
1126#endif
1127#else
1128    abort();
1129#endif
1130    element += NEW_CHUNK_SIZE;
1131    thisColumn += NEW_CHUNK_SIZE;
1132  }
1133#if NEW_CHUNK_SIZE == 4
1134  int iColumn0 = thisColumn[0];
1135  int iColumn1 = thisColumn[1];
1136  double value0 = region[iColumn0];
1137  double value1 = region[iColumn1];
1138  value0 OPERATION multiplier *element[0];
1139  value1 OPERATION multiplier *element[1];
1140  region[iColumn0] = value0;
1141  region[iColumn1] = value1;
1142#endif
1143}
1144SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
1145  const CoinFactorizationDouble *COIN_RESTRICT element,
1146  CoinFactorizationDouble *COIN_RESTRICT region)
1147{
1148#ifndef NDEBUG
1149  assert(numberIn == 7);
1150#endif
1151  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 7);
1152  int nFull = 7 & (~(NEW_CHUNK_SIZE - 1));
1153#if AVX2 == 1
1154  double temp[4];
1155#endif
1156  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1157    coin_prefetch_const(element + 6);
1158#if NEW_CHUNK_SIZE == 2
1159    int iColumn0 = thisColumn[0];
1160    int iColumn1 = thisColumn[1];
1161    double value0 = region[iColumn0];
1162    double value1 = region[iColumn1];
1163    value0 OPERATION multiplier *element[0];
1164    value1 OPERATION multiplier *element[1];
1165    region[iColumn0] = value0;
1166    region[iColumn1] = value1;
1167#elif NEW_CHUNK_SIZE == 4
1168    int iColumn0 = thisColumn[0];
1169    int iColumn1 = thisColumn[1];
1170    int iColumn2 = thisColumn[2];
1171    int iColumn3 = thisColumn[3];
1172#if AVX2 == 1
1173    __v2df bb;
1174    set_const_v2df(bb, multiplier);
1175    temp[0] = region[iColumn0];
1176    temp[1] = region[iColumn1];
1177    temp[2] = region[iColumn2];
1178    temp[3] = region[iColumn3];
1179    __v2df v0 = __builtin_ia32_loadupd(temp);
1180    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1181    __v2df a = __builtin_ia32_loadupd(element);
1182    a *= bb;
1183    v0 OPERATION a;
1184    a = __builtin_ia32_loadupd(element + 2);
1185    a *= bb;
1186    v1 OPERATION a;
1187    __builtin_ia32_storeupd(temp, v0);
1188    __builtin_ia32_storeupd(temp + 2, v1);
1189    region[iColumn0] = temp[0];
1190    region[iColumn1] = temp[1];
1191    region[iColumn2] = temp[2];
1192    region[iColumn3] = temp[3];
1193#else
1194    double value0 = region[iColumn0];
1195    double value1 = region[iColumn1];
1196    double value2 = region[iColumn2];
1197    double value3 = region[iColumn3];
1198    value0 OPERATION multiplier *element[0];
1199    value1 OPERATION multiplier *element[1];
1200    value2 OPERATION multiplier *element[2];
1201    value3 OPERATION multiplier *element[3];
1202    region[iColumn0] = value0;
1203    region[iColumn1] = value1;
1204    region[iColumn2] = value2;
1205    region[iColumn3] = value3;
1206#endif
1207#else
1208    abort();
1209#endif
1210    element += NEW_CHUNK_SIZE;
1211    thisColumn += NEW_CHUNK_SIZE;
1212  }
1213#if NEW_CHUNK_SIZE == 2
1214  int iColumn0 = thisColumn[0];
1215  double value0 = region[iColumn0];
1216  value0 OPERATION multiplier *element[0];
1217  region[iColumn0] = value0;
1218#else
1219  int iColumn0 = thisColumn[0];
1220  int iColumn1 = thisColumn[1];
1221  int iColumn2 = thisColumn[2];
1222  double value0 = region[iColumn0];
1223  double value1 = region[iColumn1];
1224  double value2 = region[iColumn2];
1225  value0 OPERATION multiplier *element[0];
1226  value1 OPERATION multiplier *element[1];
1227  value2 OPERATION multiplier *element[2];
1228  region[iColumn0] = value0;
1229  region[iColumn1] = value1;
1230  region[iColumn2] = value2;
1231#endif
1232}
1233SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
1234  const CoinFactorizationDouble *COIN_RESTRICT element,
1235  CoinFactorizationDouble *COIN_RESTRICT region)
1236{
1237#ifndef NDEBUG
1238  assert(numberIn == 8);
1239#endif
1240  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 8);
1241  int nFull = 8 & (~(NEW_CHUNK_SIZE - 1));
1242#if AVX2 == 1
1243  double temp[4];
1244#endif
1245  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1246    coin_prefetch_const(element + 6);
1247#if NEW_CHUNK_SIZE == 2
1248    int iColumn0 = thisColumn[0];
1249    int iColumn1 = thisColumn[1];
1250    double value0 = region[iColumn0];
1251    double value1 = region[iColumn1];
1252    value0 OPERATION multiplier *element[0];
1253    value1 OPERATION multiplier *element[1];
1254    region[iColumn0] = value0;
1255    region[iColumn1] = value1;
1256#elif NEW_CHUNK_SIZE == 4
1257    int iColumn0 = thisColumn[0];
1258    int iColumn1 = thisColumn[1];
1259    int iColumn2 = thisColumn[2];
1260    int iColumn3 = thisColumn[3];
1261#if AVX2 == 1
1262    __v2df bb;
1263    set_const_v2df(bb, multiplier);
1264    temp[0] = region[iColumn0];
1265    temp[1] = region[iColumn1];
1266    temp[2] = region[iColumn2];
1267    temp[3] = region[iColumn3];
1268    __v2df v0 = __builtin_ia32_loadupd(temp);
1269    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1270    __v2df a = __builtin_ia32_loadupd(element);
1271    a *= bb;
1272    v0 OPERATION a;
1273    a = __builtin_ia32_loadupd(element + 2);
1274    a *= bb;
1275    v1 OPERATION a;
1276    __builtin_ia32_storeupd(temp, v0);
1277    __builtin_ia32_storeupd(temp + 2, v1);
1278    region[iColumn0] = temp[0];
1279    region[iColumn1] = temp[1];
1280    region[iColumn2] = temp[2];
1281    region[iColumn3] = temp[3];
1282#else
1283    double value0 = region[iColumn0];
1284    double value1 = region[iColumn1];
1285    double value2 = region[iColumn2];
1286    double value3 = region[iColumn3];
1287    value0 OPERATION multiplier *element[0];
1288    value1 OPERATION multiplier *element[1];
1289    value2 OPERATION multiplier *element[2];
1290    value3 OPERATION multiplier *element[3];
1291    region[iColumn0] = value0;
1292    region[iColumn1] = value1;
1293    region[iColumn2] = value2;
1294    region[iColumn3] = value3;
1295#endif
1296#else
1297    abort();
1298#endif
1299    element += NEW_CHUNK_SIZE;
1300    thisColumn += NEW_CHUNK_SIZE;
1301  }
1302}
1303SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
1304  const CoinFactorizationDouble *COIN_RESTRICT element,
1305  CoinFactorizationDouble *COIN_RESTRICT region)
1306{
1307  assert((numberIn & 3) == 0);
1308  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1309  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1310#if AVX2 == 1
1311  double temp[4];
1312#endif
1313  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1314    coin_prefetch_const(element + 16);
1315    coin_prefetch_const(thisColumn + 32);
1316#if NEW_CHUNK_SIZE == 2
1317    int iColumn0 = thisColumn[0];
1318    int iColumn1 = thisColumn[1];
1319    double value0 = region[iColumn0];
1320    double value1 = region[iColumn1];
1321    value0 OPERATION multiplier *element[0];
1322    value1 OPERATION multiplier *element[1];
1323    region[iColumn0] = value0;
1324    region[iColumn1] = value1;
1325#elif NEW_CHUNK_SIZE == 4
1326    int iColumn0 = thisColumn[0];
1327    int iColumn1 = thisColumn[1];
1328    int iColumn2 = thisColumn[2];
1329    int iColumn3 = thisColumn[3];
1330#if AVX2 == 1
1331    __v2df bb;
1332    set_const_v2df(bb, multiplier);
1333    temp[0] = region[iColumn0];
1334    temp[1] = region[iColumn1];
1335    temp[2] = region[iColumn2];
1336    temp[3] = region[iColumn3];
1337    __v2df v0 = __builtin_ia32_loadupd(temp);
1338    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1339    __v2df a = __builtin_ia32_loadupd(element);
1340    a *= bb;
1341    v0 OPERATION a;
1342    a = __builtin_ia32_loadupd(element + 2);
1343    a *= bb;
1344    v1 OPERATION a;
1345    __builtin_ia32_storeupd(temp, v0);
1346    __builtin_ia32_storeupd(temp + 2, v1);
1347    region[iColumn0] = temp[0];
1348    region[iColumn1] = temp[1];
1349    region[iColumn2] = temp[2];
1350    region[iColumn3] = temp[3];
1351#else
1352    double value0 = region[iColumn0];
1353    double value1 = region[iColumn1];
1354    double value2 = region[iColumn2];
1355    double value3 = region[iColumn3];
1356    value0 OPERATION multiplier *element[0];
1357    value1 OPERATION multiplier *element[1];
1358    value2 OPERATION multiplier *element[2];
1359    value3 OPERATION multiplier *element[3];
1360    region[iColumn0] = value0;
1361    region[iColumn1] = value1;
1362    region[iColumn2] = value2;
1363    region[iColumn3] = value3;
1364#endif
1365#else
1366    abort();
1367#endif
1368    element += NEW_CHUNK_SIZE;
1369    thisColumn += NEW_CHUNK_SIZE;
1370  }
1371}
1372SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
1373  const CoinFactorizationDouble *COIN_RESTRICT element,
1374  CoinFactorizationDouble *COIN_RESTRICT region)
1375{
1376  assert((numberIn & 3) == 1);
1377  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1378  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1379#if AVX2 == 1
1380  double temp[4];
1381#endif
1382  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1383    coin_prefetch_const(element + 16);
1384    coin_prefetch_const(thisColumn + 32);
1385#if NEW_CHUNK_SIZE == 2
1386    int iColumn0 = thisColumn[0];
1387    int iColumn1 = thisColumn[1];
1388    double value0 = region[iColumn0];
1389    double value1 = region[iColumn1];
1390    value0 OPERATION multiplier *element[0];
1391    value1 OPERATION multiplier *element[1];
1392    region[iColumn0] = value0;
1393    region[iColumn1] = value1;
1394#elif NEW_CHUNK_SIZE == 4
1395    int iColumn0 = thisColumn[0];
1396    int iColumn1 = thisColumn[1];
1397    int iColumn2 = thisColumn[2];
1398    int iColumn3 = thisColumn[3];
1399#if AVX2 == 1
1400    __v2df bb;
1401    set_const_v2df(bb, multiplier);
1402    temp[0] = region[iColumn0];
1403    temp[1] = region[iColumn1];
1404    temp[2] = region[iColumn2];
1405    temp[3] = region[iColumn3];
1406    __v2df v0 = __builtin_ia32_loadupd(temp);
1407    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1408    __v2df a = __builtin_ia32_loadupd(element);
1409    a *= bb;
1410    v0 OPERATION a;
1411    a = __builtin_ia32_loadupd(element + 2);
1412    a *= bb;
1413    v1 OPERATION a;
1414    __builtin_ia32_storeupd(temp, v0);
1415    __builtin_ia32_storeupd(temp + 2, v1);
1416    region[iColumn0] = temp[0];
1417    region[iColumn1] = temp[1];
1418    region[iColumn2] = temp[2];
1419    region[iColumn3] = temp[3];
1420#else
1421    double value0 = region[iColumn0];
1422    double value1 = region[iColumn1];
1423    double value2 = region[iColumn2];
1424    double value3 = region[iColumn3];
1425    value0 OPERATION multiplier *element[0];
1426    value1 OPERATION multiplier *element[1];
1427    value2 OPERATION multiplier *element[2];
1428    value3 OPERATION multiplier *element[3];
1429    region[iColumn0] = value0;
1430    region[iColumn1] = value1;
1431    region[iColumn2] = value2;
1432    region[iColumn3] = value3;
1433#endif
1434#else
1435    abort();
1436#endif
1437    element += NEW_CHUNK_SIZE;
1438    thisColumn += NEW_CHUNK_SIZE;
1439  }
1440  int iColumn0 = thisColumn[0];
1441  double value0 = region[iColumn0];
1442  value0 OPERATION multiplier *element[0];
1443  region[iColumn0] = value0;
1444}
1445SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
1446  const CoinFactorizationDouble *COIN_RESTRICT element,
1447  CoinFactorizationDouble *COIN_RESTRICT region)
1448{
1449  assert((numberIn & 3) == 2);
1450  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1451  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1452#if AVX2 == 1
1453  double temp[4];
1454#endif
1455  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1456    coin_prefetch_const(element + 16);
1457    coin_prefetch_const(thisColumn + 32);
1458#if NEW_CHUNK_SIZE == 2
1459    int iColumn0 = thisColumn[0];
1460    int iColumn1 = thisColumn[1];
1461    double value0 = region[iColumn0];
1462    double value1 = region[iColumn1];
1463    value0 OPERATION multiplier *element[0];
1464    value1 OPERATION multiplier *element[1];
1465    region[iColumn0] = value0;
1466    region[iColumn1] = value1;
1467#elif NEW_CHUNK_SIZE == 4
1468    int iColumn0 = thisColumn[0];
1469    int iColumn1 = thisColumn[1];
1470    int iColumn2 = thisColumn[2];
1471    int iColumn3 = thisColumn[3];
1472#if AVX2 == 1
1473    __v2df bb;
1474    set_const_v2df(bb, multiplier);
1475    temp[0] = region[iColumn0];
1476    temp[1] = region[iColumn1];
1477    temp[2] = region[iColumn2];
1478    temp[3] = region[iColumn3];
1479    __v2df v0 = __builtin_ia32_loadupd(temp);
1480    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1481    __v2df a = __builtin_ia32_loadupd(element);
1482    a *= bb;
1483    v0 OPERATION a;
1484    a = __builtin_ia32_loadupd(element + 2);
1485    a *= bb;
1486    v1 OPERATION a;
1487    __builtin_ia32_storeupd(temp, v0);
1488    __builtin_ia32_storeupd(temp + 2, v1);
1489    region[iColumn0] = temp[0];
1490    region[iColumn1] = temp[1];
1491    region[iColumn2] = temp[2];
1492    region[iColumn3] = temp[3];
1493#else
1494    double value0 = region[iColumn0];
1495    double value1 = region[iColumn1];
1496    double value2 = region[iColumn2];
1497    double value3 = region[iColumn3];
1498    value0 OPERATION multiplier *element[0];
1499    value1 OPERATION multiplier *element[1];
1500    value2 OPERATION multiplier *element[2];
1501    value3 OPERATION multiplier *element[3];
1502    region[iColumn0] = value0;
1503    region[iColumn1] = value1;
1504    region[iColumn2] = value2;
1505    region[iColumn3] = value3;
1506#endif
1507#else
1508    abort();
1509#endif
1510    element += NEW_CHUNK_SIZE;
1511    thisColumn += NEW_CHUNK_SIZE;
1512  }
1513#if NEW_CHUNK_SIZE == 4
1514  int iColumn0 = thisColumn[0];
1515  int iColumn1 = thisColumn[1];
1516  double value0 = region[iColumn0];
1517  double value1 = region[iColumn1];
1518  value0 OPERATION multiplier *element[0];
1519  value1 OPERATION multiplier *element[1];
1520  region[iColumn0] = value0;
1521  region[iColumn1] = value1;
1522#endif
1523}
1524SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
1525  const CoinFactorizationDouble *COIN_RESTRICT element,
1526  CoinFactorizationDouble *COIN_RESTRICT region)
1527{
1528  assert((numberIn & 3) == 3);
1529  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
1530  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
1531#if AVX2 == 1
1532  double temp[4];
1533#endif
1534  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
1535    coin_prefetch_const(element + 16);
1536    coin_prefetch_const(thisColumn + 32);
1537#if NEW_CHUNK_SIZE == 2
1538    int iColumn0 = thisColumn[0];
1539    int iColumn1 = thisColumn[1];
1540    double value0 = region[iColumn0];
1541    double value1 = region[iColumn1];
1542    value0 OPERATION multiplier *element[0];
1543    value1 OPERATION multiplier *element[1];
1544    region[iColumn0] = value0;
1545    region[iColumn1] = value1;
1546#elif NEW_CHUNK_SIZE == 4
1547    int iColumn0 = thisColumn[0];
1548    int iColumn1 = thisColumn[1];
1549    int iColumn2 = thisColumn[2];
1550    int iColumn3 = thisColumn[3];
1551#if AVX2 == 1
1552    __v2df bb;
1553    set_const_v2df(bb, multiplier);
1554    temp[0] = region[iColumn0];
1555    temp[1] = region[iColumn1];
1556    temp[2] = region[iColumn2];
1557    temp[3] = region[iColumn3];
1558    __v2df v0 = __builtin_ia32_loadupd(temp);
1559    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
1560    __v2df a = __builtin_ia32_loadupd(element);
1561    a *= bb;
1562    v0 OPERATION a;
1563    a = __builtin_ia32_loadupd(element + 2);
1564    a *= bb;
1565    v1 OPERATION a;
1566    __builtin_ia32_storeupd(temp, v0);
1567    __builtin_ia32_storeupd(temp + 2, v1);
1568    region[iColumn0] = temp[0];
1569    region[iColumn1] = temp[1];
1570    region[iColumn2] = temp[2];
1571    region[iColumn3] = temp[3];
1572#else
1573    double value0 = region[iColumn0];
1574    double value1 = region[iColumn1];
1575    double value2 = region[iColumn2];
1576    double value3 = region[iColumn3];
1577    value0 OPERATION multiplier *element[0];
1578    value1 OPERATION multiplier *element[1];
1579    value2 OPERATION multiplier *element[2];
1580    value3 OPERATION multiplier *element[3];
1581    region[iColumn0] = value0;
1582    region[iColumn1] = value1;
1583    region[iColumn2] = value2;
1584    region[iColumn3] = value3;
1585#endif
1586#else
1587    abort();
1588#endif
1589    element += NEW_CHUNK_SIZE;
1590    thisColumn += NEW_CHUNK_SIZE;
1591  }
1592#if NEW_CHUNK_SIZE == 2
1593  int iColumn0 = thisColumn[0];
1594  double value0 = region[iColumn0];
1595  value0 OPERATION multiplier *element[0];
1596  region[iColumn0] = value0;
1597#else
1598  int iColumn0 = thisColumn[0];
1599  int iColumn1 = thisColumn[1];
1600  int iColumn2 = thisColumn[2];
1601  double value0 = region[iColumn0];
1602  double value1 = region[iColumn1];
1603  double value2 = region[iColumn2];
1604  value0 OPERATION multiplier *element[0];
1605  value1 OPERATION multiplier *element[1];
1606  value2 OPERATION multiplier *element[2];
1607  region[iColumn0] = value0;
1608  region[iColumn1] = value1;
1609  region[iColumn2] = value2;
1610#endif
1611}
1612#endif
1613
1614/* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
1615*/
Note: See TracBrowser for help on using the repository browser.