source: stable/1.15/Clp/src/CoinAbcHelperFunctions.hpp @ 1949

Last change on this file since 1949 was 1910, checked in by stefan, 7 years ago
  • add configure option --enable-aboca={1,2,3,4,yes,no}
  • compile Aboca source only if --enable-aboca set (instead of compiling empty source files)
  • fix svn properties
  • Property svn:keywords set to Id
File size: 58.4 KB
Line 
1/* $Id: CoinAbcHelperFunctions.hpp 1910 2013-01-27 02:00:13Z forrest $ */
2// Copyright (C) 2003, International Business Machines
3// Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
4// This code is licensed under the terms of the Eclipse Public License (EPL).
5
6#ifndef CoinAbcHelperFunctions_H
7#define CoinAbcHelperFunctions_H
8
9#include "ClpConfig.h"
10#ifdef HAVE_CMATH
11# include <cmath>
12#else
13# ifdef HAVE_MATH_H
14#  include <math.h>
15# else
16# include <cmath>
17# endif
18#endif
19#include "CoinAbcCommon.hpp"
20#ifndef abc_assert
21#define abc_assert(condition)                                                   \
22  { if (!condition) {printf("abc_assert in %s at line %d - %s is false\n", \
23                            __FILE__, __LINE__, __STRING(condition)); abort();} }
24#endif
25// cilk_for granularity.
26#define CILK_FOR_GRAINSIZE 128
27//#define AVX2 2
28#if AVX2==1
29#include "emmintrin.h"
30#elif AVX2==2
31#include <immintrin.h>
32#elif AVX2==3
33#include "avx2intrin.h"
34#endif
35//#define __AVX__ 1
36//#define __AVX2__ 1
37/**
38    Note (JJF) I have added some operations on arrays even though they may
39    duplicate CoinDenseVector.
40
41*/
42
43#define UNROLL_SCATTER 2
44#define INLINE_SCATTER 1
45#if INLINE_SCATTER==0
46void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
47                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
48                          const int *  COIN_RESTRICT thisIndex,
49                          CoinFactorizationDouble * COIN_RESTRICT region);
50#else
51void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
52                                            const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
53                                            const int *  COIN_RESTRICT thisIndex,
54                                            CoinFactorizationDouble * COIN_RESTRICT region)
55{
56#if UNROLL_SCATTER==0
57  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
58    CoinSimplexInt iRow = thisIndex[j];
59    CoinFactorizationDouble regionValue = region[iRow];
60    CoinFactorizationDouble value = thisElement[j];
61    assert (value);
62    region[iRow] = regionValue - value * pivotValue;
63  }
64#elif UNROLL_SCATTER==1
65  if ((number&1)!=0) {
66    number--;
67    CoinSimplexInt iRow = thisIndex[number];
68    CoinFactorizationDouble regionValue = region[iRow];
69    CoinFactorizationDouble value = thisElement[number];
70    region[iRow] = regionValue - value * pivotValue;
71  }
72  for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
73    CoinSimplexInt iRow0 = thisIndex[j];
74    CoinSimplexInt iRow1 = thisIndex[j-1];
75    CoinFactorizationDouble regionValue0 = region[iRow0];
76    CoinFactorizationDouble regionValue1 = region[iRow1];
77    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
78    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
79  }
80#elif UNROLL_SCATTER==2
81  if ((number&1)!=0) {
82    number--;
83    CoinSimplexInt iRow = thisIndex[number];
84    CoinFactorizationDouble regionValue = region[iRow];
85    CoinFactorizationDouble value = thisElement[number]; 
86    region[iRow] = regionValue - value * pivotValue;
87  }
88  if ((number&2)!=0) {
89    CoinSimplexInt iRow0 = thisIndex[number-1];
90    CoinFactorizationDouble regionValue0 = region[iRow0];
91    CoinFactorizationDouble value0 = thisElement[number-1]; 
92    CoinSimplexInt iRow1 = thisIndex[number-2];
93    CoinFactorizationDouble regionValue1 = region[iRow1];
94    CoinFactorizationDouble value1 = thisElement[number-2]; 
95    region[iRow0] = regionValue0 - value0 * pivotValue;
96    region[iRow1] = regionValue1 - value1 * pivotValue;
97    number-=2;
98  }
99#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
100  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
101    CoinSimplexInt iRow0 = thisIndex[j];
102    CoinSimplexInt iRow1 = thisIndex[j-1];
103    CoinFactorizationDouble regionValue0 = region[iRow0];
104    CoinFactorizationDouble regionValue1 = region[iRow1];
105    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
106    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
107    CoinSimplexInt iRow2 = thisIndex[j-2];
108    CoinSimplexInt iRow3 = thisIndex[j-3];
109    CoinFactorizationDouble regionValue2 = region[iRow2];
110    CoinFactorizationDouble regionValue3 = region[iRow3];
111    region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
112    region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
113  }
114#elif UNROLL_SCATTER==3
115  CoinSimplexInt iRow0;
116  CoinSimplexInt iRow1;
117  CoinFactorizationDouble regionValue0;
118  CoinFactorizationDouble regionValue1;
119  switch(static_cast<unsigned int>(number)) {
120  case 0:
121    break;
122  case 1:
123    iRow0 = thisIndex[0];
124    regionValue0 = region[iRow0];
125    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
126    break;
127  case 2:
128    iRow0 = thisIndex[0];
129    iRow1 = thisIndex[1];
130    regionValue0 = region[iRow0];
131    regionValue1 = region[iRow1];
132    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
133    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
134    break;
135  case 3:
136    iRow0 = thisIndex[0];
137    iRow1 = thisIndex[1];
138    regionValue0 = region[iRow0];
139    regionValue1 = region[iRow1];
140    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
141    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
142    iRow0 = thisIndex[2];
143    regionValue0 = region[iRow0];
144    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
145    break;
146  case 4:
147    iRow0 = thisIndex[0];
148    iRow1 = thisIndex[1];
149    regionValue0 = region[iRow0];
150    regionValue1 = region[iRow1];
151    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
152    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
153    iRow0 = thisIndex[2];
154    iRow1 = thisIndex[3];
155    regionValue0 = region[iRow0];
156    regionValue1 = region[iRow1];
157    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
158    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
159    break;
160  case 5:
161    iRow0 = thisIndex[0];
162    iRow1 = thisIndex[1];
163    regionValue0 = region[iRow0];
164    regionValue1 = region[iRow1];
165    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
166    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
167    iRow0 = thisIndex[2];
168    iRow1 = thisIndex[3];
169    regionValue0 = region[iRow0];
170    regionValue1 = region[iRow1];
171    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
172    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
173    iRow0 = thisIndex[4];
174    regionValue0 = region[iRow0];
175    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
176    break;
177  case 6:
178    iRow0 = thisIndex[0];
179    iRow1 = thisIndex[1];
180    regionValue0 = region[iRow0];
181    regionValue1 = region[iRow1];
182    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
183    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
184    iRow0 = thisIndex[2];
185    iRow1 = thisIndex[3];
186    regionValue0 = region[iRow0];
187    regionValue1 = region[iRow1];
188    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
189    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
190    iRow0 = thisIndex[4];
191    iRow1 = thisIndex[5];
192    regionValue0 = region[iRow0];
193    regionValue1 = region[iRow1];
194    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
195    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
196    break;
197  case 7:
198    iRow0 = thisIndex[0];
199    iRow1 = thisIndex[1];
200    regionValue0 = region[iRow0];
201    regionValue1 = region[iRow1];
202    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
203    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
204    iRow0 = thisIndex[2];
205    iRow1 = thisIndex[3];
206    regionValue0 = region[iRow0];
207    regionValue1 = region[iRow1];
208    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
209    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
210    iRow0 = thisIndex[4];
211    iRow1 = thisIndex[5];
212    regionValue0 = region[iRow0];
213    regionValue1 = region[iRow1];
214    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
215    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
216    iRow0 = thisIndex[6];
217    regionValue0 = region[iRow0];
218    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
219    break;
220  case 8:
221    iRow0 = thisIndex[0];
222    iRow1 = thisIndex[1];
223    regionValue0 = region[iRow0];
224    regionValue1 = region[iRow1];
225    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
226    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
227    iRow0 = thisIndex[2];
228    iRow1 = thisIndex[3];
229    regionValue0 = region[iRow0];
230    regionValue1 = region[iRow1];
231    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
232    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
233    iRow0 = thisIndex[4];
234    iRow1 = thisIndex[5];
235    regionValue0 = region[iRow0];
236    regionValue1 = region[iRow1];
237    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
238    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
239    iRow0 = thisIndex[6];
240    iRow1 = thisIndex[7];
241    regionValue0 = region[iRow0];
242    regionValue1 = region[iRow1];
243    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
244    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
245    break;
246  default:
247    if ((number&1)!=0) {
248      number--;
249      CoinSimplexInt iRow = thisIndex[number];
250      CoinFactorizationDouble regionValue = region[iRow];
251      CoinFactorizationDouble value = thisElement[number];
252      region[iRow] = regionValue - value * pivotValue;
253    }
254    for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
255      CoinSimplexInt iRow0 = thisIndex[j];
256      CoinSimplexInt iRow1 = thisIndex[j-1];
257      CoinFactorizationDouble regionValue0 = region[iRow0];
258      CoinFactorizationDouble regionValue1 = region[iRow1];
259      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
260      region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
261    }
262    break;
263  }
264#endif
265}
266void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
267                                            const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
268                                            CoinFactorizationDouble * COIN_RESTRICT region)
269{
270#if UNROLL_SCATTER==0
271  const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
272  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
273    CoinSimplexInt iRow = thisIndex[j];
274    CoinFactorizationDouble regionValue = region[iRow];
275    CoinFactorizationDouble value = thisElement[j];
276    assert (value);
277    region[iRow] = regionValue - value * pivotValue;
278  }
279#elif UNROLL_SCATTER==1
280  const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
281  if ((number&1)!=0) {
282    number--;
283    CoinSimplexInt iRow = thisIndex[number];
284    CoinFactorizationDouble regionValue = region[iRow];
285    CoinFactorizationDouble value = thisElement[number];
286    region[iRow] = regionValue - value * pivotValue;
287  }
288  for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
289    CoinSimplexInt iRow0 = thisIndex[j];
290    CoinSimplexInt iRow1 = thisIndex[j-1];
291    CoinFactorizationDouble regionValue0 = region[iRow0];
292    CoinFactorizationDouble regionValue1 = region[iRow1];
293    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
294    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
295  }
296#elif UNROLL_SCATTER==2
297  const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
298  if ((number&1)!=0) {
299    number--;
300    CoinSimplexInt iRow = thisIndex[number];
301    CoinFactorizationDouble regionValue = region[iRow];
302    CoinFactorizationDouble value = thisElement[number]; 
303    region[iRow] = regionValue - value * pivotValue;
304  }
305  if ((number&2)!=0) {
306    CoinSimplexInt iRow0 = thisIndex[number-1];
307    CoinFactorizationDouble regionValue0 = region[iRow0];
308    CoinFactorizationDouble value0 = thisElement[number-1]; 
309    CoinSimplexInt iRow1 = thisIndex[number-2];
310    CoinFactorizationDouble regionValue1 = region[iRow1];
311    CoinFactorizationDouble value1 = thisElement[number-2]; 
312    region[iRow0] = regionValue0 - value0 * pivotValue;
313    region[iRow1] = regionValue1 - value1 * pivotValue;
314    number-=2;
315  }
316#if AVX2==22
317  CoinFactorizationDouble temp[4] __attribute__ ((aligned (32)));
318  __m256d pv = _mm256_broadcast_sd(&pivotValue);
319  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
320    __m256d elements=_mm256_loadu_pd(thisElement+j-3);
321    CoinSimplexInt iRow0 = thisIndex[j-3];
322    CoinSimplexInt iRow1 = thisIndex[j-2];
323    CoinSimplexInt iRow2 = thisIndex[j-1];
324    CoinSimplexInt iRow3 = thisIndex[j-0];
325    temp[0] = region[iRow0];
326    temp[1] = region[iRow1];
327    temp[2] = region[iRow2];
328    temp[3] = region[iRow3];
329    __m256d t0=_mm256_load_pd(temp);
330    t0 -= pv*elements;
331    _mm256_store_pd (temp, t0);
332    region[iRow0] = temp[0];
333    region[iRow1] = temp[1];
334    region[iRow2] = temp[2];
335    region[iRow3] = temp[3];
336  }
337#else
338#pragma cilk_grainsize=CILK_FOR_GRAINSIZE
339  for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
340    CoinSimplexInt iRow0 = thisIndex[j];
341    CoinSimplexInt iRow1 = thisIndex[j-1];
342    CoinFactorizationDouble regionValue0 = region[iRow0];
343    CoinFactorizationDouble regionValue1 = region[iRow1];
344    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
345    region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
346    CoinSimplexInt iRow2 = thisIndex[j-2];
347    CoinSimplexInt iRow3 = thisIndex[j-3];
348    CoinFactorizationDouble regionValue2 = region[iRow2];
349    CoinFactorizationDouble regionValue3 = region[iRow3];
350    region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
351    region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
352  }
353#endif
354#elif UNROLL_SCATTER==3
355  const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
356  CoinSimplexInt iRow0;
357  CoinSimplexInt iRow1;
358  CoinFactorizationDouble regionValue0;
359  CoinFactorizationDouble regionValue1;
360  switch(static_cast<unsigned int>(number)) {
361  case 0:
362    break;
363  case 1:
364    iRow0 = thisIndex[0];
365    regionValue0 = region[iRow0];
366    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
367    break;
368  case 2:
369    iRow0 = thisIndex[0];
370    iRow1 = thisIndex[1];
371    regionValue0 = region[iRow0];
372    regionValue1 = region[iRow1];
373    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
374    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
375    break;
376  case 3:
377    iRow0 = thisIndex[0];
378    iRow1 = thisIndex[1];
379    regionValue0 = region[iRow0];
380    regionValue1 = region[iRow1];
381    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
382    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
383    iRow0 = thisIndex[2];
384    regionValue0 = region[iRow0];
385    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
386    break;
387  case 4:
388    iRow0 = thisIndex[0];
389    iRow1 = thisIndex[1];
390    regionValue0 = region[iRow0];
391    regionValue1 = region[iRow1];
392    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
393    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
394    iRow0 = thisIndex[2];
395    iRow1 = thisIndex[3];
396    regionValue0 = region[iRow0];
397    regionValue1 = region[iRow1];
398    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
399    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
400    break;
401  case 5:
402    iRow0 = thisIndex[0];
403    iRow1 = thisIndex[1];
404    regionValue0 = region[iRow0];
405    regionValue1 = region[iRow1];
406    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
407    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
408    iRow0 = thisIndex[2];
409    iRow1 = thisIndex[3];
410    regionValue0 = region[iRow0];
411    regionValue1 = region[iRow1];
412    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
413    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
414    iRow0 = thisIndex[4];
415    regionValue0 = region[iRow0];
416    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
417    break;
418  case 6:
419    iRow0 = thisIndex[0];
420    iRow1 = thisIndex[1];
421    regionValue0 = region[iRow0];
422    regionValue1 = region[iRow1];
423    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
424    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
425    iRow0 = thisIndex[2];
426    iRow1 = thisIndex[3];
427    regionValue0 = region[iRow0];
428    regionValue1 = region[iRow1];
429    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
430    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
431    iRow0 = thisIndex[4];
432    iRow1 = thisIndex[5];
433    regionValue0 = region[iRow0];
434    regionValue1 = region[iRow1];
435    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
436    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
437    break;
438  case 7:
439    iRow0 = thisIndex[0];
440    iRow1 = thisIndex[1];
441    regionValue0 = region[iRow0];
442    regionValue1 = region[iRow1];
443    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
444    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
445    iRow0 = thisIndex[2];
446    iRow1 = thisIndex[3];
447    regionValue0 = region[iRow0];
448    regionValue1 = region[iRow1];
449    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
450    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
451    iRow0 = thisIndex[4];
452    iRow1 = thisIndex[5];
453    regionValue0 = region[iRow0];
454    regionValue1 = region[iRow1];
455    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
456    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
457    iRow0 = thisIndex[6];
458    regionValue0 = region[iRow0];
459    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
460    break;
461  case 8:
462    iRow0 = thisIndex[0];
463    iRow1 = thisIndex[1];
464    regionValue0 = region[iRow0];
465    regionValue1 = region[iRow1];
466    region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
467    region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
468    iRow0 = thisIndex[2];
469    iRow1 = thisIndex[3];
470    regionValue0 = region[iRow0];
471    regionValue1 = region[iRow1];
472    region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
473    region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
474    iRow0 = thisIndex[4];
475    iRow1 = thisIndex[5];
476    regionValue0 = region[iRow0];
477    regionValue1 = region[iRow1];
478    region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
479    region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
480    iRow0 = thisIndex[6];
481    iRow1 = thisIndex[7];
482    regionValue0 = region[iRow0];
483    regionValue1 = region[iRow1];
484    region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
485    region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
486    break;
487  default:
488    if ((number&1)!=0) {
489      number--;
490      CoinSimplexInt iRow = thisIndex[number];
491      CoinFactorizationDouble regionValue = region[iRow];
492      CoinFactorizationDouble value = thisElement[number];
493      region[iRow] = regionValue - value * pivotValue;
494    }
495    for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
496      CoinSimplexInt iRow0 = thisIndex[j];
497      CoinSimplexInt iRow1 = thisIndex[j-1];
498      CoinFactorizationDouble regionValue0 = region[iRow0];
499      CoinFactorizationDouble regionValue1 = region[iRow1];
500      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
501      region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
502    }
503    break;
504  }
505#endif
506}
507#endif
508//#define COIN_PREFETCH
509#ifdef COIN_PREFETCH
510#if 1
511#define coin_prefetch(mem)                                              \
512  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<char *>(mem))))
513#define coin_prefetch_const(mem)                                        \
514  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
515#else
516#define coin_prefetch(mem)                                              \
517  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<char *>(mem))))
518#define coin_prefetch_const(mem)                                        \
519  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
520#endif
521#else
522// dummy
523#define coin_prefetch(mem)
524#define coin_prefetch_const(mem)
525#endif
526#define NEW_CHUNK_SIZE 4
527#define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE+NEW_CHUNK_SIZE/2);
528#define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE/2)
529// leaf, pure, nothrow and hot give warnings
530// fastcall and sseregparm give wrong results
531//#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
532#define SCATTER_ATTRIBUTE
533typedef void (*scatterUpdate) (int,CoinFactorizationDouble,const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE ;
534typedef struct {
535  scatterUpdate functionPointer;
536  CoinBigIndex offset;
537  int number;
538} scatterStruct;
539void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
540                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
541                          CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
542void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
543                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
544                           CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
545void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
546                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
547                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
548void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
549                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
550                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
551void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
552                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
553                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
554void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
555                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
556                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
557void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
558                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
559                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
560void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
561                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
562                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
563void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
564                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
565                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
566void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
567                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
568                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
569void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
570                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
571                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
572void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
573                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
574                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
575void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
576                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
577                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
578void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
579                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
580                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
581void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
582                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
583                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
584void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
585                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
586                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
587void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
588                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
589                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
590void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
591                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
592                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
593void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
594                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
595                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
596void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
597                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
598                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
599void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
600                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
601                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
602void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
603                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
604                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
605void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
606                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
607                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
608void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
609                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
610                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
611void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
612                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
613                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
614void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
615                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
616                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
617void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
618                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
619                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
620void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
621                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
622                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
623void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
624                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
625                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
626void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
627                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
628                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
629void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
630                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
631                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
632void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
633                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
634                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
635void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
636                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
637                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
638void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
639                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
640                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
641void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
642                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
643                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
644void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
645                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
646                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
647void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
648                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
649                          CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
650#if INLINE_SCATTER==0
651void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
652                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
653                          const int *  COIN_RESTRICT thisIndex,
654                          CoinFactorizationDouble * COIN_RESTRICT region,
655                          double * COIN_RESTRICT work);
656#else
657#if 0
658void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
659                                            const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
660                                            const int *  COIN_RESTRICT thisIndex,
661                                            CoinFactorizationDouble * COIN_RESTRICT region,
662                                            double * COIN_RESTRICT /*work*/)
663{
664#if UNROLL_SCATTER==0
665  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
666    CoinSimplexInt iRow = thisIndex[j];
667    CoinFactorizationDouble regionValue = region[iRow];
668    CoinFactorizationDouble value = thisElement[j];
669    assert (value);
670    region[iRow] = regionValue - value * pivotValue;
671  }
672#elif UNROLL_SCATTER==1
673  if ((number&1)!=0) {
674    CoinSimplexInt iRow = thisIndex[0];
675    thisIndex++;
676    CoinFactorizationDouble regionValue = region[iRow];
677    CoinFactorizationDouble value = thisElement[0];
678    thisElement++;
679    region[iRow] = regionValue - value * pivotValue;
680  }
681  number = number>>1;
682  CoinFactorizationDouble work2[4];
683  for ( ; number !=0; number-- ) {
684    CoinSimplexInt iRow0 = thisIndex[0];
685    CoinSimplexInt iRow1 = thisIndex[1];
686    work2[0] = region[iRow0];
687    work2[1] = region[iRow1];
688#if 0
689    work2[2] = region[iRow0];
690    work2[3] = region[iRow1];
691    //__v4df b = __builtin_ia32_maskloadpd256(work2);
692    __v4df b = __builtin_ia32_loadupd256(work2);
693    //__v4df b = _mm256_load_pd(work2);
694#endif
695    work2[0] -= thisElement[0] * pivotValue;
696    work2[1] -= thisElement[1] * pivotValue;
697    region[iRow0] = work2[0];
698    region[iRow1] = work2[1];
699    thisIndex+=2;
700    thisElement+=2;
701  }
702#endif
703}
704#endif
705#endif
706#define UNROLL_GATHER 0
707#define INLINE_GATHER 1
708#if INLINE_GATHER==0
709CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
710                          const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
711                          const int *  COIN_RESTRICT thisIndex,
712                           CoinFactorizationDouble * COIN_RESTRICT region);
713#else
714CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
715                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
716                                             const int *  COIN_RESTRICT thisIndex,
717                                             CoinFactorizationDouble * COIN_RESTRICT region)
718{
719#if UNROLL_GATHER==0
720  CoinFactorizationDouble pivotValue=0.0;
721  for (CoinBigIndex j = 0; j < number; j ++ ) {
722    CoinFactorizationDouble value = thisElement[j];
723    CoinSimplexInt jRow = thisIndex[j];
724    value *= region[jRow];
725    pivotValue -= value;
726  }
727  return pivotValue;
728#else
729#error code
730#endif
731}
732#endif
733#define UNROLL_MULTIPLY_INDEXED 0
734#define INLINE_MULTIPLY_INDEXED 0
735#if INLINE_MULTIPLY_INDEXED==0
736void CoinAbcMultiplyIndexed(int number,
737                            const double *  COIN_RESTRICT multiplier,
738                            const int *  COIN_RESTRICT thisIndex,
739                            CoinFactorizationDouble * COIN_RESTRICT region);
740void CoinAbcMultiplyIndexed(int number,
741                            const long double *  COIN_RESTRICT multiplier,
742                            const int *  COIN_RESTRICT thisIndex,
743                            long double * COIN_RESTRICT region);
744#else
745void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
746                            const double *  COIN_RESTRICT multiplier,
747                            const int *  COIN_RESTRICT thisIndex,
748                            CoinFactorizationDouble * COIN_RESTRICT region)
749{
750}
751#endif
752double CoinAbcMaximumAbsElement(const double * region, int size);
753void CoinAbcMinMaxAbsElement(const double * region, int size,double & minimum , double & maximum);
754void CoinAbcMinMaxAbsNormalValues(const double * region, int size,double & minimum , double & maximum);
755void CoinAbcScale(double * region, double multiplier,int size);
756void CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int size);
757/// maximum fabs(region[i]) and then region[i]*=multiplier
758double CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int size);
759void CoinAbcSetElements(double * region, int size, double value);
760void CoinAbcMultiplyAdd(const double * region1, int size, double multiplier1,
761                 double * regionChanged, double multiplier2);
762double CoinAbcInnerProduct(const double * region1, int size, const double * region2);
763void CoinAbcGetNorms(const double * region, int size, double & norm1, double & norm2);
764/// regionTo[index[i]]=regionFrom[i]
765void CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int number);
766/// regionTo[i]=regionFrom[index[i]]
767void CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int number);
768/// regionTo[index[i]]=0.0
769void CoinAbcScatterZeroTo(double * regionTo, const int * index,int number);
770/// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
771void CoinAbcScatterToList(const double * regionFrom, double * regionTo, 
772                   const int * indexList, const int * indexScatter ,int number);
773/// array[i]=1.0/sqrt(array[i])
774void CoinAbcInverseSqrts(double * array, int n);
775void CoinAbcReciprocal(double * array, int n, const double *input);
776void CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size);
777void CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size);
778void CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size);
779void CoinAbcMemset0Long(double * array,int size);
780void CoinAbcMemset0Long(int * array,int size);
781void CoinAbcMemset0Long(unsigned char * array,int size);
782void CoinAbcMemmove(double * array,const double * arrayFrom,int size);
783void CoinAbcMemmove(int * array,const int * arrayFrom,int size);
784void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size);
785/// This moves down and zeroes out end
786void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size);
787/// This compacts several sections and zeroes out end (returns number)
788int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths); 
789/// This compacts several sections (returns number)
790int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths); 
791#endif
792#if ABC_CREATE_SCATTER_FUNCTION
793SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
794                          const CoinFactorizationDouble *  COIN_RESTRICT element,
795                          CoinFactorizationDouble * COIN_RESTRICT region)
796{
797#ifndef NDEBUG
798  assert (numberIn==1);
799#endif
800  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+1);
801  int iColumn0=thisColumn[0];
802  double value0=region[iColumn0];
803  value0 OPERATION multiplier*element[0];
804  region[iColumn0]=value0;
805}
806SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
807                          const CoinFactorizationDouble *  COIN_RESTRICT element,
808                          CoinFactorizationDouble * COIN_RESTRICT region)
809{
810#ifndef NDEBUG
811  assert (numberIn==2);
812#endif
813  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+2);
814#if NEW_CHUNK_SIZE==2
815  int nFull=2&(~(NEW_CHUNK_SIZE-1));
816  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
817    coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
818    int iColumn0=thisColumn[0];
819    int iColumn1=thisColumn[1];
820    CoinFactorizationDouble value0=region[iColumn0];
821    CoinFactorizationDouble value1=region[iColumn1];
822    value0 OPERATION multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
823    value1 OPERATION multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
824    region[iColumn0]=value0;
825    region[iColumn1]=value1;
826    element+=NEW_CHUNK_SIZE_INCREMENT;
827    thisColumn = reinterpret_cast<const int *>(element);
828  }
829#endif
830#if NEW_CHUNK_SIZE==4
831  int iColumn0=thisColumn[0];
832  int iColumn1=thisColumn[1];
833  CoinFactorizationDouble value0=region[iColumn0];
834  CoinFactorizationDouble value1=region[iColumn1];
835  value0 OPERATION multiplier*element[0];
836  value1 OPERATION multiplier*element[1];
837  region[iColumn0]=value0;
838  region[iColumn1]=value1;
839#endif
840}
841SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
842                          const CoinFactorizationDouble *  COIN_RESTRICT element,
843                          CoinFactorizationDouble * COIN_RESTRICT region)
844{
845#ifndef NDEBUG
846  assert (numberIn==3);
847#endif
848  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+3);
849#if AVX2==1
850  double temp[2];
851#endif
852#if NEW_CHUNK_SIZE==2
853  int nFull=3&(~(NEW_CHUNK_SIZE-1));
854  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
855    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
856    int iColumn0=thisColumn[0];
857    int iColumn1=thisColumn[1];
858    CoinFactorizationDouble value0=region[iColumn0];
859    CoinFactorizationDouble value1=region[iColumn1];
860    value0 OPERATION multiplier*element[0];
861    value1 OPERATION multiplier*element[1];
862    region[iColumn0]=value0;
863    region[iColumn1]=value1;
864    element+=NEW_CHUNK_SIZE;
865    thisColumn+ = NEW_CHUNK_SIZE;
866  }
867#endif
868#if NEW_CHUNK_SIZE==2
869  int iColumn0=thisColumn[0];
870  double value0=region[iColumn0];
871  value0 OPERATION multiplier*element[0];
872  region[iColumn0]=value0;
873#else
874  int iColumn0=thisColumn[0];
875  int iColumn1=thisColumn[1];
876  int iColumn2=thisColumn[2];
877#if AVX2==1
878  __v2df bb;
879  double value2=region[iColumn2];
880  value2 OPERATION multiplier*element[2];
881  set_const_v2df(bb,multiplier);
882  temp[0]=region[iColumn0];
883  temp[1]=region[iColumn1];
884  region[iColumn2]=value2;
885  __v2df v0 = __builtin_ia32_loadupd (temp);
886  __v2df a = __builtin_ia32_loadupd (element);
887  a *= bb;
888  v0 OPERATION a;
889  __builtin_ia32_storeupd (temp, v0);
890  region[iColumn0]=temp[0];
891  region[iColumn1]=temp[1];
892#else
893  double value0=region[iColumn0];
894  double value1=region[iColumn1];
895  double value2=region[iColumn2];
896  value0 OPERATION multiplier*element[0];
897  value1 OPERATION multiplier*element[1];
898  value2 OPERATION multiplier*element[2];
899  region[iColumn0]=value0;
900  region[iColumn1]=value1;
901  region[iColumn2]=value2;
902#endif
903#endif
904}
905SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
906                           const CoinFactorizationDouble *  COIN_RESTRICT element,
907                           CoinFactorizationDouble * COIN_RESTRICT region)
908{
909#ifndef NDEBUG
910  assert (numberIn==4);
911#endif
912  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+4);
913  int nFull=4&(~(NEW_CHUNK_SIZE-1));
914#if AVX2==1
915  double temp[4];
916#endif
917  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
918    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
919#if NEW_CHUNK_SIZE==2
920    int iColumn0=thisColumn[0];
921    int iColumn1=thisColumn[1];
922    double value0=region[iColumn0];
923    double value1=region[iColumn1];
924    value0 OPERATION multiplier*element[0];
925    value1 OPERATION multiplier*element[1];
926    region[iColumn0]=value0;
927    region[iColumn1]=value1;
928#elif NEW_CHUNK_SIZE==4
929    int iColumn0=thisColumn[0];
930    int iColumn1=thisColumn[1];
931    int iColumn2=thisColumn[2];
932    int iColumn3=thisColumn[3];
933#if AVX2==1
934    __v2df bb;
935    set_const_v2df(bb,multiplier);
936    temp[0]=region[iColumn0];
937    temp[1]=region[iColumn1];
938    temp[2]=region[iColumn2];
939    temp[3]=region[iColumn3];
940    __v2df v0 = __builtin_ia32_loadupd (temp);
941    __v2df v1 = __builtin_ia32_loadupd (temp+2);
942    __v2df a = __builtin_ia32_loadupd (element);
943    a *= bb;
944    v0 OPERATION a;
945    a = __builtin_ia32_loadupd (element+2);
946    a *= bb;
947    v1 OPERATION a;
948    __builtin_ia32_storeupd (temp, v0);
949    __builtin_ia32_storeupd (temp+2, v1);
950    region[iColumn0]=temp[0];
951    region[iColumn1]=temp[1];
952    region[iColumn2]=temp[2];
953    region[iColumn3]=temp[3];
954#else
955    double value0=region[iColumn0];
956    double value1=region[iColumn1];
957    double value2=region[iColumn2];
958    double value3=region[iColumn3];
959    value0 OPERATION multiplier*element[0];
960    value1 OPERATION multiplier*element[1];
961    value2 OPERATION multiplier*element[2];
962    value3 OPERATION multiplier*element[3];
963    region[iColumn0]=value0;
964    region[iColumn1]=value1;
965    region[iColumn2]=value2;
966    region[iColumn3]=value3;
967#endif
968#else
969    abort();
970#endif
971    element+=NEW_CHUNK_SIZE;
972    thisColumn += NEW_CHUNK_SIZE;
973  }
974}
975SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
976                           const CoinFactorizationDouble *  COIN_RESTRICT element,
977                           CoinFactorizationDouble * COIN_RESTRICT region)
978{
979#ifndef NDEBUG
980  assert (numberIn==5);
981#endif
982  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+5);
983  int nFull=5&(~(NEW_CHUNK_SIZE-1));
984#if AVX2==1
985  double temp[4];
986#endif
987  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
988    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
989#if NEW_CHUNK_SIZE==2
990    int iColumn0=thisColumn[0];
991    int iColumn1=thisColumn[1];
992    double value0=region[iColumn0];
993    double value1=region[iColumn1];
994    value0 OPERATION multiplier*element[0];
995    value1 OPERATION multiplier*element[1];
996    region[iColumn0]=value0;
997    region[iColumn1]=value1;
998#elif NEW_CHUNK_SIZE==4
999    int iColumn0=thisColumn[0];
1000    int iColumn1=thisColumn[1];
1001    int iColumn2=thisColumn[2];
1002    int iColumn3=thisColumn[3];
1003#if AVX2==1
1004    __v2df bb;
1005    set_const_v2df(bb,multiplier);
1006    temp[0]=region[iColumn0];
1007    temp[1]=region[iColumn1];
1008    temp[2]=region[iColumn2];
1009    temp[3]=region[iColumn3];
1010    __v2df v0 = __builtin_ia32_loadupd (temp);
1011    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1012    __v2df a = __builtin_ia32_loadupd (element);
1013    a *= bb;
1014    v0 OPERATION a;
1015    a = __builtin_ia32_loadupd (element+2);
1016    a *= bb;
1017    v1 OPERATION a;
1018    __builtin_ia32_storeupd (temp, v0);
1019    __builtin_ia32_storeupd (temp+2, v1);
1020    region[iColumn0]=temp[0];
1021    region[iColumn1]=temp[1];
1022    region[iColumn2]=temp[2];
1023    region[iColumn3]=temp[3];
1024#else
1025    double value0=region[iColumn0];
1026    double value1=region[iColumn1];
1027    double value2=region[iColumn2];
1028    double value3=region[iColumn3];
1029    value0 OPERATION multiplier*element[0];
1030    value1 OPERATION multiplier*element[1];
1031    value2 OPERATION multiplier*element[2];
1032    value3 OPERATION multiplier*element[3];
1033    region[iColumn0]=value0;
1034    region[iColumn1]=value1;
1035    region[iColumn2]=value2;
1036    region[iColumn3]=value3;
1037#endif
1038#else
1039    abort();
1040#endif
1041    element+=NEW_CHUNK_SIZE;
1042    thisColumn += NEW_CHUNK_SIZE;
1043  }
1044  int iColumn0=thisColumn[0];
1045  double value0=region[iColumn0];
1046  value0 OPERATION multiplier*element[0];
1047  region[iColumn0]=value0;
1048}
1049SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
1050                           const CoinFactorizationDouble *  COIN_RESTRICT element,
1051                           CoinFactorizationDouble * COIN_RESTRICT region)
1052{
1053#ifndef NDEBUG
1054  assert (numberIn==6);
1055#endif
1056  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+6);
1057  int nFull=6&(~(NEW_CHUNK_SIZE-1));
1058#if AVX2==1
1059  double temp[4];
1060#endif
1061  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1062    coin_prefetch_const(element+6);
1063#if NEW_CHUNK_SIZE==2
1064    int iColumn0=thisColumn[0];
1065    int iColumn1=thisColumn[1];
1066    double value0=region[iColumn0];
1067    double value1=region[iColumn1];
1068    value0 OPERATION multiplier*element[0];
1069    value1 OPERATION multiplier*element[1];
1070    region[iColumn0]=value0;
1071    region[iColumn1]=value1;
1072#elif NEW_CHUNK_SIZE==4
1073    int iColumn0=thisColumn[0];
1074    int iColumn1=thisColumn[1];
1075    int iColumn2=thisColumn[2];
1076    int iColumn3=thisColumn[3];
1077#if AVX2==1
1078    __v2df bb;
1079    set_const_v2df(bb,multiplier);
1080    temp[0]=region[iColumn0];
1081    temp[1]=region[iColumn1];
1082    temp[2]=region[iColumn2];
1083    temp[3]=region[iColumn3];
1084    __v2df v0 = __builtin_ia32_loadupd (temp);
1085    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1086    __v2df a = __builtin_ia32_loadupd (element);
1087    a *= bb;
1088    v0 OPERATION a;
1089    a = __builtin_ia32_loadupd (element+2);
1090    a *= bb;
1091    v1 OPERATION a;
1092    __builtin_ia32_storeupd (temp, v0);
1093    __builtin_ia32_storeupd (temp+2, v1);
1094    region[iColumn0]=temp[0];
1095    region[iColumn1]=temp[1];
1096    region[iColumn2]=temp[2];
1097    region[iColumn3]=temp[3];
1098#else
1099    double value0=region[iColumn0];
1100    double value1=region[iColumn1];
1101    double value2=region[iColumn2];
1102    double value3=region[iColumn3];
1103    value0 OPERATION multiplier*element[0];
1104    value1 OPERATION multiplier*element[1];
1105    value2 OPERATION multiplier*element[2];
1106    value3 OPERATION multiplier*element[3];
1107    region[iColumn0]=value0;
1108    region[iColumn1]=value1;
1109    region[iColumn2]=value2;
1110    region[iColumn3]=value3;
1111#endif
1112#else
1113    abort();
1114#endif
1115    element+=NEW_CHUNK_SIZE;
1116    thisColumn += NEW_CHUNK_SIZE;
1117  }
1118#if NEW_CHUNK_SIZE==4
1119  int iColumn0=thisColumn[0];
1120  int iColumn1=thisColumn[1];
1121  double value0=region[iColumn0];
1122  double value1=region[iColumn1];
1123  value0 OPERATION multiplier*element[0];
1124  value1 OPERATION multiplier*element[1];
1125  region[iColumn0]=value0;
1126  region[iColumn1]=value1;
1127#endif
1128}
1129SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
1130                           const CoinFactorizationDouble *  COIN_RESTRICT element,
1131                           CoinFactorizationDouble * COIN_RESTRICT region)
1132{
1133#ifndef NDEBUG
1134  assert (numberIn==7);
1135#endif
1136  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+7);
1137  int nFull=7&(~(NEW_CHUNK_SIZE-1));
1138#if AVX2==1
1139  double temp[4];
1140#endif
1141  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1142    coin_prefetch_const(element+6);
1143#if NEW_CHUNK_SIZE==2
1144    int iColumn0=thisColumn[0];
1145    int iColumn1=thisColumn[1];
1146    double value0=region[iColumn0];
1147    double value1=region[iColumn1];
1148    value0 OPERATION multiplier*element[0];
1149    value1 OPERATION multiplier*element[1];
1150    region[iColumn0]=value0;
1151    region[iColumn1]=value1;
1152#elif NEW_CHUNK_SIZE==4
1153    int iColumn0=thisColumn[0];
1154    int iColumn1=thisColumn[1];
1155    int iColumn2=thisColumn[2];
1156    int iColumn3=thisColumn[3];
1157#if AVX2==1
1158    __v2df bb;
1159    set_const_v2df(bb,multiplier);
1160    temp[0]=region[iColumn0];
1161    temp[1]=region[iColumn1];
1162    temp[2]=region[iColumn2];
1163    temp[3]=region[iColumn3];
1164    __v2df v0 = __builtin_ia32_loadupd (temp);
1165    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1166    __v2df a = __builtin_ia32_loadupd (element);
1167    a *= bb;
1168    v0 OPERATION a;
1169    a = __builtin_ia32_loadupd (element+2);
1170    a *= bb;
1171    v1 OPERATION a;
1172    __builtin_ia32_storeupd (temp, v0);
1173    __builtin_ia32_storeupd (temp+2, v1);
1174    region[iColumn0]=temp[0];
1175    region[iColumn1]=temp[1];
1176    region[iColumn2]=temp[2];
1177    region[iColumn3]=temp[3];
1178#else
1179    double value0=region[iColumn0];
1180    double value1=region[iColumn1];
1181    double value2=region[iColumn2];
1182    double value3=region[iColumn3];
1183    value0 OPERATION multiplier*element[0];
1184    value1 OPERATION multiplier*element[1];
1185    value2 OPERATION multiplier*element[2];
1186    value3 OPERATION multiplier*element[3];
1187    region[iColumn0]=value0;
1188    region[iColumn1]=value1;
1189    region[iColumn2]=value2;
1190    region[iColumn3]=value3;
1191#endif
1192#else
1193    abort();
1194#endif
1195    element+=NEW_CHUNK_SIZE;
1196    thisColumn += NEW_CHUNK_SIZE;
1197  }
1198#if NEW_CHUNK_SIZE==2
1199  int iColumn0=thisColumn[0];
1200  double value0=region[iColumn0];
1201  value0 OPERATION multiplier*element[0];
1202  region[iColumn0]=value0;
1203#else
1204  int iColumn0=thisColumn[0];
1205  int iColumn1=thisColumn[1];
1206  int iColumn2=thisColumn[2];
1207  double value0=region[iColumn0];
1208  double value1=region[iColumn1];
1209  double value2=region[iColumn2];
1210  value0 OPERATION multiplier*element[0];
1211  value1 OPERATION multiplier*element[1];
1212  value2 OPERATION multiplier*element[2];
1213  region[iColumn0]=value0;
1214  region[iColumn1]=value1;
1215  region[iColumn2]=value2;
1216#endif
1217}
1218SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
1219                           const CoinFactorizationDouble *  COIN_RESTRICT element,
1220                           CoinFactorizationDouble * COIN_RESTRICT region)
1221{
1222#ifndef NDEBUG
1223  assert (numberIn==8);
1224#endif
1225  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+8);
1226  int nFull=8&(~(NEW_CHUNK_SIZE-1));
1227#if AVX2==1
1228  double temp[4];
1229#endif
1230  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1231    coin_prefetch_const(element+6);
1232#if NEW_CHUNK_SIZE==2
1233    int iColumn0=thisColumn[0];
1234    int iColumn1=thisColumn[1];
1235    double value0=region[iColumn0];
1236    double value1=region[iColumn1];
1237    value0 OPERATION multiplier*element[0];
1238    value1 OPERATION multiplier*element[1];
1239    region[iColumn0]=value0;
1240    region[iColumn1]=value1;
1241#elif NEW_CHUNK_SIZE==4
1242    int iColumn0=thisColumn[0];
1243    int iColumn1=thisColumn[1];
1244    int iColumn2=thisColumn[2];
1245    int iColumn3=thisColumn[3];
1246#if AVX2==1
1247    __v2df bb;
1248    set_const_v2df(bb,multiplier);
1249    temp[0]=region[iColumn0];
1250    temp[1]=region[iColumn1];
1251    temp[2]=region[iColumn2];
1252    temp[3]=region[iColumn3];
1253    __v2df v0 = __builtin_ia32_loadupd (temp);
1254    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1255    __v2df a = __builtin_ia32_loadupd (element);
1256    a *= bb;
1257    v0 OPERATION a;
1258    a = __builtin_ia32_loadupd (element+2);
1259    a *= bb;
1260    v1 OPERATION a;
1261    __builtin_ia32_storeupd (temp, v0);
1262    __builtin_ia32_storeupd (temp+2, v1);
1263    region[iColumn0]=temp[0];
1264    region[iColumn1]=temp[1];
1265    region[iColumn2]=temp[2];
1266    region[iColumn3]=temp[3];
1267#else
1268    double value0=region[iColumn0];
1269    double value1=region[iColumn1];
1270    double value2=region[iColumn2];
1271    double value3=region[iColumn3];
1272    value0 OPERATION multiplier*element[0];
1273    value1 OPERATION multiplier*element[1];
1274    value2 OPERATION multiplier*element[2];
1275    value3 OPERATION multiplier*element[3];
1276    region[iColumn0]=value0;
1277    region[iColumn1]=value1;
1278    region[iColumn2]=value2;
1279    region[iColumn3]=value3;
1280#endif
1281#else
1282    abort();
1283#endif
1284    element+=NEW_CHUNK_SIZE;
1285    thisColumn += NEW_CHUNK_SIZE;
1286  }
1287}
1288SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
1289                            const CoinFactorizationDouble *  COIN_RESTRICT element,
1290                            CoinFactorizationDouble * COIN_RESTRICT region)
1291{
1292  assert ((numberIn&3)==0);
1293  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
1294  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1295#if AVX2==1
1296  double temp[4];
1297#endif
1298  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1299    coin_prefetch_const(element+16);
1300    coin_prefetch_const(thisColumn+32);
1301#if NEW_CHUNK_SIZE==2
1302    int iColumn0=thisColumn[0];
1303    int iColumn1=thisColumn[1];
1304    double value0=region[iColumn0];
1305    double value1=region[iColumn1];
1306    value0 OPERATION multiplier*element[0];
1307    value1 OPERATION multiplier*element[1];
1308    region[iColumn0]=value0;
1309    region[iColumn1]=value1;
1310#elif NEW_CHUNK_SIZE==4
1311    int iColumn0=thisColumn[0];
1312    int iColumn1=thisColumn[1];
1313    int iColumn2=thisColumn[2];
1314    int iColumn3=thisColumn[3];
1315#if AVX2==1
1316    __v2df bb;
1317    set_const_v2df(bb,multiplier);
1318    temp[0]=region[iColumn0];
1319    temp[1]=region[iColumn1];
1320    temp[2]=region[iColumn2];
1321    temp[3]=region[iColumn3];
1322    __v2df v0 = __builtin_ia32_loadupd (temp);
1323    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1324    __v2df a = __builtin_ia32_loadupd (element);
1325    a *= bb;
1326    v0 OPERATION a;
1327    a = __builtin_ia32_loadupd (element+2);
1328    a *= bb;
1329    v1 OPERATION a;
1330    __builtin_ia32_storeupd (temp, v0);
1331    __builtin_ia32_storeupd (temp+2, v1);
1332    region[iColumn0]=temp[0];
1333    region[iColumn1]=temp[1];
1334    region[iColumn2]=temp[2];
1335    region[iColumn3]=temp[3];
1336#else
1337    double value0=region[iColumn0];
1338    double value1=region[iColumn1];
1339    double value2=region[iColumn2];
1340    double value3=region[iColumn3];
1341    value0 OPERATION multiplier*element[0];
1342    value1 OPERATION multiplier*element[1];
1343    value2 OPERATION multiplier*element[2];
1344    value3 OPERATION multiplier*element[3];
1345    region[iColumn0]=value0;
1346    region[iColumn1]=value1;
1347    region[iColumn2]=value2;
1348    region[iColumn3]=value3;
1349#endif
1350#else
1351    abort();
1352#endif
1353    element+=NEW_CHUNK_SIZE;
1354    thisColumn += NEW_CHUNK_SIZE;
1355  }
1356}
1357SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
1358                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1359                                 CoinFactorizationDouble * COIN_RESTRICT region)
1360{
1361  assert ((numberIn&3)==1);
1362  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
1363  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1364#if AVX2==1
1365  double temp[4];
1366#endif
1367  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1368    coin_prefetch_const(element+16);
1369    coin_prefetch_const(thisColumn+32);
1370#if NEW_CHUNK_SIZE==2
1371    int iColumn0=thisColumn[0];
1372    int iColumn1=thisColumn[1];
1373    double value0=region[iColumn0];
1374    double value1=region[iColumn1];
1375    value0 OPERATION multiplier*element[0];
1376    value1 OPERATION multiplier*element[1];
1377    region[iColumn0]=value0;
1378    region[iColumn1]=value1;
1379#elif NEW_CHUNK_SIZE==4
1380    int iColumn0=thisColumn[0];
1381    int iColumn1=thisColumn[1];
1382    int iColumn2=thisColumn[2];
1383    int iColumn3=thisColumn[3];
1384#if AVX2==1
1385    __v2df bb;
1386    set_const_v2df(bb,multiplier);
1387    temp[0]=region[iColumn0];
1388    temp[1]=region[iColumn1];
1389    temp[2]=region[iColumn2];
1390    temp[3]=region[iColumn3];
1391    __v2df v0 = __builtin_ia32_loadupd (temp);
1392    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1393    __v2df a = __builtin_ia32_loadupd (element);
1394    a *= bb;
1395    v0 OPERATION a;
1396    a = __builtin_ia32_loadupd (element+2);
1397    a *= bb;
1398    v1 OPERATION a;
1399    __builtin_ia32_storeupd (temp, v0);
1400    __builtin_ia32_storeupd (temp+2, v1);
1401    region[iColumn0]=temp[0];
1402    region[iColumn1]=temp[1];
1403    region[iColumn2]=temp[2];
1404    region[iColumn3]=temp[3];
1405#else
1406    double value0=region[iColumn0];
1407    double value1=region[iColumn1];
1408    double value2=region[iColumn2];
1409    double value3=region[iColumn3];
1410    value0 OPERATION multiplier*element[0];
1411    value1 OPERATION multiplier*element[1];
1412    value2 OPERATION multiplier*element[2];
1413    value3 OPERATION multiplier*element[3];
1414    region[iColumn0]=value0;
1415    region[iColumn1]=value1;
1416    region[iColumn2]=value2;
1417    region[iColumn3]=value3;
1418#endif
1419#else
1420    abort();
1421#endif
1422    element+=NEW_CHUNK_SIZE;
1423    thisColumn += NEW_CHUNK_SIZE;
1424  }
1425  int iColumn0=thisColumn[0];
1426  double value0=region[iColumn0];
1427  value0 OPERATION multiplier*element[0];
1428  region[iColumn0]=value0;
1429}
1430SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
1431                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1432                                 CoinFactorizationDouble * COIN_RESTRICT region)
1433{
1434  assert ((numberIn&3)==2);
1435  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
1436  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1437#if AVX2==1
1438  double temp[4];
1439#endif
1440  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1441    coin_prefetch_const(element+16);
1442    coin_prefetch_const(thisColumn+32);
1443#if NEW_CHUNK_SIZE==2
1444    int iColumn0=thisColumn[0];
1445    int iColumn1=thisColumn[1];
1446    double value0=region[iColumn0];
1447    double value1=region[iColumn1];
1448    value0 OPERATION multiplier*element[0];
1449    value1 OPERATION multiplier*element[1];
1450    region[iColumn0]=value0;
1451    region[iColumn1]=value1;
1452#elif NEW_CHUNK_SIZE==4
1453    int iColumn0=thisColumn[0];
1454    int iColumn1=thisColumn[1];
1455    int iColumn2=thisColumn[2];
1456    int iColumn3=thisColumn[3];
1457#if AVX2==1
1458    __v2df bb;
1459    set_const_v2df(bb,multiplier);
1460    temp[0]=region[iColumn0];
1461    temp[1]=region[iColumn1];
1462    temp[2]=region[iColumn2];
1463    temp[3]=region[iColumn3];
1464    __v2df v0 = __builtin_ia32_loadupd (temp);
1465    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1466    __v2df a = __builtin_ia32_loadupd (element);
1467    a *= bb;
1468    v0 OPERATION a;
1469    a = __builtin_ia32_loadupd (element+2);
1470    a *= bb;
1471    v1 OPERATION a;
1472    __builtin_ia32_storeupd (temp, v0);
1473    __builtin_ia32_storeupd (temp+2, v1);
1474    region[iColumn0]=temp[0];
1475    region[iColumn1]=temp[1];
1476    region[iColumn2]=temp[2];
1477    region[iColumn3]=temp[3];
1478#else
1479    double value0=region[iColumn0];
1480    double value1=region[iColumn1];
1481    double value2=region[iColumn2];
1482    double value3=region[iColumn3];
1483    value0 OPERATION multiplier*element[0];
1484    value1 OPERATION multiplier*element[1];
1485    value2 OPERATION multiplier*element[2];
1486    value3 OPERATION multiplier*element[3];
1487    region[iColumn0]=value0;
1488    region[iColumn1]=value1;
1489    region[iColumn2]=value2;
1490    region[iColumn3]=value3;
1491#endif
1492#else
1493    abort();
1494#endif
1495    element+=NEW_CHUNK_SIZE;
1496    thisColumn += NEW_CHUNK_SIZE;
1497  }
1498#if NEW_CHUNK_SIZE==4
1499  int iColumn0=thisColumn[0];
1500  int iColumn1=thisColumn[1];
1501  double value0=region[iColumn0];
1502  double value1=region[iColumn1];
1503  value0 OPERATION multiplier*element[0];
1504  value1 OPERATION multiplier*element[1];
1505  region[iColumn0]=value0;
1506  region[iColumn1]=value1;
1507#endif
1508}
1509SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
1510                                 const CoinFactorizationDouble *  COIN_RESTRICT element,
1511                                 CoinFactorizationDouble * COIN_RESTRICT region)
1512{
1513  assert ((numberIn&3)==3);
1514  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
1515  int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
1516#if AVX2==1
1517  double temp[4];
1518#endif
1519  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
1520    coin_prefetch_const(element+16);
1521    coin_prefetch_const(thisColumn+32);
1522#if NEW_CHUNK_SIZE==2
1523    int iColumn0=thisColumn[0];
1524    int iColumn1=thisColumn[1];
1525    double value0=region[iColumn0];
1526    double value1=region[iColumn1];
1527    value0 OPERATION multiplier*element[0];
1528    value1 OPERATION multiplier*element[1];
1529    region[iColumn0]=value0;
1530    region[iColumn1]=value1;
1531#elif NEW_CHUNK_SIZE==4
1532    int iColumn0=thisColumn[0];
1533    int iColumn1=thisColumn[1];
1534    int iColumn2=thisColumn[2];
1535    int iColumn3=thisColumn[3];
1536#if AVX2==1
1537    __v2df bb;
1538    set_const_v2df(bb,multiplier);
1539    temp[0]=region[iColumn0];
1540    temp[1]=region[iColumn1];
1541    temp[2]=region[iColumn2];
1542    temp[3]=region[iColumn3];
1543    __v2df v0 = __builtin_ia32_loadupd (temp);
1544    __v2df v1 = __builtin_ia32_loadupd (temp+2);
1545    __v2df a = __builtin_ia32_loadupd (element);
1546    a *= bb;
1547    v0 OPERATION a;
1548    a = __builtin_ia32_loadupd (element+2);
1549    a *= bb;
1550    v1 OPERATION a;
1551    __builtin_ia32_storeupd (temp, v0);
1552    __builtin_ia32_storeupd (temp+2, v1);
1553    region[iColumn0]=temp[0];
1554    region[iColumn1]=temp[1];
1555    region[iColumn2]=temp[2];
1556    region[iColumn3]=temp[3];
1557#else
1558    double value0=region[iColumn0];
1559    double value1=region[iColumn1];
1560    double value2=region[iColumn2];
1561    double value3=region[iColumn3];
1562    value0 OPERATION multiplier*element[0];
1563    value1 OPERATION multiplier*element[1];
1564    value2 OPERATION multiplier*element[2];
1565    value3 OPERATION multiplier*element[3];
1566    region[iColumn0]=value0;
1567    region[iColumn1]=value1;
1568    region[iColumn2]=value2;
1569    region[iColumn3]=value3;
1570#endif
1571#else
1572    abort();
1573#endif
1574    element+=NEW_CHUNK_SIZE;
1575    thisColumn += NEW_CHUNK_SIZE;
1576  }
1577#if NEW_CHUNK_SIZE==2
1578  int iColumn0=thisColumn[0];
1579  double value0=region[iColumn0];
1580  value0 OPERATION multiplier*element[0];
1581  region[iColumn0]=value0;
1582#else
1583  int iColumn0=thisColumn[0];
1584  int iColumn1=thisColumn[1];
1585  int iColumn2=thisColumn[2];
1586  double value0=region[iColumn0];
1587  double value1=region[iColumn1];
1588  double value2=region[iColumn2];
1589  value0 OPERATION multiplier*element[0];
1590  value1 OPERATION multiplier*element[1];
1591  value2 OPERATION multiplier*element[2];
1592  region[iColumn0]=value0;
1593  region[iColumn1]=value1;
1594  region[iColumn2]=value2;
1595#endif
1596}
1597#endif
Note: See TracBrowser for help on using the repository browser.