Ignore:
Timestamp:
Jan 6, 2019 2:43:06 PM (3 months ago)
Author:
unxusr
Message:

formatting

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Clp/src/CoinAbcHelperFunctions.hpp

    r2042 r2385  
    99#include "ClpConfig.h"
    1010#ifdef HAVE_CMATH
    11 # include <cmath>
    12 #else
    13 # ifdef HAVE_MATH_H
    14 #  include <math.h>
    15 # else
    16 # include <cmath>
    17 # endif
     11#include <cmath>
     12#else
     13#ifdef HAVE_MATH_H
     14#include <math.h>
     15#else
     16#include <cmath>
     17#endif
    1818#endif
    1919#include "CoinAbcCommon.hpp"
    2020#ifndef abc_assert
    21 #define abc_assert(condition)                                                   \
    22   { if (!condition) {printf("abc_assert in %s at line %d - %s is false\n", \
    23                             __FILE__, __LINE__, __STRING(condition)); abort();} }
     21#define abc_assert(condition)                               \
     22  {                                                         \
     23    if (!condition) {                                       \
     24      printf("abc_assert in %s at line %d - %s is false\n", \
     25        __FILE__, __LINE__, __STRING(condition));           \
     26      abort();                                              \
     27    }                                                       \
     28  }
    2429#endif
    2530// cilk_for granularity.
    2631#define CILK_FOR_GRAINSIZE 128
    2732//#define AVX2 2
    28 #if AVX2==1
     33#if AVX2 == 1
    2934#include "emmintrin.h"
    30 #elif AVX2==2
     35#elif AVX2 == 2
    3136#include <immintrin.h>
    32 #elif AVX2==3
     37#elif AVX2 == 3
    3338#include "avx2intrin.h"
    3439#endif
     
    4348#define UNROLL_SCATTER 2
    4449#define INLINE_SCATTER 1
    45 #if INLINE_SCATTER==0
    46 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
    47                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    48                           const int *  COIN_RESTRICT thisIndex,
    49                           CoinFactorizationDouble * COIN_RESTRICT region);
    50 #else
    51 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
    52                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    53                                             const int *  COIN_RESTRICT thisIndex,
    54                                             CoinFactorizationDouble * COIN_RESTRICT region)
     50#if INLINE_SCATTER == 0
     51void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
     52  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     53  const int *COIN_RESTRICT thisIndex,
     54  CoinFactorizationDouble *COIN_RESTRICT region);
     55#else
     56void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
     57  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     58  const int *COIN_RESTRICT thisIndex,
     59  CoinFactorizationDouble *COIN_RESTRICT region)
    5560{
    56 #if UNROLL_SCATTER==0
    57   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
     61#if UNROLL_SCATTER == 0
     62  for (CoinBigIndex j = number - 1; j >= 0; j--) {
    5863    CoinSimplexInt iRow = thisIndex[j];
    5964    CoinFactorizationDouble regionValue = region[iRow];
    6065    CoinFactorizationDouble value = thisElement[j];
    61     assert (value);
     66    assert(value);
    6267    region[iRow] = regionValue - value * pivotValue;
    6368  }
    64 #elif UNROLL_SCATTER==1
    65   if ((number&1)!=0) {
     69#elif UNROLL_SCATTER == 1
     70  if ((number & 1) != 0) {
    6671    number--;
    6772    CoinSimplexInt iRow = thisIndex[number];
     
    7075    region[iRow] = regionValue - value * pivotValue;
    7176  }
    72   for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     77  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    7378    CoinSimplexInt iRow0 = thisIndex[j];
    74     CoinSimplexInt iRow1 = thisIndex[j-1];
     79    CoinSimplexInt iRow1 = thisIndex[j - 1];
    7580    CoinFactorizationDouble regionValue0 = region[iRow0];
    7681    CoinFactorizationDouble regionValue1 = region[iRow1];
    7782    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    78     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    79   }
    80 #elif UNROLL_SCATTER==2
    81   if ((number&1)!=0) {
     83    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     84  }
     85#elif UNROLL_SCATTER == 2
     86  if ((number & 1) != 0) {
    8287    number--;
    8388    CoinSimplexInt iRow = thisIndex[number];
    8489    CoinFactorizationDouble regionValue = region[iRow];
    85     CoinFactorizationDouble value = thisElement[number]; 
     90    CoinFactorizationDouble value = thisElement[number];
    8691    region[iRow] = regionValue - value * pivotValue;
    8792  }
    88   if ((number&2)!=0) {
    89     CoinSimplexInt iRow0 = thisIndex[number-1];
     93  if ((number & 2) != 0) {
     94    CoinSimplexInt iRow0 = thisIndex[number - 1];
    9095    CoinFactorizationDouble regionValue0 = region[iRow0];
    91     CoinFactorizationDouble value0 = thisElement[number-1];
    92     CoinSimplexInt iRow1 = thisIndex[number-2];
     96    CoinFactorizationDouble value0 = thisElement[number - 1];
     97    CoinSimplexInt iRow1 = thisIndex[number - 2];
    9398    CoinFactorizationDouble regionValue1 = region[iRow1];
    94     CoinFactorizationDouble value1 = thisElement[number-2];
     99    CoinFactorizationDouble value1 = thisElement[number - 2];
    95100    region[iRow0] = regionValue0 - value0 * pivotValue;
    96101    region[iRow1] = regionValue1 - value1 * pivotValue;
    97     number-=2;
    98   }
    99 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    100   cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     102    number -= 2;
     103  }
     104#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     105  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
     106  {
    101107    CoinSimplexInt iRow0 = thisIndex[j];
    102     CoinSimplexInt iRow1 = thisIndex[j-1];
     108    CoinSimplexInt iRow1 = thisIndex[j - 1];
    103109    CoinFactorizationDouble regionValue0 = region[iRow0];
    104110    CoinFactorizationDouble regionValue1 = region[iRow1];
    105111    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    106     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    107     CoinSimplexInt iRow2 = thisIndex[j-2];
    108     CoinSimplexInt iRow3 = thisIndex[j-3];
     112    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     113    CoinSimplexInt iRow2 = thisIndex[j - 2];
     114    CoinSimplexInt iRow3 = thisIndex[j - 3];
    109115    CoinFactorizationDouble regionValue2 = region[iRow2];
    110116    CoinFactorizationDouble regionValue3 = region[iRow3];
    111     region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
    112     region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
    113   }
    114 #elif UNROLL_SCATTER==3
     117    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
     118    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
     119  }
     120#elif UNROLL_SCATTER == 3
    115121  CoinSimplexInt iRow0;
    116122  CoinSimplexInt iRow1;
    117123  CoinFactorizationDouble regionValue0;
    118124  CoinFactorizationDouble regionValue1;
    119   switch(static_cast<unsigned int>(number)) {
     125  switch (static_cast< unsigned int >(number)) {
    120126  case 0:
    121127    break;
     
    245251    break;
    246252  default:
    247     if ((number&1)!=0) {
     253    if ((number & 1) != 0) {
    248254      number--;
    249255      CoinSimplexInt iRow = thisIndex[number];
     
    252258      region[iRow] = regionValue - value * pivotValue;
    253259    }
    254     for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     260    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    255261      CoinSimplexInt iRow0 = thisIndex[j];
    256       CoinSimplexInt iRow1 = thisIndex[j-1];
     262      CoinSimplexInt iRow1 = thisIndex[j - 1];
    257263      CoinFactorizationDouble regionValue0 = region[iRow0];
    258264      CoinFactorizationDouble regionValue1 = region[iRow1];
    259265      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    260       region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
     266      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
    261267    }
    262268    break;
     
    264270#endif
    265271}
    266 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
    267                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    268                                             CoinFactorizationDouble * COIN_RESTRICT region)
     272void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
     273  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     274  CoinFactorizationDouble *COIN_RESTRICT region)
    269275{
    270 #if UNROLL_SCATTER==0
    271   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
    272   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
     276#if UNROLL_SCATTER == 0
     277  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
     278  for (CoinBigIndex j = number - 1; j >= 0; j--) {
    273279    CoinSimplexInt iRow = thisIndex[j];
    274280    CoinFactorizationDouble regionValue = region[iRow];
    275281    CoinFactorizationDouble value = thisElement[j];
    276     assert (value);
     282    assert(value);
    277283    region[iRow] = regionValue - value * pivotValue;
    278284  }
    279 #elif UNROLL_SCATTER==1
    280   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
    281   if ((number&1)!=0) {
     285#elif UNROLL_SCATTER == 1
     286  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
     287  if ((number & 1) != 0) {
    282288    number--;
    283289    CoinSimplexInt iRow = thisIndex[number];
     
    286292    region[iRow] = regionValue - value * pivotValue;
    287293  }
    288   for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     294  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    289295    CoinSimplexInt iRow0 = thisIndex[j];
    290     CoinSimplexInt iRow1 = thisIndex[j-1];
     296    CoinSimplexInt iRow1 = thisIndex[j - 1];
    291297    CoinFactorizationDouble regionValue0 = region[iRow0];
    292298    CoinFactorizationDouble regionValue1 = region[iRow1];
    293299    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    294     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    295   }
    296 #elif UNROLL_SCATTER==2
    297   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
    298   if ((number&1)!=0) {
     300    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     301  }
     302#elif UNROLL_SCATTER == 2
     303  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
     304  if ((number & 1) != 0) {
    299305    number--;
    300306    CoinSimplexInt iRow = thisIndex[number];
    301307    CoinFactorizationDouble regionValue = region[iRow];
    302     CoinFactorizationDouble value = thisElement[number]; 
     308    CoinFactorizationDouble value = thisElement[number];
    303309    region[iRow] = regionValue - value * pivotValue;
    304310  }
    305   if ((number&2)!=0) {
    306     CoinSimplexInt iRow0 = thisIndex[number-1];
     311  if ((number & 2) != 0) {
     312    CoinSimplexInt iRow0 = thisIndex[number - 1];
    307313    CoinFactorizationDouble regionValue0 = region[iRow0];
    308     CoinFactorizationDouble value0 = thisElement[number-1];
    309     CoinSimplexInt iRow1 = thisIndex[number-2];
     314    CoinFactorizationDouble value0 = thisElement[number - 1];
     315    CoinSimplexInt iRow1 = thisIndex[number - 2];
    310316    CoinFactorizationDouble regionValue1 = region[iRow1];
    311     CoinFactorizationDouble value1 = thisElement[number-2];
     317    CoinFactorizationDouble value1 = thisElement[number - 2];
    312318    region[iRow0] = regionValue0 - value0 * pivotValue;
    313319    region[iRow1] = regionValue1 - value1 * pivotValue;
    314     number-=2;
    315   }
    316 #if AVX2==22
    317   CoinFactorizationDouble temp[4] __attribute__ ((aligned (32)));
     320    number -= 2;
     321  }
     322#if AVX2 == 22
     323  CoinFactorizationDouble temp[4] __attribute__((aligned(32)));
    318324  __m256d pv = _mm256_broadcast_sd(&pivotValue);
    319   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
    320     __m256d elements=_mm256_loadu_pd(thisElement+j-3);
    321     CoinSimplexInt iRow0 = thisIndex[j-3];
    322     CoinSimplexInt iRow1 = thisIndex[j-2];
    323     CoinSimplexInt iRow2 = thisIndex[j-1];
    324     CoinSimplexInt iRow3 = thisIndex[j-0];
     325  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
     326    __m256d elements = _mm256_loadu_pd(thisElement + j - 3);
     327    CoinSimplexInt iRow0 = thisIndex[j - 3];
     328    CoinSimplexInt iRow1 = thisIndex[j - 2];
     329    CoinSimplexInt iRow2 = thisIndex[j - 1];
     330    CoinSimplexInt iRow3 = thisIndex[j - 0];
    325331    temp[0] = region[iRow0];
    326332    temp[1] = region[iRow1];
    327333    temp[2] = region[iRow2];
    328334    temp[3] = region[iRow3];
    329     __m256d t0=_mm256_load_pd(temp);
    330     t0 -= pv*elements;
    331     _mm256_store_pd (temp, t0);
     335    __m256d t0 = _mm256_load_pd(temp);
     336    t0 -= pv * elements;
     337    _mm256_store_pd(temp, t0);
    332338    region[iRow0] = temp[0];
    333339    region[iRow1] = temp[1];
     
    336342  }
    337343#else
    338 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    339   cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     344#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     345  cilk_for(CoinBigIndex j = number - 1; j >= 0; j -= 4)
     346  {
    340347    CoinSimplexInt iRow0 = thisIndex[j];
    341     CoinSimplexInt iRow1 = thisIndex[j-1];
     348    CoinSimplexInt iRow1 = thisIndex[j - 1];
    342349    CoinFactorizationDouble regionValue0 = region[iRow0];
    343350    CoinFactorizationDouble regionValue1 = region[iRow1];
    344351    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    345     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    346     CoinSimplexInt iRow2 = thisIndex[j-2];
    347     CoinSimplexInt iRow3 = thisIndex[j-3];
     352    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     353    CoinSimplexInt iRow2 = thisIndex[j - 2];
     354    CoinSimplexInt iRow3 = thisIndex[j - 3];
    348355    CoinFactorizationDouble regionValue2 = region[iRow2];
    349356    CoinFactorizationDouble regionValue3 = region[iRow3];
    350     region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
    351     region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
    352   }
    353 #endif
    354 #elif UNROLL_SCATTER==3
    355   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
     357    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
     358    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
     359  }
     360#endif
     361#elif UNROLL_SCATTER == 3
     362  const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number);
    356363  CoinSimplexInt iRow0;
    357364  CoinSimplexInt iRow1;
    358365  CoinFactorizationDouble regionValue0;
    359366  CoinFactorizationDouble regionValue1;
    360   switch(static_cast<unsigned int>(number)) {
     367  switch (static_cast< unsigned int >(number)) {
    361368  case 0:
    362369    break;
     
    486493    break;
    487494  default:
    488     if ((number&1)!=0) {
     495    if ((number & 1) != 0) {
    489496      number--;
    490497      CoinSimplexInt iRow = thisIndex[number];
     
    493500      region[iRow] = regionValue - value * pivotValue;
    494501    }
    495     for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     502    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    496503      CoinSimplexInt iRow0 = thisIndex[j];
    497       CoinSimplexInt iRow1 = thisIndex[j-1];
     504      CoinSimplexInt iRow1 = thisIndex[j - 1];
    498505      CoinFactorizationDouble regionValue0 = region[iRow0];
    499506      CoinFactorizationDouble regionValue1 = region[iRow1];
    500507      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    501       region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
     508      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
    502509    }
    503510    break;
     
    509516#ifdef COIN_PREFETCH
    510517#if 1
    511 #define coin_prefetch(mem)                                              \
    512   __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<char *>(mem))))
    513 #define coin_prefetch_const(mem)                                        \
    514   __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
    515 #else
    516 #define coin_prefetch(mem)                                              \
    517   __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<char *>(mem))))
    518 #define coin_prefetch_const(mem)                                        \
    519   __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
     518#define coin_prefetch(mem)              \
     519  __asm__ __volatile__("prefetchnta %0" \
     520                       :                \
     521                       : "m"(*(reinterpret_cast< char * >(mem))))
     522#define coin_prefetch_const(mem)        \
     523  __asm__ __volatile__("prefetchnta %0" \
     524                       :                \
     525                       : "m"(*(reinterpret_cast< const char * >(mem))))
     526#else
     527#define coin_prefetch(mem)           \
     528  __asm__ __volatile__("prefetch %0" \
     529                       :             \
     530                       : "m"(*(reinterpret_cast< char * >(mem))))
     531#define coin_prefetch_const(mem)     \
     532  __asm__ __volatile__("prefetch %0" \
     533                       :             \
     534                       : "m"(*(reinterpret_cast< const char * >(mem))))
    520535#endif
    521536#else
     
    525540#endif
    526541#define NEW_CHUNK_SIZE 4
    527 #define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE+NEW_CHUNK_SIZE/2);
    528 #define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE/2)
    529 // leaf, pure, nothrow and hot give warnings 
     542#define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE + NEW_CHUNK_SIZE / 2);
     543#define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE / 2)
     544// leaf, pure, nothrow and hot give warnings
    530545// fastcall and sseregparm give wrong results
    531546//#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
    532 #define SCATTER_ATTRIBUTE 
    533 typedef void (*scatterUpdate) (int,CoinFactorizationDouble,const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE ;
     547#define SCATTER_ATTRIBUTE
     548typedef void (*scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE;
    534549typedef struct {
    535550  scatterUpdate functionPointer;
     
    538553} scatterStruct;
    539554void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
    540                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    541                           CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
     555  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     556  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    542557void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
    543                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    544                            CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
     558  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     559  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    545560void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
    546                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    547                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     561  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     562  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    548563void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
    549                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    550                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     564  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     565  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    551566void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
    552                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    553                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     567  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     568  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    554569void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
    555                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    556                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     570  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     571  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    557572void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
    558                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    559                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     573  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     574  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    560575void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
    561                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    562                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     576  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     577  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    563578void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
    564                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    565                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     579  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     580  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    566581void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
    567                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    568                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     582  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     583  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    569584void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
    570                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    571                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     585  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     586  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    572587void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
    573                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    574                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     588  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     589  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    575590void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
    576                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    577                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     591  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     592  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    578593void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
    579                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    580                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     594  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     595  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    581596void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
    582                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    583                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     597  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     598  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    584599void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
    585                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    586                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     600  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     601  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    587602void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
    588                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    589                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     603  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     604  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    590605void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
    591                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    592                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     606  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     607  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    593608void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
    594                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    595                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     609  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     610  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    596611void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
    597                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    598                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     612  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     613  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    599614void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
    600                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    601                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     615  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     616  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    602617void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
    603                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    604                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     618  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     619  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    605620void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
    606                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    607                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     621  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     622  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    608623void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
    609                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    610                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     624  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     625  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    611626void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
    612                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    613                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     627  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     628  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    614629void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
    615                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    616                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     630  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     631  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    617632void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
    618                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    619                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     633  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     634  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    620635void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
    621                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    622                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     636  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     637  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    623638void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
    624                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    625                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     639  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     640  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    626641void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
    627                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    628                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     642  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     643  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    629644void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
    630                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    631                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     645  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     646  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    632647void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
    633                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    634                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     648  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     649  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    635650void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
    636                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    637                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     651  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     652  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    638653void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
    639                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    640                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     654  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     655  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    641656void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
    642                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    643                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     657  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     658  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    644659void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
    645                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    646                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
     660  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     661  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
    647662void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
    648                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    649                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
    650 #if INLINE_SCATTER==0
    651 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
    652                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    653                           const int *  COIN_RESTRICT thisIndex,
    654                           CoinFactorizationDouble * COIN_RESTRICT region,
    655                           double * COIN_RESTRICT work);
     663  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     664  CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE;
     665#if INLINE_SCATTER == 0
     666void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
     667  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     668  const int *COIN_RESTRICT thisIndex,
     669  CoinFactorizationDouble *COIN_RESTRICT region,
     670  double *COIN_RESTRICT work);
    656671#else
    657672#if 0
     
    662677                                            double * COIN_RESTRICT /*work*/)
    663678{
    664 #if UNROLL_SCATTER==0
     679#if UNROLL_SCATTER == 0
    665680  for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
    666681    CoinSimplexInt iRow = thisIndex[j];
     
    670685    region[iRow] = regionValue - value * pivotValue;
    671686  }
    672 #elif UNROLL_SCATTER==1
     687#elif UNROLL_SCATTER == 1
    673688  if ((number&1)!=0) {
    674689    CoinSimplexInt iRow = thisIndex[0];
     
    706721#define UNROLL_GATHER 0
    707722#define INLINE_GATHER 1
    708 #if INLINE_GATHER==0
     723#if INLINE_GATHER == 0
    709724CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
    710                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    711                           const int *  COIN_RESTRICT thisIndex,
    712                            CoinFactorizationDouble * COIN_RESTRICT region);
     725  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     726  const int *COIN_RESTRICT thisIndex,
     727  CoinFactorizationDouble *COIN_RESTRICT region);
    713728#else
    714729CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
    715                                              const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    716                                              const int *  COIN_RESTRICT thisIndex,
    717                                              CoinFactorizationDouble * COIN_RESTRICT region)
     730  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     731  const int *COIN_RESTRICT thisIndex,
     732  CoinFactorizationDouble *COIN_RESTRICT region)
    718733{
    719 #if UNROLL_GATHER==0
    720   CoinFactorizationDouble pivotValue=0.0;
    721   for (CoinBigIndex j = 0; j < number; j ++ ) {
     734#if UNROLL_GATHER == 0
     735  CoinFactorizationDouble pivotValue = 0.0;
     736  for (CoinBigIndex j = 0; j < number; j++) {
    722737    CoinFactorizationDouble value = thisElement[j];
    723738    CoinSimplexInt jRow = thisIndex[j];
     
    733748#define UNROLL_MULTIPLY_INDEXED 0
    734749#define INLINE_MULTIPLY_INDEXED 0
    735 #if INLINE_MULTIPLY_INDEXED==0
     750#if INLINE_MULTIPLY_INDEXED == 0
    736751void CoinAbcMultiplyIndexed(int number,
    737                             const double *  COIN_RESTRICT multiplier,
    738                             const int *  COIN_RESTRICT thisIndex,
    739                             CoinFactorizationDouble * COIN_RESTRICT region);
     752  const double *COIN_RESTRICT multiplier,
     753  const int *COIN_RESTRICT thisIndex,
     754  CoinFactorizationDouble *COIN_RESTRICT region);
    740755void CoinAbcMultiplyIndexed(int number,
    741                             const long double *  COIN_RESTRICT multiplier,
    742                             const int *  COIN_RESTRICT thisIndex,
    743                             long double * COIN_RESTRICT region);
     756  const long double *COIN_RESTRICT multiplier,
     757  const int *COIN_RESTRICT thisIndex,
     758  long double *COIN_RESTRICT region);
    744759#else
    745760void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
    746                             const double *  COIN_RESTRICT multiplier,
    747                             const int *  COIN_RESTRICT thisIndex,
    748                             CoinFactorizationDouble * COIN_RESTRICT region)
     761  const double *COIN_RESTRICT multiplier,
     762  const int *COIN_RESTRICT thisIndex,
     763  CoinFactorizationDouble *COIN_RESTRICT region)
    749764{
    750765}
    751766#endif
    752 double CoinAbcMaximumAbsElement(const double * region, int size);
    753 void CoinAbcMinMaxAbsElement(const double * region, int size,double & minimum , double & maximum);
    754 void CoinAbcMinMaxAbsNormalValues(const double * region, int size,double & minimum , double & maximum);
    755 void CoinAbcScale(double * region, double multiplier,int size);
    756 void CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int size);
     767double CoinAbcMaximumAbsElement(const double *region, int size);
     768void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum);
     769void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum);
     770void CoinAbcScale(double *region, double multiplier, int size);
     771void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size);
    757772/// maximum fabs(region[i]) and then region[i]*=multiplier
    758 double CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int size);
    759 void CoinAbcSetElements(double * region, int size, double value);
    760 void CoinAbcMultiplyAdd(const double * region1, int size, double multiplier1,
    761                  double * regionChanged, double multiplier2);
    762 double CoinAbcInnerProduct(const double * region1, int size, const double * region2);
    763 void CoinAbcGetNorms(const double * region, int size, double & norm1, double & norm2);
     773double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size);
     774void CoinAbcSetElements(double *region, int size, double value);
     775void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1,
     776  double *regionChanged, double multiplier2);
     777double CoinAbcInnerProduct(const double *region1, int size, const double *region2);
     778void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2);
    764779/// regionTo[index[i]]=regionFrom[i]
    765 void CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int number);
     780void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number);
    766781/// regionTo[i]=regionFrom[index[i]]
    767 void CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int number);
     782void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number);
    768783/// regionTo[index[i]]=0.0
    769 void CoinAbcScatterZeroTo(double * regionTo, const int * index,int number);
     784void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number);
    770785/// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
    771 void CoinAbcScatterToList(const double * regionFrom, double * regionTo,
    772                    const int * indexList, const int * indexScatter ,int number);
     786void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
     787  const int *indexList, const int *indexScatter, int number);
    773788/// array[i]=1.0/sqrt(array[i])
    774 void CoinAbcInverseSqrts(double * array, int n);
    775 void CoinAbcReciprocal(double * array, int n, const double *input);
    776 void CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size);
    777 void CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size);
    778 void CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size);
    779 void CoinAbcMemset0Long(double * array,int size);
    780 void CoinAbcMemset0Long(int * array,int size);
    781 void CoinAbcMemset0Long(unsigned char * array,int size);
    782 void CoinAbcMemmove(double * array,const double * arrayFrom,int size);
    783 void CoinAbcMemmove(int * array,const int * arrayFrom,int size);
    784 void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size);
     789void CoinAbcInverseSqrts(double *array, int n);
     790void CoinAbcReciprocal(double *array, int n, const double *input);
     791void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size);
     792void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size);
     793void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size);
     794void CoinAbcMemset0Long(double *array, int size);
     795void CoinAbcMemset0Long(int *array, int size);
     796void CoinAbcMemset0Long(unsigned char *array, int size);
     797void CoinAbcMemmove(double *array, const double *arrayFrom, int size);
     798void CoinAbcMemmove(int *array, const int *arrayFrom, int size);
     799void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size);
    785800/// This moves down and zeroes out end
    786 void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size);
     801void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size);
    787802/// This compacts several sections and zeroes out end (returns number)
    788 int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths);
     803int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths);
    789804/// This compacts several sections (returns number)
    790 int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths);
     805int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths);
    791806#endif
    792807#if ABC_CREATE_SCATTER_FUNCTION
    793808SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
    794                           const CoinFactorizationDouble *  COIN_RESTRICT element,
    795                           CoinFactorizationDouble * COIN_RESTRICT region)
     809  const CoinFactorizationDouble *COIN_RESTRICT element,
     810  CoinFactorizationDouble *COIN_RESTRICT region)
    796811{
    797812#ifndef NDEBUG
    798   assert (numberIn==1);
    799 #endif
    800   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+1);
    801   int iColumn0=thisColumn[0];
    802   double value0=region[iColumn0];
    803   value0 OPERATION multiplier*element[0];
    804   region[iColumn0]=value0;
     813  assert(numberIn == 1);
     814#endif
     815  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 1);
     816  int iColumn0 = thisColumn[0];
     817  double value0 = region[iColumn0];
     818  value0 OPERATION multiplier *element[0];
     819  region[iColumn0] = value0;
    805820}
    806821SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
    807                           const CoinFactorizationDouble *  COIN_RESTRICT element,
    808                           CoinFactorizationDouble * COIN_RESTRICT region)
     822  const CoinFactorizationDouble *COIN_RESTRICT element,
     823  CoinFactorizationDouble *COIN_RESTRICT region)
    809824{
    810825#ifndef NDEBUG
    811   assert (numberIn==2);
    812 #endif
    813   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+2);
    814 #if NEW_CHUNK_SIZE==2
    815   int nFull=2&(~(NEW_CHUNK_SIZE-1));
    816   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    817     coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
    818     int iColumn0=thisColumn[0];
    819     int iColumn1=thisColumn[1];
    820     CoinFactorizationDouble value0=region[iColumn0];
    821     CoinFactorizationDouble value1=region[iColumn1];
    822     value0 OPERATION multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
    823     value1 OPERATION multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
    824     region[iColumn0]=value0;
    825     region[iColumn1]=value1;
    826     element+=NEW_CHUNK_SIZE_INCREMENT;
    827     thisColumn = reinterpret_cast<const int *>(element);
    828   }
    829 #endif
    830 #if NEW_CHUNK_SIZE==4
    831   int iColumn0=thisColumn[0];
    832   int iColumn1=thisColumn[1];
    833   CoinFactorizationDouble value0=region[iColumn0];
    834   CoinFactorizationDouble value1=region[iColumn1];
    835   value0 OPERATION multiplier*element[0];
    836   value1 OPERATION multiplier*element[1];
    837   region[iColumn0]=value0;
    838   region[iColumn1]=value1;
     826  assert(numberIn == 2);
     827#endif
     828  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 2);
     829#if NEW_CHUNK_SIZE == 2
     830  int nFull = 2 & (~(NEW_CHUNK_SIZE - 1));
     831  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     832    coin_prefetch(element + NEW_CHUNK_SIZE_INCREMENT);
     833    int iColumn0 = thisColumn[0];
     834    int iColumn1 = thisColumn[1];
     835    CoinFactorizationDouble value0 = region[iColumn0];
     836    CoinFactorizationDouble value1 = region[iColumn1];
     837    value0 OPERATION multiplier *element[0 + NEW_CHUNK_SIZE_OFFSET];
     838    value1 OPERATION multiplier *element[1 + NEW_CHUNK_SIZE_OFFSET];
     839    region[iColumn0] = value0;
     840    region[iColumn1] = value1;
     841    element += NEW_CHUNK_SIZE_INCREMENT;
     842    thisColumn = reinterpret_cast< const int * >(element);
     843  }
     844#endif
     845#if NEW_CHUNK_SIZE == 4
     846  int iColumn0 = thisColumn[0];
     847  int iColumn1 = thisColumn[1];
     848  CoinFactorizationDouble value0 = region[iColumn0];
     849  CoinFactorizationDouble value1 = region[iColumn1];
     850  value0 OPERATION multiplier *element[0];
     851  value1 OPERATION multiplier *element[1];
     852  region[iColumn0] = value0;
     853  region[iColumn1] = value1;
    839854#endif
    840855}
    841856SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
    842                           const CoinFactorizationDouble *  COIN_RESTRICT element,
    843                           CoinFactorizationDouble * COIN_RESTRICT region)
     857  const CoinFactorizationDouble *COIN_RESTRICT element,
     858  CoinFactorizationDouble *COIN_RESTRICT region)
    844859{
    845860#ifndef NDEBUG
    846   assert (numberIn==3);
    847 #endif
    848   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+3);
    849 #if AVX2==1
     861  assert(numberIn == 3);
     862#endif
     863  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 3);
     864#if AVX2 == 1
    850865  double temp[2];
    851866#endif
    852 #if NEW_CHUNK_SIZE==2
    853   int nFull=3&(~(NEW_CHUNK_SIZE-1));
    854   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
     867#if NEW_CHUNK_SIZE == 2
     868  int nFull = 3 & (~(NEW_CHUNK_SIZE - 1));
     869  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
    855870    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
    856     int iColumn0=thisColumn[0];
    857     int iColumn1=thisColumn[1];
    858     CoinFactorizationDouble value0=region[iColumn0];
    859     CoinFactorizationDouble value1=region[iColumn1];
    860     value0 OPERATION multiplier*element[0];
    861     value1 OPERATION multiplier*element[1];
    862     region[iColumn0]=value0;
    863     region[iColumn1]=value1;
    864     element+=NEW_CHUNK_SIZE;
    865     thisColumn+ = NEW_CHUNK_SIZE;
    866   }
    867 #endif
    868 #if NEW_CHUNK_SIZE==2
    869   int iColumn0=thisColumn[0];
    870   double value0=region[iColumn0];
    871   value0 OPERATION multiplier*element[0];
    872   region[iColumn0]=value0;
    873 #else
    874   int iColumn0=thisColumn[0];
    875   int iColumn1=thisColumn[1];
    876   int iColumn2=thisColumn[2];
    877 #if AVX2==1
     871    int iColumn0 = thisColumn[0];
     872    int iColumn1 = thisColumn[1];
     873    CoinFactorizationDouble value0 = region[iColumn0];
     874    CoinFactorizationDouble value1 = region[iColumn1];
     875    value0 OPERATION multiplier *element[0];
     876    value1 OPERATION multiplier *element[1];
     877    region[iColumn0] = value0;
     878    region[iColumn1] = value1;
     879    element += NEW_CHUNK_SIZE;
     880    thisColumn + = NEW_CHUNK_SIZE;
     881  }
     882#endif
     883#if NEW_CHUNK_SIZE == 2
     884  int iColumn0 = thisColumn[0];
     885  double value0 = region[iColumn0];
     886  value0 OPERATION multiplier *element[0];
     887  region[iColumn0] = value0;
     888#else
     889  int iColumn0 = thisColumn[0];
     890  int iColumn1 = thisColumn[1];
     891  int iColumn2 = thisColumn[2];
     892#if AVX2 == 1
    878893  __v2df bb;
    879   double value2=region[iColumn2];
    880   value2 OPERATION multiplier*element[2];
    881   set_const_v2df(bb,multiplier);
    882   temp[0]=region[iColumn0];
    883   temp[1]=region[iColumn1];
    884   region[iColumn2]=value2;
    885   __v2df v0 = __builtin_ia32_loadupd (temp);
    886   __v2df a = __builtin_ia32_loadupd (element);
     894  double value2 = region[iColumn2];
     895  value2 OPERATION multiplier *element[2];
     896  set_const_v2df(bb, multiplier);
     897  temp[0] = region[iColumn0];
     898  temp[1] = region[iColumn1];
     899  region[iColumn2] = value2;
     900  __v2df v0 = __builtin_ia32_loadupd(temp);
     901  __v2df a = __builtin_ia32_loadupd(element);
    887902  a *= bb;
    888903  v0 OPERATION a;
    889   __builtin_ia32_storeupd (temp, v0);
    890   region[iColumn0]=temp[0];
    891   region[iColumn1]=temp[1];
    892 #else
    893   double value0=region[iColumn0];
    894   double value1=region[iColumn1];
    895   double value2=region[iColumn2];
    896   value0 OPERATION multiplier*element[0];
    897   value1 OPERATION multiplier*element[1];
    898   value2 OPERATION multiplier*element[2];
    899   region[iColumn0]=value0;
    900   region[iColumn1]=value1;
    901   region[iColumn2]=value2;
     904  __builtin_ia32_storeupd(temp, v0);
     905  region[iColumn0] = temp[0];
     906  region[iColumn1] = temp[1];
     907#else
     908  double value0 = region[iColumn0];
     909  double value1 = region[iColumn1];
     910  double value2 = region[iColumn2];
     911  value0 OPERATION multiplier *element[0];
     912  value1 OPERATION multiplier *element[1];
     913  value2 OPERATION multiplier *element[2];
     914  region[iColumn0] = value0;
     915  region[iColumn1] = value1;
     916  region[iColumn2] = value2;
    902917#endif
    903918#endif
    904919}
    905920SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
    906                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    907                            CoinFactorizationDouble * COIN_RESTRICT region)
     921  const CoinFactorizationDouble *COIN_RESTRICT element,
     922  CoinFactorizationDouble *COIN_RESTRICT region)
    908923{
    909924#ifndef NDEBUG
    910   assert (numberIn==4);
    911 #endif
    912   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+4);
    913   int nFull=4&(~(NEW_CHUNK_SIZE-1));
    914 #if AVX2==1
     925  assert(numberIn == 4);
     926#endif
     927  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 4);
     928  int nFull = 4 & (~(NEW_CHUNK_SIZE - 1));
     929#if AVX2 == 1
    915930  double temp[4];
    916931#endif
    917   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
     932  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
    918933    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
    919 #if NEW_CHUNK_SIZE==2
    920     int iColumn0=thisColumn[0];
    921     int iColumn1=thisColumn[1];
    922     double value0=region[iColumn0];
    923     double value1=region[iColumn1];
    924     value0 OPERATION multiplier*element[0];
    925     value1 OPERATION multiplier*element[1];
    926     region[iColumn0]=value0;
    927     region[iColumn1]=value1;
    928 #elif NEW_CHUNK_SIZE==4
    929     int iColumn0=thisColumn[0];
    930     int iColumn1=thisColumn[1];
    931     int iColumn2=thisColumn[2];
    932     int iColumn3=thisColumn[3];
    933 #if AVX2==1
     934#if NEW_CHUNK_SIZE == 2
     935    int iColumn0 = thisColumn[0];
     936    int iColumn1 = thisColumn[1];
     937    double value0 = region[iColumn0];
     938    double value1 = region[iColumn1];
     939    value0 OPERATION multiplier *element[0];
     940    value1 OPERATION multiplier *element[1];
     941    region[iColumn0] = value0;
     942    region[iColumn1] = value1;
     943#elif NEW_CHUNK_SIZE == 4
     944    int iColumn0 = thisColumn[0];
     945    int iColumn1 = thisColumn[1];
     946    int iColumn2 = thisColumn[2];
     947    int iColumn3 = thisColumn[3];
     948#if AVX2 == 1
    934949    __v2df bb;
    935     set_const_v2df(bb,multiplier);
    936     temp[0]=region[iColumn0];
    937     temp[1]=region[iColumn1];
    938     temp[2]=region[iColumn2];
    939     temp[3]=region[iColumn3];
    940     __v2df v0 = __builtin_ia32_loadupd (temp);
    941     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    942     __v2df a = __builtin_ia32_loadupd (element);
     950    set_const_v2df(bb, multiplier);
     951    temp[0] = region[iColumn0];
     952    temp[1] = region[iColumn1];
     953    temp[2] = region[iColumn2];
     954    temp[3] = region[iColumn3];
     955    __v2df v0 = __builtin_ia32_loadupd(temp);
     956    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     957    __v2df a = __builtin_ia32_loadupd(element);
    943958    a *= bb;
    944959    v0 OPERATION a;
    945     a = __builtin_ia32_loadupd (element+2);
     960    a = __builtin_ia32_loadupd(element + 2);
    946961    a *= bb;
    947962    v1 OPERATION a;
    948     __builtin_ia32_storeupd (temp, v0);
    949     __builtin_ia32_storeupd (temp+2, v1);
    950     region[iColumn0]=temp[0];
    951     region[iColumn1]=temp[1];
    952     region[iColumn2]=temp[2];
    953     region[iColumn3]=temp[3];
    954 #else
    955     double value0=region[iColumn0];
    956     double value1=region[iColumn1];
    957     double value2=region[iColumn2];
    958     double value3=region[iColumn3];
    959     value0 OPERATION multiplier*element[0];
    960     value1 OPERATION multiplier*element[1];
    961     value2 OPERATION multiplier*element[2];
    962     value3 OPERATION multiplier*element[3];
    963     region[iColumn0]=value0;
    964     region[iColumn1]=value1;
    965     region[iColumn2]=value2;
    966     region[iColumn3]=value3;
     963    __builtin_ia32_storeupd(temp, v0);
     964    __builtin_ia32_storeupd(temp + 2, v1);
     965    region[iColumn0] = temp[0];
     966    region[iColumn1] = temp[1];
     967    region[iColumn2] = temp[2];
     968    region[iColumn3] = temp[3];
     969#else
     970    double value0 = region[iColumn0];
     971    double value1 = region[iColumn1];
     972    double value2 = region[iColumn2];
     973    double value3 = region[iColumn3];
     974    value0 OPERATION multiplier *element[0];
     975    value1 OPERATION multiplier *element[1];
     976    value2 OPERATION multiplier *element[2];
     977    value3 OPERATION multiplier *element[3];
     978    region[iColumn0] = value0;
     979    region[iColumn1] = value1;
     980    region[iColumn2] = value2;
     981    region[iColumn3] = value3;
    967982#endif
    968983#else
    969984    abort();
    970985#endif
    971     element+=NEW_CHUNK_SIZE;
     986    element += NEW_CHUNK_SIZE;
    972987    thisColumn += NEW_CHUNK_SIZE;
    973988  }
    974989}
    975990SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
    976                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    977                            CoinFactorizationDouble * COIN_RESTRICT region)
     991  const CoinFactorizationDouble *COIN_RESTRICT element,
     992  CoinFactorizationDouble *COIN_RESTRICT region)
    978993{
    979994#ifndef NDEBUG
    980   assert (numberIn==5);
    981 #endif
    982   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+5);
    983   int nFull=5&(~(NEW_CHUNK_SIZE-1));
    984 #if AVX2==1
     995  assert(numberIn == 5);
     996#endif
     997  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 5);
     998  int nFull = 5 & (~(NEW_CHUNK_SIZE - 1));
     999#if AVX2 == 1
    9851000  double temp[4];
    9861001#endif
    987   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
     1002  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
    9881003    //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
    989 #if NEW_CHUNK_SIZE==2
    990     int iColumn0=thisColumn[0];
    991     int iColumn1=thisColumn[1];
    992     double value0=region[iColumn0];
    993     double value1=region[iColumn1];
    994     value0 OPERATION multiplier*element[0];
    995     value1 OPERATION multiplier*element[1];
    996     region[iColumn0]=value0;
    997     region[iColumn1]=value1;
    998 #elif NEW_CHUNK_SIZE==4
    999     int iColumn0=thisColumn[0];
    1000     int iColumn1=thisColumn[1];
    1001     int iColumn2=thisColumn[2];
    1002     int iColumn3=thisColumn[3];
    1003 #if AVX2==1
     1004#if NEW_CHUNK_SIZE == 2
     1005    int iColumn0 = thisColumn[0];
     1006    int iColumn1 = thisColumn[1];
     1007    double value0 = region[iColumn0];
     1008    double value1 = region[iColumn1];
     1009    value0 OPERATION multiplier *element[0];
     1010    value1 OPERATION multiplier *element[1];
     1011    region[iColumn0] = value0;
     1012    region[iColumn1] = value1;
     1013#elif NEW_CHUNK_SIZE == 4
     1014    int iColumn0 = thisColumn[0];
     1015    int iColumn1 = thisColumn[1];
     1016    int iColumn2 = thisColumn[2];
     1017    int iColumn3 = thisColumn[3];
     1018#if AVX2 == 1
    10041019    __v2df bb;
    1005     set_const_v2df(bb,multiplier);
    1006     temp[0]=region[iColumn0];
    1007     temp[1]=region[iColumn1];
    1008     temp[2]=region[iColumn2];
    1009     temp[3]=region[iColumn3];
    1010     __v2df v0 = __builtin_ia32_loadupd (temp);
    1011     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1012     __v2df a = __builtin_ia32_loadupd (element);
     1020    set_const_v2df(bb, multiplier);
     1021    temp[0] = region[iColumn0];
     1022    temp[1] = region[iColumn1];
     1023    temp[2] = region[iColumn2];
     1024    temp[3] = region[iColumn3];
     1025    __v2df v0 = __builtin_ia32_loadupd(temp);
     1026    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1027    __v2df a = __builtin_ia32_loadupd(element);
    10131028    a *= bb;
    10141029    v0 OPERATION a;
    1015     a = __builtin_ia32_loadupd (element+2);
     1030    a = __builtin_ia32_loadupd(element + 2);
    10161031    a *= bb;
    10171032    v1 OPERATION a;
    1018     __builtin_ia32_storeupd (temp, v0);
    1019     __builtin_ia32_storeupd (temp+2, v1);
    1020     region[iColumn0]=temp[0];
    1021     region[iColumn1]=temp[1];
    1022     region[iColumn2]=temp[2];
    1023     region[iColumn3]=temp[3];
    1024 #else
    1025     double value0=region[iColumn0];
    1026     double value1=region[iColumn1];
    1027     double value2=region[iColumn2];
    1028     double value3=region[iColumn3];
    1029     value0 OPERATION multiplier*element[0];
    1030     value1 OPERATION multiplier*element[1];
    1031     value2 OPERATION multiplier*element[2];
    1032     value3 OPERATION multiplier*element[3];
    1033     region[iColumn0]=value0;
    1034     region[iColumn1]=value1;
    1035     region[iColumn2]=value2;
    1036     region[iColumn3]=value3;
     1033    __builtin_ia32_storeupd(temp, v0);
     1034    __builtin_ia32_storeupd(temp + 2, v1);
     1035    region[iColumn0] = temp[0];
     1036    region[iColumn1] = temp[1];
     1037    region[iColumn2] = temp[2];
     1038    region[iColumn3] = temp[3];
     1039#else
     1040    double value0 = region[iColumn0];
     1041    double value1 = region[iColumn1];
     1042    double value2 = region[iColumn2];
     1043    double value3 = region[iColumn3];
     1044    value0 OPERATION multiplier *element[0];
     1045    value1 OPERATION multiplier *element[1];
     1046    value2 OPERATION multiplier *element[2];
     1047    value3 OPERATION multiplier *element[3];
     1048    region[iColumn0] = value0;
     1049    region[iColumn1] = value1;
     1050    region[iColumn2] = value2;
     1051    region[iColumn3] = value3;
    10371052#endif
    10381053#else
    10391054    abort();
    10401055#endif
    1041     element+=NEW_CHUNK_SIZE;
     1056    element += NEW_CHUNK_SIZE;
    10421057    thisColumn += NEW_CHUNK_SIZE;
    10431058  }
    1044   int iColumn0=thisColumn[0];
    1045   double value0=region[iColumn0];
    1046   value0 OPERATION multiplier*element[0];
    1047   region[iColumn0]=value0;
     1059  int iColumn0 = thisColumn[0];
     1060  double value0 = region[iColumn0];
     1061  value0 OPERATION multiplier *element[0];
     1062  region[iColumn0] = value0;
    10481063}
    10491064SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
    1050                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    1051                            CoinFactorizationDouble * COIN_RESTRICT region)
     1065  const CoinFactorizationDouble *COIN_RESTRICT element,
     1066  CoinFactorizationDouble *COIN_RESTRICT region)
    10521067{
    10531068#ifndef NDEBUG
    1054   assert (numberIn==6);
    1055 #endif
    1056   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+6);
    1057   int nFull=6&(~(NEW_CHUNK_SIZE-1));
    1058 #if AVX2==1
     1069  assert(numberIn == 6);
     1070#endif
     1071  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 6);
     1072  int nFull = 6 & (~(NEW_CHUNK_SIZE - 1));
     1073#if AVX2 == 1
    10591074  double temp[4];
    10601075#endif
    1061   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1062     coin_prefetch_const(element+6);
    1063 #if NEW_CHUNK_SIZE==2
    1064     int iColumn0=thisColumn[0];
    1065     int iColumn1=thisColumn[1];
    1066     double value0=region[iColumn0];
    1067     double value1=region[iColumn1];
    1068     value0 OPERATION multiplier*element[0];
    1069     value1 OPERATION multiplier*element[1];
    1070     region[iColumn0]=value0;
    1071     region[iColumn1]=value1;
    1072 #elif NEW_CHUNK_SIZE==4
    1073     int iColumn0=thisColumn[0];
    1074     int iColumn1=thisColumn[1];
    1075     int iColumn2=thisColumn[2];
    1076     int iColumn3=thisColumn[3];
    1077 #if AVX2==1
     1076  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1077    coin_prefetch_const(element + 6);
     1078#if NEW_CHUNK_SIZE == 2
     1079    int iColumn0 = thisColumn[0];
     1080    int iColumn1 = thisColumn[1];
     1081    double value0 = region[iColumn0];
     1082    double value1 = region[iColumn1];
     1083    value0 OPERATION multiplier *element[0];
     1084    value1 OPERATION multiplier *element[1];
     1085    region[iColumn0] = value0;
     1086    region[iColumn1] = value1;
     1087#elif NEW_CHUNK_SIZE == 4
     1088    int iColumn0 = thisColumn[0];
     1089    int iColumn1 = thisColumn[1];
     1090    int iColumn2 = thisColumn[2];
     1091    int iColumn3 = thisColumn[3];
     1092#if AVX2 == 1
    10781093    __v2df bb;
    1079     set_const_v2df(bb,multiplier);
    1080     temp[0]=region[iColumn0];
    1081     temp[1]=region[iColumn1];
    1082     temp[2]=region[iColumn2];
    1083     temp[3]=region[iColumn3];
    1084     __v2df v0 = __builtin_ia32_loadupd (temp);
    1085     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1086     __v2df a = __builtin_ia32_loadupd (element);
     1094    set_const_v2df(bb, multiplier);
     1095    temp[0] = region[iColumn0];
     1096    temp[1] = region[iColumn1];
     1097    temp[2] = region[iColumn2];
     1098    temp[3] = region[iColumn3];
     1099    __v2df v0 = __builtin_ia32_loadupd(temp);
     1100    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1101    __v2df a = __builtin_ia32_loadupd(element);
    10871102    a *= bb;
    10881103    v0 OPERATION a;
    1089     a = __builtin_ia32_loadupd (element+2);
     1104    a = __builtin_ia32_loadupd(element + 2);
    10901105    a *= bb;
    10911106    v1 OPERATION a;
    1092     __builtin_ia32_storeupd (temp, v0);
    1093     __builtin_ia32_storeupd (temp+2, v1);
    1094     region[iColumn0]=temp[0];
    1095     region[iColumn1]=temp[1];
    1096     region[iColumn2]=temp[2];
    1097     region[iColumn3]=temp[3];
    1098 #else
    1099     double value0=region[iColumn0];
    1100     double value1=region[iColumn1];
    1101     double value2=region[iColumn2];
    1102     double value3=region[iColumn3];
    1103     value0 OPERATION multiplier*element[0];
    1104     value1 OPERATION multiplier*element[1];
    1105     value2 OPERATION multiplier*element[2];
    1106     value3 OPERATION multiplier*element[3];
    1107     region[iColumn0]=value0;
    1108     region[iColumn1]=value1;
    1109     region[iColumn2]=value2;
    1110     region[iColumn3]=value3;
     1107    __builtin_ia32_storeupd(temp, v0);
     1108    __builtin_ia32_storeupd(temp + 2, v1);
     1109    region[iColumn0] = temp[0];
     1110    region[iColumn1] = temp[1];
     1111    region[iColumn2] = temp[2];
     1112    region[iColumn3] = temp[3];
     1113#else
     1114    double value0 = region[iColumn0];
     1115    double value1 = region[iColumn1];
     1116    double value2 = region[iColumn2];
     1117    double value3 = region[iColumn3];
     1118    value0 OPERATION multiplier *element[0];
     1119    value1 OPERATION multiplier *element[1];
     1120    value2 OPERATION multiplier *element[2];
     1121    value3 OPERATION multiplier *element[3];
     1122    region[iColumn0] = value0;
     1123    region[iColumn1] = value1;
     1124    region[iColumn2] = value2;
     1125    region[iColumn3] = value3;
    11111126#endif
    11121127#else
    11131128    abort();
    11141129#endif
    1115     element+=NEW_CHUNK_SIZE;
     1130    element += NEW_CHUNK_SIZE;
    11161131    thisColumn += NEW_CHUNK_SIZE;
    11171132  }
    1118 #if NEW_CHUNK_SIZE==4
    1119   int iColumn0=thisColumn[0];
    1120   int iColumn1=thisColumn[1];
    1121   double value0=region[iColumn0];
    1122   double value1=region[iColumn1];
    1123   value0 OPERATION multiplier*element[0];
    1124   value1 OPERATION multiplier*element[1];
    1125   region[iColumn0]=value0;
    1126   region[iColumn1]=value1;
     1133#if NEW_CHUNK_SIZE == 4
     1134  int iColumn0 = thisColumn[0];
     1135  int iColumn1 = thisColumn[1];
     1136  double value0 = region[iColumn0];
     1137  double value1 = region[iColumn1];
     1138  value0 OPERATION multiplier *element[0];
     1139  value1 OPERATION multiplier *element[1];
     1140  region[iColumn0] = value0;
     1141  region[iColumn1] = value1;
    11271142#endif
    11281143}
    11291144SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
    1130                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    1131                            CoinFactorizationDouble * COIN_RESTRICT region)
     1145  const CoinFactorizationDouble *COIN_RESTRICT element,
     1146  CoinFactorizationDouble *COIN_RESTRICT region)
    11321147{
    11331148#ifndef NDEBUG
    1134   assert (numberIn==7);
    1135 #endif
    1136   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+7);
    1137   int nFull=7&(~(NEW_CHUNK_SIZE-1));
    1138 #if AVX2==1
     1149  assert(numberIn == 7);
     1150#endif
     1151  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 7);
     1152  int nFull = 7 & (~(NEW_CHUNK_SIZE - 1));
     1153#if AVX2 == 1
    11391154  double temp[4];
    11401155#endif
    1141   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1142     coin_prefetch_const(element+6);
    1143 #if NEW_CHUNK_SIZE==2
    1144     int iColumn0=thisColumn[0];
    1145     int iColumn1=thisColumn[1];
    1146     double value0=region[iColumn0];
    1147     double value1=region[iColumn1];
    1148     value0 OPERATION multiplier*element[0];
    1149     value1 OPERATION multiplier*element[1];
    1150     region[iColumn0]=value0;
    1151     region[iColumn1]=value1;
    1152 #elif NEW_CHUNK_SIZE==4
    1153     int iColumn0=thisColumn[0];
    1154     int iColumn1=thisColumn[1];
    1155     int iColumn2=thisColumn[2];
    1156     int iColumn3=thisColumn[3];
    1157 #if AVX2==1
     1156  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1157    coin_prefetch_const(element + 6);
     1158#if NEW_CHUNK_SIZE == 2
     1159    int iColumn0 = thisColumn[0];
     1160    int iColumn1 = thisColumn[1];
     1161    double value0 = region[iColumn0];
     1162    double value1 = region[iColumn1];
     1163    value0 OPERATION multiplier *element[0];
     1164    value1 OPERATION multiplier *element[1];
     1165    region[iColumn0] = value0;
     1166    region[iColumn1] = value1;
     1167#elif NEW_CHUNK_SIZE == 4
     1168    int iColumn0 = thisColumn[0];
     1169    int iColumn1 = thisColumn[1];
     1170    int iColumn2 = thisColumn[2];
     1171    int iColumn3 = thisColumn[3];
     1172#if AVX2 == 1
    11581173    __v2df bb;
    1159     set_const_v2df(bb,multiplier);
    1160     temp[0]=region[iColumn0];
    1161     temp[1]=region[iColumn1];
    1162     temp[2]=region[iColumn2];
    1163     temp[3]=region[iColumn3];
    1164     __v2df v0 = __builtin_ia32_loadupd (temp);
    1165     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1166     __v2df a = __builtin_ia32_loadupd (element);
     1174    set_const_v2df(bb, multiplier);
     1175    temp[0] = region[iColumn0];
     1176    temp[1] = region[iColumn1];
     1177    temp[2] = region[iColumn2];
     1178    temp[3] = region[iColumn3];
     1179    __v2df v0 = __builtin_ia32_loadupd(temp);
     1180    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1181    __v2df a = __builtin_ia32_loadupd(element);
    11671182    a *= bb;
    11681183    v0 OPERATION a;
    1169     a = __builtin_ia32_loadupd (element+2);
     1184    a = __builtin_ia32_loadupd(element + 2);
    11701185    a *= bb;
    11711186    v1 OPERATION a;
    1172     __builtin_ia32_storeupd (temp, v0);
    1173     __builtin_ia32_storeupd (temp+2, v1);
    1174     region[iColumn0]=temp[0];
    1175     region[iColumn1]=temp[1];
    1176     region[iColumn2]=temp[2];
    1177     region[iColumn3]=temp[3];
    1178 #else
    1179     double value0=region[iColumn0];
    1180     double value1=region[iColumn1];
    1181     double value2=region[iColumn2];
    1182     double value3=region[iColumn3];
    1183     value0 OPERATION multiplier*element[0];
    1184     value1 OPERATION multiplier*element[1];
    1185     value2 OPERATION multiplier*element[2];
    1186     value3 OPERATION multiplier*element[3];
    1187     region[iColumn0]=value0;
    1188     region[iColumn1]=value1;
    1189     region[iColumn2]=value2;
    1190     region[iColumn3]=value3;
     1187    __builtin_ia32_storeupd(temp, v0);
     1188    __builtin_ia32_storeupd(temp + 2, v1);
     1189    region[iColumn0] = temp[0];
     1190    region[iColumn1] = temp[1];
     1191    region[iColumn2] = temp[2];
     1192    region[iColumn3] = temp[3];
     1193#else
     1194    double value0 = region[iColumn0];
     1195    double value1 = region[iColumn1];
     1196    double value2 = region[iColumn2];
     1197    double value3 = region[iColumn3];
     1198    value0 OPERATION multiplier *element[0];
     1199    value1 OPERATION multiplier *element[1];
     1200    value2 OPERATION multiplier *element[2];
     1201    value3 OPERATION multiplier *element[3];
     1202    region[iColumn0] = value0;
     1203    region[iColumn1] = value1;
     1204    region[iColumn2] = value2;
     1205    region[iColumn3] = value3;
    11911206#endif
    11921207#else
    11931208    abort();
    11941209#endif
    1195     element+=NEW_CHUNK_SIZE;
     1210    element += NEW_CHUNK_SIZE;
    11961211    thisColumn += NEW_CHUNK_SIZE;
    11971212  }
    1198 #if NEW_CHUNK_SIZE==2
    1199   int iColumn0=thisColumn[0];
    1200   double value0=region[iColumn0];
    1201   value0 OPERATION multiplier*element[0];
    1202   region[iColumn0]=value0;
    1203 #else
    1204   int iColumn0=thisColumn[0];
    1205   int iColumn1=thisColumn[1];
    1206   int iColumn2=thisColumn[2];
    1207   double value0=region[iColumn0];
    1208   double value1=region[iColumn1];
    1209   double value2=region[iColumn2];
    1210   value0 OPERATION multiplier*element[0];
    1211   value1 OPERATION multiplier*element[1];
    1212   value2 OPERATION multiplier*element[2];
    1213   region[iColumn0]=value0;
    1214   region[iColumn1]=value1;
    1215   region[iColumn2]=value2;
     1213#if NEW_CHUNK_SIZE == 2
     1214  int iColumn0 = thisColumn[0];
     1215  double value0 = region[iColumn0];
     1216  value0 OPERATION multiplier *element[0];
     1217  region[iColumn0] = value0;
     1218#else
     1219  int iColumn0 = thisColumn[0];
     1220  int iColumn1 = thisColumn[1];
     1221  int iColumn2 = thisColumn[2];
     1222  double value0 = region[iColumn0];
     1223  double value1 = region[iColumn1];
     1224  double value2 = region[iColumn2];
     1225  value0 OPERATION multiplier *element[0];
     1226  value1 OPERATION multiplier *element[1];
     1227  value2 OPERATION multiplier *element[2];
     1228  region[iColumn0] = value0;
     1229  region[iColumn1] = value1;
     1230  region[iColumn2] = value2;
    12161231#endif
    12171232}
    12181233SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
    1219                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    1220                            CoinFactorizationDouble * COIN_RESTRICT region)
     1234  const CoinFactorizationDouble *COIN_RESTRICT element,
     1235  CoinFactorizationDouble *COIN_RESTRICT region)
    12211236{
    12221237#ifndef NDEBUG
    1223   assert (numberIn==8);
    1224 #endif
    1225   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+8);
    1226   int nFull=8&(~(NEW_CHUNK_SIZE-1));
    1227 #if AVX2==1
     1238  assert(numberIn == 8);
     1239#endif
     1240  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 8);
     1241  int nFull = 8 & (~(NEW_CHUNK_SIZE - 1));
     1242#if AVX2 == 1
    12281243  double temp[4];
    12291244#endif
    1230   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1231     coin_prefetch_const(element+6);
    1232 #if NEW_CHUNK_SIZE==2
    1233     int iColumn0=thisColumn[0];
    1234     int iColumn1=thisColumn[1];
    1235     double value0=region[iColumn0];
    1236     double value1=region[iColumn1];
    1237     value0 OPERATION multiplier*element[0];
    1238     value1 OPERATION multiplier*element[1];
    1239     region[iColumn0]=value0;
    1240     region[iColumn1]=value1;
    1241 #elif NEW_CHUNK_SIZE==4
    1242     int iColumn0=thisColumn[0];
    1243     int iColumn1=thisColumn[1];
    1244     int iColumn2=thisColumn[2];
    1245     int iColumn3=thisColumn[3];
    1246 #if AVX2==1
     1245  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1246    coin_prefetch_const(element + 6);
     1247#if NEW_CHUNK_SIZE == 2
     1248    int iColumn0 = thisColumn[0];
     1249    int iColumn1 = thisColumn[1];
     1250    double value0 = region[iColumn0];
     1251    double value1 = region[iColumn1];
     1252    value0 OPERATION multiplier *element[0];
     1253    value1 OPERATION multiplier *element[1];
     1254    region[iColumn0] = value0;
     1255    region[iColumn1] = value1;
     1256#elif NEW_CHUNK_SIZE == 4
     1257    int iColumn0 = thisColumn[0];
     1258    int iColumn1 = thisColumn[1];
     1259    int iColumn2 = thisColumn[2];
     1260    int iColumn3 = thisColumn[3];
     1261#if AVX2 == 1
    12471262    __v2df bb;
    1248     set_const_v2df(bb,multiplier);
    1249     temp[0]=region[iColumn0];
    1250     temp[1]=region[iColumn1];
    1251     temp[2]=region[iColumn2];
    1252     temp[3]=region[iColumn3];
    1253     __v2df v0 = __builtin_ia32_loadupd (temp);
    1254     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1255     __v2df a = __builtin_ia32_loadupd (element);
     1263    set_const_v2df(bb, multiplier);
     1264    temp[0] = region[iColumn0];
     1265    temp[1] = region[iColumn1];
     1266    temp[2] = region[iColumn2];
     1267    temp[3] = region[iColumn3];
     1268    __v2df v0 = __builtin_ia32_loadupd(temp);
     1269    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1270    __v2df a = __builtin_ia32_loadupd(element);
    12561271    a *= bb;
    12571272    v0 OPERATION a;
    1258     a = __builtin_ia32_loadupd (element+2);
     1273    a = __builtin_ia32_loadupd(element + 2);
    12591274    a *= bb;
    12601275    v1 OPERATION a;
    1261     __builtin_ia32_storeupd (temp, v0);
    1262     __builtin_ia32_storeupd (temp+2, v1);
    1263     region[iColumn0]=temp[0];
    1264     region[iColumn1]=temp[1];
    1265     region[iColumn2]=temp[2];
    1266     region[iColumn3]=temp[3];
    1267 #else
    1268     double value0=region[iColumn0];
    1269     double value1=region[iColumn1];
    1270     double value2=region[iColumn2];
    1271     double value3=region[iColumn3];
    1272     value0 OPERATION multiplier*element[0];
    1273     value1 OPERATION multiplier*element[1];
    1274     value2 OPERATION multiplier*element[2];
    1275     value3 OPERATION multiplier*element[3];
    1276     region[iColumn0]=value0;
    1277     region[iColumn1]=value1;
    1278     region[iColumn2]=value2;
    1279     region[iColumn3]=value3;
     1276    __builtin_ia32_storeupd(temp, v0);
     1277    __builtin_ia32_storeupd(temp + 2, v1);
     1278    region[iColumn0] = temp[0];
     1279    region[iColumn1] = temp[1];
     1280    region[iColumn2] = temp[2];
     1281    region[iColumn3] = temp[3];
     1282#else
     1283    double value0 = region[iColumn0];
     1284    double value1 = region[iColumn1];
     1285    double value2 = region[iColumn2];
     1286    double value3 = region[iColumn3];
     1287    value0 OPERATION multiplier *element[0];
     1288    value1 OPERATION multiplier *element[1];
     1289    value2 OPERATION multiplier *element[2];
     1290    value3 OPERATION multiplier *element[3];
     1291    region[iColumn0] = value0;
     1292    region[iColumn1] = value1;
     1293    region[iColumn2] = value2;
     1294    region[iColumn3] = value3;
    12801295#endif
    12811296#else
    12821297    abort();
    12831298#endif
    1284     element+=NEW_CHUNK_SIZE;
     1299    element += NEW_CHUNK_SIZE;
    12851300    thisColumn += NEW_CHUNK_SIZE;
    12861301  }
    12871302}
    12881303SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
    1289                             const CoinFactorizationDouble *  COIN_RESTRICT element,
    1290                             CoinFactorizationDouble * COIN_RESTRICT region)
     1304  const CoinFactorizationDouble *COIN_RESTRICT element,
     1305  CoinFactorizationDouble *COIN_RESTRICT region)
    12911306{
    1292   assert ((numberIn&3)==0);
    1293   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
    1294   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
    1295 #if AVX2==1
     1307  assert((numberIn & 3) == 0);
     1308  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
     1309  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
     1310#if AVX2 == 1
    12961311  double temp[4];
    12971312#endif
    1298   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1299     coin_prefetch_const(element+16);
    1300     coin_prefetch_const(thisColumn+32);
    1301 #if NEW_CHUNK_SIZE==2
    1302     int iColumn0=thisColumn[0];
    1303     int iColumn1=thisColumn[1];
    1304     double value0=region[iColumn0];
    1305     double value1=region[iColumn1];
    1306     value0 OPERATION multiplier*element[0];
    1307     value1 OPERATION multiplier*element[1];
    1308     region[iColumn0]=value0;
    1309     region[iColumn1]=value1;
    1310 #elif NEW_CHUNK_SIZE==4
    1311     int iColumn0=thisColumn[0];
    1312     int iColumn1=thisColumn[1];
    1313     int iColumn2=thisColumn[2];
    1314     int iColumn3=thisColumn[3];
    1315 #if AVX2==1
     1313  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1314    coin_prefetch_const(element + 16);
     1315    coin_prefetch_const(thisColumn + 32);
     1316#if NEW_CHUNK_SIZE == 2
     1317    int iColumn0 = thisColumn[0];
     1318    int iColumn1 = thisColumn[1];
     1319    double value0 = region[iColumn0];
     1320    double value1 = region[iColumn1];
     1321    value0 OPERATION multiplier *element[0];
     1322    value1 OPERATION multiplier *element[1];
     1323    region[iColumn0] = value0;
     1324    region[iColumn1] = value1;
     1325#elif NEW_CHUNK_SIZE == 4
     1326    int iColumn0 = thisColumn[0];
     1327    int iColumn1 = thisColumn[1];
     1328    int iColumn2 = thisColumn[2];
     1329    int iColumn3 = thisColumn[3];
     1330#if AVX2 == 1
    13161331    __v2df bb;
    1317     set_const_v2df(bb,multiplier);
    1318     temp[0]=region[iColumn0];
    1319     temp[1]=region[iColumn1];
    1320     temp[2]=region[iColumn2];
    1321     temp[3]=region[iColumn3];
    1322     __v2df v0 = __builtin_ia32_loadupd (temp);
    1323     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1324     __v2df a = __builtin_ia32_loadupd (element);
     1332    set_const_v2df(bb, multiplier);
     1333    temp[0] = region[iColumn0];
     1334    temp[1] = region[iColumn1];
     1335    temp[2] = region[iColumn2];
     1336    temp[3] = region[iColumn3];
     1337    __v2df v0 = __builtin_ia32_loadupd(temp);
     1338    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1339    __v2df a = __builtin_ia32_loadupd(element);
    13251340    a *= bb;
    13261341    v0 OPERATION a;
    1327     a = __builtin_ia32_loadupd (element+2);
     1342    a = __builtin_ia32_loadupd(element + 2);
    13281343    a *= bb;
    13291344    v1 OPERATION a;
    1330     __builtin_ia32_storeupd (temp, v0);
    1331     __builtin_ia32_storeupd (temp+2, v1);
    1332     region[iColumn0]=temp[0];
    1333     region[iColumn1]=temp[1];
    1334     region[iColumn2]=temp[2];
    1335     region[iColumn3]=temp[3];
    1336 #else
    1337     double value0=region[iColumn0];
    1338     double value1=region[iColumn1];
    1339     double value2=region[iColumn2];
    1340     double value3=region[iColumn3];
    1341     value0 OPERATION multiplier*element[0];
    1342     value1 OPERATION multiplier*element[1];
    1343     value2 OPERATION multiplier*element[2];
    1344     value3 OPERATION multiplier*element[3];
    1345     region[iColumn0]=value0;
    1346     region[iColumn1]=value1;
    1347     region[iColumn2]=value2;
    1348     region[iColumn3]=value3;
     1345    __builtin_ia32_storeupd(temp, v0);
     1346    __builtin_ia32_storeupd(temp + 2, v1);
     1347    region[iColumn0] = temp[0];
     1348    region[iColumn1] = temp[1];
     1349    region[iColumn2] = temp[2];
     1350    region[iColumn3] = temp[3];
     1351#else
     1352    double value0 = region[iColumn0];
     1353    double value1 = region[iColumn1];
     1354    double value2 = region[iColumn2];
     1355    double value3 = region[iColumn3];
     1356    value0 OPERATION multiplier *element[0];
     1357    value1 OPERATION multiplier *element[1];
     1358    value2 OPERATION multiplier *element[2];
     1359    value3 OPERATION multiplier *element[3];
     1360    region[iColumn0] = value0;
     1361    region[iColumn1] = value1;
     1362    region[iColumn2] = value2;
     1363    region[iColumn3] = value3;
    13491364#endif
    13501365#else
    13511366    abort();
    13521367#endif
    1353     element+=NEW_CHUNK_SIZE;
     1368    element += NEW_CHUNK_SIZE;
    13541369    thisColumn += NEW_CHUNK_SIZE;
    13551370  }
    13561371}
    13571372SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
    1358                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
    1359                                  CoinFactorizationDouble * COIN_RESTRICT region)
     1373  const CoinFactorizationDouble *COIN_RESTRICT element,
     1374  CoinFactorizationDouble *COIN_RESTRICT region)
    13601375{
    1361   assert ((numberIn&3)==1);
    1362   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
    1363   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
    1364 #if AVX2==1
     1376  assert((numberIn & 3) == 1);
     1377  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
     1378  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
     1379#if AVX2 == 1
    13651380  double temp[4];
    13661381#endif
    1367   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1368     coin_prefetch_const(element+16);
    1369     coin_prefetch_const(thisColumn+32);
    1370 #if NEW_CHUNK_SIZE==2
    1371     int iColumn0=thisColumn[0];
    1372     int iColumn1=thisColumn[1];
    1373     double value0=region[iColumn0];
    1374     double value1=region[iColumn1];
    1375     value0 OPERATION multiplier*element[0];
    1376     value1 OPERATION multiplier*element[1];
    1377     region[iColumn0]=value0;
    1378     region[iColumn1]=value1;
    1379 #elif NEW_CHUNK_SIZE==4
    1380     int iColumn0=thisColumn[0];
    1381     int iColumn1=thisColumn[1];
    1382     int iColumn2=thisColumn[2];
    1383     int iColumn3=thisColumn[3];
    1384 #if AVX2==1
     1382  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1383    coin_prefetch_const(element + 16);
     1384    coin_prefetch_const(thisColumn + 32);
     1385#if NEW_CHUNK_SIZE == 2
     1386    int iColumn0 = thisColumn[0];
     1387    int iColumn1 = thisColumn[1];
     1388    double value0 = region[iColumn0];
     1389    double value1 = region[iColumn1];
     1390    value0 OPERATION multiplier *element[0];
     1391    value1 OPERATION multiplier *element[1];
     1392    region[iColumn0] = value0;
     1393    region[iColumn1] = value1;
     1394#elif NEW_CHUNK_SIZE == 4
     1395    int iColumn0 = thisColumn[0];
     1396    int iColumn1 = thisColumn[1];
     1397    int iColumn2 = thisColumn[2];
     1398    int iColumn3 = thisColumn[3];
     1399#if AVX2 == 1
    13851400    __v2df bb;
    1386     set_const_v2df(bb,multiplier);
    1387     temp[0]=region[iColumn0];
    1388     temp[1]=region[iColumn1];
    1389     temp[2]=region[iColumn2];
    1390     temp[3]=region[iColumn3];
    1391     __v2df v0 = __builtin_ia32_loadupd (temp);
    1392     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1393     __v2df a = __builtin_ia32_loadupd (element);
     1401    set_const_v2df(bb, multiplier);
     1402    temp[0] = region[iColumn0];
     1403    temp[1] = region[iColumn1];
     1404    temp[2] = region[iColumn2];
     1405    temp[3] = region[iColumn3];
     1406    __v2df v0 = __builtin_ia32_loadupd(temp);
     1407    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1408    __v2df a = __builtin_ia32_loadupd(element);
    13941409    a *= bb;
    13951410    v0 OPERATION a;
    1396     a = __builtin_ia32_loadupd (element+2);
     1411    a = __builtin_ia32_loadupd(element + 2);
    13971412    a *= bb;
    13981413    v1 OPERATION a;
    1399     __builtin_ia32_storeupd (temp, v0);
    1400     __builtin_ia32_storeupd (temp+2, v1);
    1401     region[iColumn0]=temp[0];
    1402     region[iColumn1]=temp[1];
    1403     region[iColumn2]=temp[2];
    1404     region[iColumn3]=temp[3];
    1405 #else
    1406     double value0=region[iColumn0];
    1407     double value1=region[iColumn1];
    1408     double value2=region[iColumn2];
    1409     double value3=region[iColumn3];
    1410     value0 OPERATION multiplier*element[0];
    1411     value1 OPERATION multiplier*element[1];
    1412     value2 OPERATION multiplier*element[2];
    1413     value3 OPERATION multiplier*element[3];
    1414     region[iColumn0]=value0;
    1415     region[iColumn1]=value1;
    1416     region[iColumn2]=value2;
    1417     region[iColumn3]=value3;
     1414    __builtin_ia32_storeupd(temp, v0);
     1415    __builtin_ia32_storeupd(temp + 2, v1);
     1416    region[iColumn0] = temp[0];
     1417    region[iColumn1] = temp[1];
     1418    region[iColumn2] = temp[2];
     1419    region[iColumn3] = temp[3];
     1420#else
     1421    double value0 = region[iColumn0];
     1422    double value1 = region[iColumn1];
     1423    double value2 = region[iColumn2];
     1424    double value3 = region[iColumn3];
     1425    value0 OPERATION multiplier *element[0];
     1426    value1 OPERATION multiplier *element[1];
     1427    value2 OPERATION multiplier *element[2];
     1428    value3 OPERATION multiplier *element[3];
     1429    region[iColumn0] = value0;
     1430    region[iColumn1] = value1;
     1431    region[iColumn2] = value2;
     1432    region[iColumn3] = value3;
    14181433#endif
    14191434#else
    14201435    abort();
    14211436#endif
    1422     element+=NEW_CHUNK_SIZE;
     1437    element += NEW_CHUNK_SIZE;
    14231438    thisColumn += NEW_CHUNK_SIZE;
    14241439  }
    1425   int iColumn0=thisColumn[0];
    1426   double value0=region[iColumn0];
    1427   value0 OPERATION multiplier*element[0];
    1428   region[iColumn0]=value0;
     1440  int iColumn0 = thisColumn[0];
     1441  double value0 = region[iColumn0];
     1442  value0 OPERATION multiplier *element[0];
     1443  region[iColumn0] = value0;
    14291444}
    14301445SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
    1431                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
    1432                                  CoinFactorizationDouble * COIN_RESTRICT region)
     1446  const CoinFactorizationDouble *COIN_RESTRICT element,
     1447  CoinFactorizationDouble *COIN_RESTRICT region)
    14331448{
    1434   assert ((numberIn&3)==2);
    1435   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
    1436   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
    1437 #if AVX2==1
     1449  assert((numberIn & 3) == 2);
     1450  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
     1451  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
     1452#if AVX2 == 1
    14381453  double temp[4];
    14391454#endif
    1440   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1441     coin_prefetch_const(element+16);
    1442     coin_prefetch_const(thisColumn+32);
    1443 #if NEW_CHUNK_SIZE==2
    1444     int iColumn0=thisColumn[0];
    1445     int iColumn1=thisColumn[1];
    1446     double value0=region[iColumn0];
    1447     double value1=region[iColumn1];
    1448     value0 OPERATION multiplier*element[0];
    1449     value1 OPERATION multiplier*element[1];
    1450     region[iColumn0]=value0;
    1451     region[iColumn1]=value1;
    1452 #elif NEW_CHUNK_SIZE==4
    1453     int iColumn0=thisColumn[0];
    1454     int iColumn1=thisColumn[1];
    1455     int iColumn2=thisColumn[2];
    1456     int iColumn3=thisColumn[3];
    1457 #if AVX2==1
     1455  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1456    coin_prefetch_const(element + 16);
     1457    coin_prefetch_const(thisColumn + 32);
     1458#if NEW_CHUNK_SIZE == 2
     1459    int iColumn0 = thisColumn[0];
     1460    int iColumn1 = thisColumn[1];
     1461    double value0 = region[iColumn0];
     1462    double value1 = region[iColumn1];
     1463    value0 OPERATION multiplier *element[0];
     1464    value1 OPERATION multiplier *element[1];
     1465    region[iColumn0] = value0;
     1466    region[iColumn1] = value1;
     1467#elif NEW_CHUNK_SIZE == 4
     1468    int iColumn0 = thisColumn[0];
     1469    int iColumn1 = thisColumn[1];
     1470    int iColumn2 = thisColumn[2];
     1471    int iColumn3 = thisColumn[3];
     1472#if AVX2 == 1
    14581473    __v2df bb;
    1459     set_const_v2df(bb,multiplier);
    1460     temp[0]=region[iColumn0];
    1461     temp[1]=region[iColumn1];
    1462     temp[2]=region[iColumn2];
    1463     temp[3]=region[iColumn3];
    1464     __v2df v0 = __builtin_ia32_loadupd (temp);
    1465     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1466     __v2df a = __builtin_ia32_loadupd (element);
     1474    set_const_v2df(bb, multiplier);
     1475    temp[0] = region[iColumn0];
     1476    temp[1] = region[iColumn1];
     1477    temp[2] = region[iColumn2];
     1478    temp[3] = region[iColumn3];
     1479    __v2df v0 = __builtin_ia32_loadupd(temp);
     1480    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1481    __v2df a = __builtin_ia32_loadupd(element);
    14671482    a *= bb;
    14681483    v0 OPERATION a;
    1469     a = __builtin_ia32_loadupd (element+2);
     1484    a = __builtin_ia32_loadupd(element + 2);
    14701485    a *= bb;
    14711486    v1 OPERATION a;
    1472     __builtin_ia32_storeupd (temp, v0);
    1473     __builtin_ia32_storeupd (temp+2, v1);
    1474     region[iColumn0]=temp[0];
    1475     region[iColumn1]=temp[1];
    1476     region[iColumn2]=temp[2];
    1477     region[iColumn3]=temp[3];
    1478 #else
    1479     double value0=region[iColumn0];
    1480     double value1=region[iColumn1];
    1481     double value2=region[iColumn2];
    1482     double value3=region[iColumn3];
    1483     value0 OPERATION multiplier*element[0];
    1484     value1 OPERATION multiplier*element[1];
    1485     value2 OPERATION multiplier*element[2];
    1486     value3 OPERATION multiplier*element[3];
    1487     region[iColumn0]=value0;
    1488     region[iColumn1]=value1;
    1489     region[iColumn2]=value2;
    1490     region[iColumn3]=value3;
     1487    __builtin_ia32_storeupd(temp, v0);
     1488    __builtin_ia32_storeupd(temp + 2, v1);
     1489    region[iColumn0] = temp[0];
     1490    region[iColumn1] = temp[1];
     1491    region[iColumn2] = temp[2];
     1492    region[iColumn3] = temp[3];
     1493#else
     1494    double value0 = region[iColumn0];
     1495    double value1 = region[iColumn1];
     1496    double value2 = region[iColumn2];
     1497    double value3 = region[iColumn3];
     1498    value0 OPERATION multiplier *element[0];
     1499    value1 OPERATION multiplier *element[1];
     1500    value2 OPERATION multiplier *element[2];
     1501    value3 OPERATION multiplier *element[3];
     1502    region[iColumn0] = value0;
     1503    region[iColumn1] = value1;
     1504    region[iColumn2] = value2;
     1505    region[iColumn3] = value3;
    14911506#endif
    14921507#else
    14931508    abort();
    14941509#endif
    1495     element+=NEW_CHUNK_SIZE;
     1510    element += NEW_CHUNK_SIZE;
    14961511    thisColumn += NEW_CHUNK_SIZE;
    14971512  }
    1498 #if NEW_CHUNK_SIZE==4
    1499   int iColumn0=thisColumn[0];
    1500   int iColumn1=thisColumn[1];
    1501   double value0=region[iColumn0];
    1502   double value1=region[iColumn1];
    1503   value0 OPERATION multiplier*element[0];
    1504   value1 OPERATION multiplier*element[1];
    1505   region[iColumn0]=value0;
    1506   region[iColumn1]=value1;
     1513#if NEW_CHUNK_SIZE == 4
     1514  int iColumn0 = thisColumn[0];
     1515  int iColumn1 = thisColumn[1];
     1516  double value0 = region[iColumn0];
     1517  double value1 = region[iColumn1];
     1518  value0 OPERATION multiplier *element[0];
     1519  value1 OPERATION multiplier *element[1];
     1520  region[iColumn0] = value0;
     1521  region[iColumn1] = value1;
    15071522#endif
    15081523}
    15091524SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
    1510                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
    1511                                  CoinFactorizationDouble * COIN_RESTRICT region)
     1525  const CoinFactorizationDouble *COIN_RESTRICT element,
     1526  CoinFactorizationDouble *COIN_RESTRICT region)
    15121527{
    1513   assert ((numberIn&3)==3);
    1514   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
    1515   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
    1516 #if AVX2==1
     1528  assert((numberIn & 3) == 3);
     1529  const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn);
     1530  int nFull = numberIn & (~(NEW_CHUNK_SIZE - 1));
     1531#if AVX2 == 1
    15171532  double temp[4];
    15181533#endif
    1519   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    1520     coin_prefetch_const(element+16);
    1521     coin_prefetch_const(thisColumn+32);
    1522 #if NEW_CHUNK_SIZE==2
    1523     int iColumn0=thisColumn[0];
    1524     int iColumn1=thisColumn[1];
    1525     double value0=region[iColumn0];
    1526     double value1=region[iColumn1];
    1527     value0 OPERATION multiplier*element[0];
    1528     value1 OPERATION multiplier*element[1];
    1529     region[iColumn0]=value0;
    1530     region[iColumn1]=value1;
    1531 #elif NEW_CHUNK_SIZE==4
    1532     int iColumn0=thisColumn[0];
    1533     int iColumn1=thisColumn[1];
    1534     int iColumn2=thisColumn[2];
    1535     int iColumn3=thisColumn[3];
    1536 #if AVX2==1
     1534  for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) {
     1535    coin_prefetch_const(element + 16);
     1536    coin_prefetch_const(thisColumn + 32);
     1537#if NEW_CHUNK_SIZE == 2
     1538    int iColumn0 = thisColumn[0];
     1539    int iColumn1 = thisColumn[1];
     1540    double value0 = region[iColumn0];
     1541    double value1 = region[iColumn1];
     1542    value0 OPERATION multiplier *element[0];
     1543    value1 OPERATION multiplier *element[1];
     1544    region[iColumn0] = value0;
     1545    region[iColumn1] = value1;
     1546#elif NEW_CHUNK_SIZE == 4
     1547    int iColumn0 = thisColumn[0];
     1548    int iColumn1 = thisColumn[1];
     1549    int iColumn2 = thisColumn[2];
     1550    int iColumn3 = thisColumn[3];
     1551#if AVX2 == 1
    15371552    __v2df bb;
    1538     set_const_v2df(bb,multiplier);
    1539     temp[0]=region[iColumn0];
    1540     temp[1]=region[iColumn1];
    1541     temp[2]=region[iColumn2];
    1542     temp[3]=region[iColumn3];
    1543     __v2df v0 = __builtin_ia32_loadupd (temp);
    1544     __v2df v1 = __builtin_ia32_loadupd (temp+2);
    1545     __v2df a = __builtin_ia32_loadupd (element);
     1553    set_const_v2df(bb, multiplier);
     1554    temp[0] = region[iColumn0];
     1555    temp[1] = region[iColumn1];
     1556    temp[2] = region[iColumn2];
     1557    temp[3] = region[iColumn3];
     1558    __v2df v0 = __builtin_ia32_loadupd(temp);
     1559    __v2df v1 = __builtin_ia32_loadupd(temp + 2);
     1560    __v2df a = __builtin_ia32_loadupd(element);
    15461561    a *= bb;
    15471562    v0 OPERATION a;
    1548     a = __builtin_ia32_loadupd (element+2);
     1563    a = __builtin_ia32_loadupd(element + 2);
    15491564    a *= bb;
    15501565    v1 OPERATION a;
    1551     __builtin_ia32_storeupd (temp, v0);
    1552     __builtin_ia32_storeupd (temp+2, v1);
    1553     region[iColumn0]=temp[0];
    1554     region[iColumn1]=temp[1];
    1555     region[iColumn2]=temp[2];
    1556     region[iColumn3]=temp[3];
    1557 #else
    1558     double value0=region[iColumn0];
    1559     double value1=region[iColumn1];
    1560     double value2=region[iColumn2];
    1561     double value3=region[iColumn3];
    1562     value0 OPERATION multiplier*element[0];
    1563     value1 OPERATION multiplier*element[1];
    1564     value2 OPERATION multiplier*element[2];
    1565     value3 OPERATION multiplier*element[3];
    1566     region[iColumn0]=value0;
    1567     region[iColumn1]=value1;
    1568     region[iColumn2]=value2;
    1569     region[iColumn3]=value3;
     1566    __builtin_ia32_storeupd(temp, v0);
     1567    __builtin_ia32_storeupd(temp + 2, v1);
     1568    region[iColumn0] = temp[0];
     1569    region[iColumn1] = temp[1];
     1570    region[iColumn2] = temp[2];
     1571    region[iColumn3] = temp[3];
     1572#else
     1573    double value0 = region[iColumn0];
     1574    double value1 = region[iColumn1];
     1575    double value2 = region[iColumn2];
     1576    double value3 = region[iColumn3];
     1577    value0 OPERATION multiplier *element[0];
     1578    value1 OPERATION multiplier *element[1];
     1579    value2 OPERATION multiplier *element[2];
     1580    value3 OPERATION multiplier *element[3];
     1581    region[iColumn0] = value0;
     1582    region[iColumn1] = value1;
     1583    region[iColumn2] = value2;
     1584    region[iColumn3] = value3;
    15701585#endif
    15711586#else
    15721587    abort();
    15731588#endif
    1574     element+=NEW_CHUNK_SIZE;
     1589    element += NEW_CHUNK_SIZE;
    15751590    thisColumn += NEW_CHUNK_SIZE;
    15761591  }
    1577 #if NEW_CHUNK_SIZE==2
    1578   int iColumn0=thisColumn[0];
    1579   double value0=region[iColumn0];
    1580   value0 OPERATION multiplier*element[0];
    1581   region[iColumn0]=value0;
    1582 #else
    1583   int iColumn0=thisColumn[0];
    1584   int iColumn1=thisColumn[1];
    1585   int iColumn2=thisColumn[2];
    1586   double value0=region[iColumn0];
    1587   double value1=region[iColumn1];
    1588   double value2=region[iColumn2];
    1589   value0 OPERATION multiplier*element[0];
    1590   value1 OPERATION multiplier*element[1];
    1591   value2 OPERATION multiplier*element[2];
    1592   region[iColumn0]=value0;
    1593   region[iColumn1]=value1;
    1594   region[iColumn2]=value2;
     1592#if NEW_CHUNK_SIZE == 2
     1593  int iColumn0 = thisColumn[0];
     1594  double value0 = region[iColumn0];
     1595  value0 OPERATION multiplier *element[0];
     1596  region[iColumn0] = value0;
     1597#else
     1598  int iColumn0 = thisColumn[0];
     1599  int iColumn1 = thisColumn[1];
     1600  int iColumn2 = thisColumn[2];
     1601  double value0 = region[iColumn0];
     1602  double value1 = region[iColumn1];
     1603  double value2 = region[iColumn2];
     1604  value0 OPERATION multiplier *element[0];
     1605  value1 OPERATION multiplier *element[1];
     1606  value2 OPERATION multiplier *element[2];
     1607  region[iColumn0] = value0;
     1608  region[iColumn1] = value1;
     1609  region[iColumn2] = value2;
    15951610#endif
    15961611}
    15971612#endif
     1613
     1614/* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
     1615*/
Note: See TracChangeset for help on using the changeset viewer.