Ignore:
Timestamp:
Jan 6, 2019 2:43:06 PM (3 months ago)
Author:
unxusr
Message:

formatting

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Clp/src/CoinAbcHelperFunctions.cpp

    r2043 r2385  
    1313#include "CoinAbcCommon.hpp"
    1414//#define AVX2 1
    15 #if AVX2==1
    16 #define set_const_v2df(bb,b) {double * temp=reinterpret_cast<double*>(&bb);temp[0]=b;temp[1]=b;}       
    17 #endif
    18 #if INLINE_SCATTER==0
    19 void
    20 CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
    21                      const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    22                      const int *  COIN_RESTRICT thisIndex,
    23                      CoinFactorizationDouble * COIN_RESTRICT region)
    24 {
    25 #if UNROLL_SCATTER==0
    26   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
     15#if AVX2 == 1
     16#define set_const_v2df(bb, b)                         \
     17  {                                                   \
     18    double *temp = reinterpret_cast< double * >(&bb); \
     19    temp[0] = b;                                      \
     20    temp[1] = b;                                      \
     21  }
     22#endif
     23#if INLINE_SCATTER == 0
     24void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,
     25  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     26  const int *COIN_RESTRICT thisIndex,
     27  CoinFactorizationDouble *COIN_RESTRICT region)
     28{
     29#if UNROLL_SCATTER == 0
     30  for (CoinBigIndex j = number - 1; j >= 0; j--) {
    2731    CoinSimplexInt iRow = thisIndex[j];
    2832    CoinFactorizationDouble regionValue = region[iRow];
    2933    CoinFactorizationDouble value = thisElement[j];
    30     assert (value);
     34    assert(value);
    3135    region[iRow] = regionValue - value * pivotValue;
    3236  }
    33 #elif UNROLL_SCATTER==1
    34   if ((number&1)!=0) {
     37#elif UNROLL_SCATTER == 1
     38  if ((number & 1) != 0) {
    3539    number--;
    3640    CoinSimplexInt iRow = thisIndex[number];
    3741    CoinFactorizationDouble regionValue = region[iRow];
    38     CoinFactorizationDouble value = thisElement[number]; 
     42    CoinFactorizationDouble value = thisElement[number];
    3943    region[iRow] = regionValue - value * pivotValue;
    4044  }
    41   for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     45  for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    4246    CoinSimplexInt iRow0 = thisIndex[j];
    43     CoinSimplexInt iRow1 = thisIndex[j-1];
     47    CoinSimplexInt iRow1 = thisIndex[j - 1];
    4448    CoinFactorizationDouble regionValue0 = region[iRow0];
    4549    CoinFactorizationDouble regionValue1 = region[iRow1];
    4650    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    47     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    48   }
    49 #elif UNROLL_SCATTER==2
    50   if ((number&1)!=0) {
     51    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     52  }
     53#elif UNROLL_SCATTER == 2
     54  if ((number & 1) != 0) {
    5155    number--;
    5256    CoinSimplexInt iRow = thisIndex[number];
    5357    CoinFactorizationDouble regionValue = region[iRow];
    54     CoinFactorizationDouble value = thisElement[number]; 
     58    CoinFactorizationDouble value = thisElement[number];
    5559    region[iRow] = regionValue - value * pivotValue;
    5660  }
    57   if ((number&2)!=0) {
    58     CoinSimplexInt iRow0 = thisIndex[number-1];
     61  if ((number & 2) != 0) {
     62    CoinSimplexInt iRow0 = thisIndex[number - 1];
    5963    CoinFactorizationDouble regionValue0 = region[iRow0];
    60     CoinFactorizationDouble value0 = thisElement[number-1];
    61     CoinSimplexInt iRow1 = thisIndex[number-2];
     64    CoinFactorizationDouble value0 = thisElement[number - 1];
     65    CoinSimplexInt iRow1 = thisIndex[number - 2];
    6266    CoinFactorizationDouble regionValue1 = region[iRow1];
    63     CoinFactorizationDouble value1 = thisElement[number-2];
     67    CoinFactorizationDouble value1 = thisElement[number - 2];
    6468    region[iRow0] = regionValue0 - value0 * pivotValue;
    6569    region[iRow1] = regionValue1 - value1 * pivotValue;
    66     number-=2;
    67   }
    68   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     70    number -= 2;
     71  }
     72  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
    6973    CoinSimplexInt iRow0 = thisIndex[j];
    70     CoinSimplexInt iRow1 = thisIndex[j-1];
     74    CoinSimplexInt iRow1 = thisIndex[j - 1];
    7175    CoinFactorizationDouble regionValue0 = region[iRow0];
    7276    CoinFactorizationDouble regionValue1 = region[iRow1];
    7377    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    74     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    75     CoinSimplexInt iRow2 = thisIndex[j-2];
    76     CoinSimplexInt iRow3 = thisIndex[j-3];
     78    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     79    CoinSimplexInt iRow2 = thisIndex[j - 2];
     80    CoinSimplexInt iRow3 = thisIndex[j - 3];
    7781    CoinFactorizationDouble regionValue2 = region[iRow2];
    7882    CoinFactorizationDouble regionValue3 = region[iRow3];
    79     region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
    80     region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
    81   }
    82 #elif UNROLL_SCATTER==3
     83    region[iRow2] = regionValue2 - thisElement[j - 2] * pivotValue;
     84    region[iRow3] = regionValue3 - thisElement[j - 3] * pivotValue;
     85  }
     86#elif UNROLL_SCATTER == 3
    8387  CoinSimplexInt iRow0;
    8488  CoinSimplexInt iRow1;
    8589  CoinFactorizationDouble regionValue0;
    8690  CoinFactorizationDouble regionValue1;
    87   switch(static_cast<unsigned int>(number)) {
     91  switch (static_cast< unsigned int >(number)) {
    8892  case 0:
    8993    break;
     
    213217    break;
    214218  default:
    215     if ((number&1)!=0) {
     219    if ((number & 1) != 0) {
    216220      number--;
    217221      CoinSimplexInt iRow = thisIndex[number];
     
    220224      region[iRow] = regionValue - value * pivotValue;
    221225    }
    222     for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
     226    for (CoinBigIndex j = number - 1; j >= 0; j -= 2) {
    223227      CoinSimplexInt iRow0 = thisIndex[j];
    224       CoinSimplexInt iRow1 = thisIndex[j-1];
     228      CoinSimplexInt iRow1 = thisIndex[j - 1];
    225229      CoinFactorizationDouble regionValue0 = region[iRow0];
    226230      CoinFactorizationDouble regionValue1 = region[iRow1];
    227231      region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    228       region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
     232      region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
    229233    }
    230234    break;
     
    233237}
    234238#endif
    235 #if INLINE_GATHER==0
    236 CoinFactorizationDouble 
     239#if INLINE_GATHER == 0
     240CoinFactorizationDouble
    237241CoinAbcGatherUpdate(CoinSimplexInt number,
    238                     const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    239                     const int *  COIN_RESTRICT thisIndex,
    240                     CoinFactorizationDouble * COIN_RESTRICT region)
    241 {
    242 #if UNROLL_GATHER==0
    243   CoinFactorizationDouble pivotValue=0.0;
    244   for (CoinBigIndex j = 0; j < number; j ++ ) {
     242  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     243  const int *COIN_RESTRICT thisIndex,
     244  CoinFactorizationDouble *COIN_RESTRICT region)
     245{
     246#if UNROLL_GATHER == 0
     247  CoinFactorizationDouble pivotValue = 0.0;
     248  for (CoinBigIndex j = 0; j < number; j++) {
    245249    CoinFactorizationDouble value = thisElement[j];
    246250    CoinSimplexInt jRow = thisIndex[j];
     
    254258}
    255259#endif
    256 #if INLINE_MULTIPLY_ADD==0
    257 void
    258 CoinAbcMultiplyIndexed(int number,
    259                        const CoinFactorizationDouble *  COIN_RESTRICT multiplier,
    260                        const int *  COIN_RESTRICT thisIndex,
    261                        CoinSimplexDouble * COIN_RESTRICT region)
    262 {
    263 #ifndef INTEL_COMPILER
    264 // was #pragma simd
    265 #endif
    266 #if UNROLL_MULTIPLY_ADD==0
    267   for (CoinSimplexInt i = 0; i < number; i ++ ) {
     260#if INLINE_MULTIPLY_ADD == 0
     261void CoinAbcMultiplyIndexed(int number,
     262  const CoinFactorizationDouble *COIN_RESTRICT multiplier,
     263  const int *COIN_RESTRICT thisIndex,
     264  CoinSimplexDouble *COIN_RESTRICT region)
     265{
     266#ifndef INTEL_COMPILER
     267// was #pragma simd
     268#endif
     269#if UNROLL_MULTIPLY_ADD == 0
     270  for (CoinSimplexInt i = 0; i < number; i++) {
    268271    CoinSimplexInt iRow = thisIndex[i];
    269272    CoinSimplexDouble value = region[iRow];
     
    275278#endif
    276279}
    277 void
    278 CoinAbcMultiplyIndexed(int number,
    279                        const long double *  COIN_RESTRICT multiplier,
    280                        const int *  COIN_RESTRICT thisIndex,
    281                        long double * COIN_RESTRICT region)
    282 {
    283 #ifndef INTEL_COMPILER
    284 // was #pragma simd
    285 #endif
    286 #if UNROLL_MULTIPLY_ADD==0
    287   for (CoinSimplexInt i = 0; i < number; i ++ ) {
     280void CoinAbcMultiplyIndexed(int number,
     281  const long double *COIN_RESTRICT multiplier,
     282  const int *COIN_RESTRICT thisIndex,
     283  long double *COIN_RESTRICT region)
     284{
     285#ifndef INTEL_COMPILER
     286// was #pragma simd
     287#endif
     288#if UNROLL_MULTIPLY_ADD == 0
     289  for (CoinSimplexInt i = 0; i < number; i++) {
    288290    CoinSimplexInt iRow = thisIndex[i];
    289291    long double value = region[iRow];
     
    297299#endif
    298300double
    299 CoinAbcMaximumAbsElement(const double * region, int sizeIn)
    300 {
    301   int size=sizeIn;
    302      double maxValue = 0.0;
    303      //printf("a\n");
    304 #ifndef INTEL_COMPILER
    305 // was #pragma simd
    306 #endif
    307      /*cilk_*/ for (int i = 0; i < size; i++)
    308           maxValue = CoinMax(maxValue, fabs(region[i]));
    309      return maxValue;
    310 }
    311 void
    312 CoinAbcMinMaxAbsElement(const double * region, int sizeIn,double & minimum , double & maximum)
    313 {
    314   double minValue=minimum;
    315   double maxValue=maximum;
    316   int size=sizeIn;
     301CoinAbcMaximumAbsElement(const double *region, int sizeIn)
     302{
     303  int size = sizeIn;
     304  double maxValue = 0.0;
     305  //printf("a\n");
     306#ifndef INTEL_COMPILER
     307// was #pragma simd
     308#endif
     309  /*cilk_*/ for (int i = 0; i < size; i++)
     310    maxValue = CoinMax(maxValue, fabs(region[i]));
     311  return maxValue;
     312}
     313void CoinAbcMinMaxAbsElement(const double *region, int sizeIn, double &minimum, double &maximum)
     314{
     315  double minValue = minimum;
     316  double maxValue = maximum;
     317  int size = sizeIn;
    317318  //printf("b\n");
    318319#ifndef INTEL_COMPILER
    319320// was #pragma simd
    320321#endif
    321      /*cilk_*/ for (int i=0;i<size;i++) {
    322     double value=fabs(region[i]);
    323     minValue=CoinMin(value,minValue);
    324     maxValue=CoinMax(value,maxValue);
    325   }
    326   minimum=minValue;
    327   maximum=maxValue;
    328 }
    329 void
    330 CoinAbcMinMaxAbsNormalValues(const double * region, int sizeIn,double & minimum , double & maximum)
    331 {
    332   int size=sizeIn;
    333   double minValue=minimum;
    334   double maxValue=maximum;
     322  /*cilk_*/ for (int i = 0; i < size; i++) {
     323    double value = fabs(region[i]);
     324    minValue = CoinMin(value, minValue);
     325    maxValue = CoinMax(value, maxValue);
     326  }
     327  minimum = minValue;
     328  maximum = maxValue;
     329}
     330void CoinAbcMinMaxAbsNormalValues(const double *region, int sizeIn, double &minimum, double &maximum)
     331{
     332  int size = sizeIn;
     333  double minValue = minimum;
     334  double maxValue = maximum;
    335335#define NORMAL_LOW_VALUE 1.0e-8
    336336#define NORMAL_HIGH_VALUE 1.0e8
     
    339339// was #pragma simd
    340340#endif
    341      /*cilk_*/ for (int i=0;i<size;i++) {
    342     double value=fabs(region[i]);
    343     if (value>NORMAL_LOW_VALUE&&value<NORMAL_HIGH_VALUE) {
    344       minValue=CoinMin(value,minValue);
    345       maxValue=CoinMax(value,maxValue);
     341  /*cilk_*/ for (int i = 0; i < size; i++) {
     342    double value = fabs(region[i]);
     343    if (value > NORMAL_LOW_VALUE && value < NORMAL_HIGH_VALUE) {
     344      minValue = CoinMin(value, minValue);
     345      maxValue = CoinMax(value, maxValue);
    346346    }
    347347  }
    348   minimum=minValue;
    349   maximum=maxValue;
    350 }
    351 void
    352 CoinAbcScale(double * region, double multiplier,int sizeIn)
    353 {
    354   int size=sizeIn;
     348  minimum = minValue;
     349  maximum = maxValue;
     350}
     351void CoinAbcScale(double *region, double multiplier, int sizeIn)
     352{
     353  int size = sizeIn;
    355354  // used printf("d\n");
    356355#ifndef INTEL_COMPILER
    357356// was #pragma simd
    358357#endif
    359 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    360   cilk_for (int i=0;i<size;i++) {
     358#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     359  cilk_for(int i = 0; i < size; i++)
     360  {
    361361    region[i] *= multiplier;
    362362  }
    363363}
    364 void
    365 CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int sizeIn)
    366 {
    367   int size=sizeIn;
    368      // used printf("e\n");
    369 #ifndef INTEL_COMPILER
    370 // was #pragma simd
    371 #endif
    372 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    373   cilk_for (int i=0;i<size;i++) {
    374     double value=fabs(region[i]);
    375     if (value>killIfLessThanThis) {
    376       if (value!=COIN_DBL_MAX) {
    377         value *= multiplier;
    378         if (value>killIfLessThanThis)
    379           region[i] *= multiplier;
    380         else
    381           region[i]=0.0;
     364void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int sizeIn)
     365{
     366  int size = sizeIn;
     367  // used printf("e\n");
     368#ifndef INTEL_COMPILER
     369// was #pragma simd
     370#endif
     371#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     372  cilk_for(int i = 0; i < size; i++)
     373  {
     374    double value = fabs(region[i]);
     375    if (value > killIfLessThanThis) {
     376      if (value != COIN_DBL_MAX) {
     377        value *= multiplier;
     378        if (value > killIfLessThanThis)
     379          region[i] *= multiplier;
     380        else
     381          region[i] = 0.0;
    382382      }
    383383    } else {
    384       region[i]=0.0;
     384      region[i] = 0.0;
    385385    }
    386386  }
    387387}
    388388// maximum fabs(region[i]) and then region[i]*=multiplier
    389 double 
    390 CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int sizeIn)
    391 {
    392   double maxValue=0.0;
    393   int size=sizeIn;
     389double
     390CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int sizeIn)
     391{
     392  double maxValue = 0.0;
     393  int size = sizeIn;
    394394  //printf("f\n");
    395395#ifndef INTEL_COMPILER
    396396// was #pragma simd
    397397#endif
    398      /*cilk_*/ for (int i=0;i<size;i++) {
    399     double value=fabs(region[i]);
     398  /*cilk_*/ for (int i = 0; i < size; i++) {
     399    double value = fabs(region[i]);
    400400    region[i] *= multiplier;
    401     maxValue=CoinMax(value,maxValue);
     401    maxValue = CoinMax(value, maxValue);
    402402  }
    403403  return maxValue;
    404404}
    405 void
    406 CoinAbcSetElements(double * region, int sizeIn, double value)
    407 {
    408   int size=sizeIn;
    409      printf("g\n");
    410 #ifndef INTEL_COMPILER
    411 // was #pragma simd
    412 #endif
    413 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    414      cilk_for (int i = 0; i < size; i++)
    415           region[i] = value;
    416 }
    417 void
    418 CoinAbcMultiplyAdd(const double * region1, int sizeIn, double multiplier1,
    419             double * regionChanged, double multiplier2)
     405void CoinAbcSetElements(double *region, int sizeIn, double value)
     406{
     407  int size = sizeIn;
     408  printf("g\n");
     409#ifndef INTEL_COMPILER
     410// was #pragma simd
     411#endif
     412#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     413  cilk_for(int i = 0; i < size; i++)
     414    region[i]
     415    = value;
     416}
     417void CoinAbcMultiplyAdd(const double *region1, int sizeIn, double multiplier1,
     418  double *regionChanged, double multiplier2)
    420419{
    421420  //printf("h\n");
    422   int size=sizeIn;
    423      if (multiplier1 == 1.0) {
    424           if (multiplier2 == 1.0) {
    425 #ifndef INTEL_COMPILER
    426 // was #pragma simd
    427 #endif
    428                /*cilk_*/ for (int i = 0; i < size; i++)
    429                     regionChanged[i] = region1[i] + regionChanged[i];
    430           } else if (multiplier2 == -1.0) {
    431 #ifndef INTEL_COMPILER
    432 // was #pragma simd
    433 #endif
    434                /*cilk_*/ for (int i = 0; i < size; i++)
    435                     regionChanged[i] = region1[i] - regionChanged[i];
    436           } else if (multiplier2 == 0.0) {
    437 #ifndef INTEL_COMPILER
    438 // was #pragma simd
    439 #endif
    440                /*cilk_*/ for (int i = 0; i < size; i++)
    441                     regionChanged[i] = region1[i] ;
    442           } else {
    443 #ifndef INTEL_COMPILER
    444 // was #pragma simd
    445 #endif
    446                /*cilk_*/ for (int i = 0; i < size; i++)
    447                     regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
    448           }
    449      } else if (multiplier1 == -1.0) {
    450           if (multiplier2 == 1.0) {
    451 #ifndef INTEL_COMPILER
    452 // was #pragma simd
    453 #endif
    454                /*cilk_*/ for (int i = 0; i < size; i++)
    455                     regionChanged[i] = -region1[i] + regionChanged[i];
    456           } else if (multiplier2 == -1.0) {
    457 #ifndef INTEL_COMPILER
    458 // was #pragma simd
    459 #endif
    460                /*cilk_*/ for (int i = 0; i < size; i++)
    461                     regionChanged[i] = -region1[i] - regionChanged[i];
    462           } else if (multiplier2 == 0.0) {
    463 #ifndef INTEL_COMPILER
    464 // was #pragma simd
    465 #endif
    466                /*cilk_*/ for (int i = 0; i < size; i++)
    467                     regionChanged[i] = -region1[i] ;
    468           } else {
    469 #ifndef INTEL_COMPILER
    470 // was #pragma simd
    471 #endif
    472                /*cilk_*/ for (int i = 0; i < size; i++)
    473                     regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
    474           }
    475      } else if (multiplier1 == 0.0) {
    476           if (multiplier2 == 1.0) {
    477                // nothing to do
    478           } else if (multiplier2 == -1.0) {
    479 #ifndef INTEL_COMPILER
    480 // was #pragma simd
    481 #endif
    482                /*cilk_*/ for (int i = 0; i < size; i++)
    483                     regionChanged[i] = -regionChanged[i];
    484           } else if (multiplier2 == 0.0) {
    485 #ifndef INTEL_COMPILER
    486 // was #pragma simd
    487 #endif
    488                /*cilk_*/ for (int i = 0; i < size; i++)
    489                     regionChanged[i] = 0.0;
    490           } else {
    491 #ifndef INTEL_COMPILER
    492 // was #pragma simd
    493 #endif
    494                /*cilk_*/ for (int i = 0; i < size; i++)
    495                     regionChanged[i] = multiplier2 * regionChanged[i];
    496           }
    497      } else {
    498           if (multiplier2 == 1.0) {
    499 #ifndef INTEL_COMPILER
    500 // was #pragma simd
    501 #endif
    502                /*cilk_*/ for (int i = 0; i < size; i++)
    503                     regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
    504           } else if (multiplier2 == -1.0) {
    505 #ifndef INTEL_COMPILER
    506 // was #pragma simd
    507 #endif
    508                /*cilk_*/ for (int i = 0; i < size; i++)
    509                     regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
    510           } else if (multiplier2 == 0.0) {
    511 #ifndef INTEL_COMPILER
    512 // was #pragma simd
    513 #endif
    514                /*cilk_*/ for (int i = 0; i < size; i++)
    515                     regionChanged[i] = multiplier1 * region1[i] ;
    516           } else {
    517 #ifndef INTEL_COMPILER
    518 // was #pragma simd
    519 #endif
    520                /*cilk_*/ for (int i = 0; i < size; i++)
    521                     regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
    522           }
    523      }
     421  int size = sizeIn;
     422  if (multiplier1 == 1.0) {
     423    if (multiplier2 == 1.0) {
     424#ifndef INTEL_COMPILER
     425// was #pragma simd
     426#endif
     427      /*cilk_*/ for (int i = 0; i < size; i++)
     428        regionChanged[i] = region1[i] + regionChanged[i];
     429    } else if (multiplier2 == -1.0) {
     430#ifndef INTEL_COMPILER
     431// was #pragma simd
     432#endif
     433      /*cilk_*/ for (int i = 0; i < size; i++)
     434        regionChanged[i] = region1[i] - regionChanged[i];
     435    } else if (multiplier2 == 0.0) {
     436#ifndef INTEL_COMPILER
     437// was #pragma simd
     438#endif
     439      /*cilk_*/ for (int i = 0; i < size; i++)
     440        regionChanged[i] = region1[i];
     441    } else {
     442#ifndef INTEL_COMPILER
     443// was #pragma simd
     444#endif
     445      /*cilk_*/ for (int i = 0; i < size; i++)
     446        regionChanged[i] = region1[i] + multiplier2 * regionChanged[i];
     447    }
     448  } else if (multiplier1 == -1.0) {
     449    if (multiplier2 == 1.0) {
     450#ifndef INTEL_COMPILER
     451// was #pragma simd
     452#endif
     453      /*cilk_*/ for (int i = 0; i < size; i++)
     454        regionChanged[i] = -region1[i] + regionChanged[i];
     455    } else if (multiplier2 == -1.0) {
     456#ifndef INTEL_COMPILER
     457// was #pragma simd
     458#endif
     459      /*cilk_*/ for (int i = 0; i < size; i++)
     460        regionChanged[i] = -region1[i] - regionChanged[i];
     461    } else if (multiplier2 == 0.0) {
     462#ifndef INTEL_COMPILER
     463// was #pragma simd
     464#endif
     465      /*cilk_*/ for (int i = 0; i < size; i++)
     466        regionChanged[i] = -region1[i];
     467    } else {
     468#ifndef INTEL_COMPILER
     469// was #pragma simd
     470#endif
     471      /*cilk_*/ for (int i = 0; i < size; i++)
     472        regionChanged[i] = -region1[i] + multiplier2 * regionChanged[i];
     473    }
     474  } else if (multiplier1 == 0.0) {
     475    if (multiplier2 == 1.0) {
     476      // nothing to do
     477    } else if (multiplier2 == -1.0) {
     478#ifndef INTEL_COMPILER
     479// was #pragma simd
     480#endif
     481      /*cilk_*/ for (int i = 0; i < size; i++)
     482        regionChanged[i] = -regionChanged[i];
     483    } else if (multiplier2 == 0.0) {
     484#ifndef INTEL_COMPILER
     485// was #pragma simd
     486#endif
     487      /*cilk_*/ for (int i = 0; i < size; i++)
     488        regionChanged[i] = 0.0;
     489    } else {
     490#ifndef INTEL_COMPILER
     491// was #pragma simd
     492#endif
     493      /*cilk_*/ for (int i = 0; i < size; i++)
     494        regionChanged[i] = multiplier2 * regionChanged[i];
     495    }
     496  } else {
     497    if (multiplier2 == 1.0) {
     498#ifndef INTEL_COMPILER
     499// was #pragma simd
     500#endif
     501      /*cilk_*/ for (int i = 0; i < size; i++)
     502        regionChanged[i] = multiplier1 * region1[i] + regionChanged[i];
     503    } else if (multiplier2 == -1.0) {
     504#ifndef INTEL_COMPILER
     505// was #pragma simd
     506#endif
     507      /*cilk_*/ for (int i = 0; i < size; i++)
     508        regionChanged[i] = multiplier1 * region1[i] - regionChanged[i];
     509    } else if (multiplier2 == 0.0) {
     510#ifndef INTEL_COMPILER
     511// was #pragma simd
     512#endif
     513      /*cilk_*/ for (int i = 0; i < size; i++)
     514        regionChanged[i] = multiplier1 * region1[i];
     515    } else {
     516#ifndef INTEL_COMPILER
     517// was #pragma simd
     518#endif
     519      /*cilk_*/ for (int i = 0; i < size; i++)
     520        regionChanged[i] = multiplier1 * region1[i] + multiplier2 * regionChanged[i];
     521    }
     522  }
    524523}
    525524double
    526 CoinAbcInnerProduct(const double * region1, int sizeIn, const double * region2)
    527 {
    528   int size=sizeIn;
     525CoinAbcInnerProduct(const double *region1, int sizeIn, const double *region2)
     526{
     527  int size = sizeIn;
    529528  //printf("i\n");
    530      double value = 0.0;
    531 #ifndef INTEL_COMPILER
    532 // was #pragma simd
    533 #endif
    534      /*cilk_*/ for (int i = 0; i < size; i++)
    535           value += region1[i] * region2[i];
    536      return value;
    537 }
    538 void
    539 CoinAbcGetNorms(const double * region, int sizeIn, double & norm1, double & norm2)
    540 {
    541   int size=sizeIn;
     529  double value = 0.0;
     530#ifndef INTEL_COMPILER
     531// was #pragma simd
     532#endif
     533  /*cilk_*/ for (int i = 0; i < size; i++)
     534    value += region1[i] * region2[i];
     535  return value;
     536}
     537void CoinAbcGetNorms(const double *region, int sizeIn, double &norm1, double &norm2)
     538{
     539  int size = sizeIn;
    542540  //printf("j\n");
    543      norm1 = 0.0;
    544      norm2 = 0.0;
    545 #ifndef INTEL_COMPILER
    546 // was #pragma simd
    547 #endif
    548      /*cilk_*/ for (int i = 0; i < size; i++) {
    549           norm2 += region[i] * region[i];
    550           norm1 = CoinMax(norm1, fabs(region[i]));
    551      }
     541  norm1 = 0.0;
     542  norm2 = 0.0;
     543#ifndef INTEL_COMPILER
     544// was #pragma simd
     545#endif
     546  /*cilk_*/ for (int i = 0; i < size; i++) {
     547    norm2 += region[i] * region[i];
     548    norm1 = CoinMax(norm1, fabs(region[i]));
     549  }
    552550}
    553551// regionTo[index[i]]=regionFrom[i]
    554 void
    555 CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int numberIn)
    556 {
    557   int number=numberIn;
    558      // used printf("k\n");
    559 #ifndef INTEL_COMPILER
    560 // was #pragma simd
    561 #endif
    562 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    563   cilk_for (int i=0;i<number;i++) {
    564     int k=index[i];
    565     regionTo[k]=regionFrom[i];
     552void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int numberIn)
     553{
     554  int number = numberIn;
     555  // used printf("k\n");
     556#ifndef INTEL_COMPILER
     557// was #pragma simd
     558#endif
     559#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     560  cilk_for(int i = 0; i < number; i++)
     561  {
     562    int k = index[i];
     563    regionTo[k] = regionFrom[i];
    566564  }
    567565}
    568566// regionTo[i]=regionFrom[index[i]]
    569 void
    570 CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int numberIn)
    571 {
    572   int number=numberIn;
    573      // used printf("l\n");
    574 #ifndef INTEL_COMPILER
    575 // was #pragma simd
    576 #endif
    577 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    578   cilk_for (int i=0;i<number;i++) {
    579     int k=index[i];
    580     regionTo[i]=regionFrom[k];
     567void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int numberIn)
     568{
     569  int number = numberIn;
     570  // used printf("l\n");
     571#ifndef INTEL_COMPILER
     572// was #pragma simd
     573#endif
     574#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     575  cilk_for(int i = 0; i < number; i++)
     576  {
     577    int k = index[i];
     578    regionTo[i] = regionFrom[k];
    581579  }
    582580}
    583581// regionTo[index[i]]=0.0
    584 void
    585 CoinAbcScatterZeroTo(double * regionTo, const int * index,int numberIn)
    586 {
    587   int number=numberIn;
    588      // used printf("m\n");
    589 #ifndef INTEL_COMPILER
    590 // was #pragma simd
    591 #endif
    592 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    593   cilk_for (int i=0;i<number;i++) {
    594     int k=index[i];
    595     regionTo[k]=0.0;
     582void CoinAbcScatterZeroTo(double *regionTo, const int *index, int numberIn)
     583{
     584  int number = numberIn;
     585  // used printf("m\n");
     586#ifndef INTEL_COMPILER
     587// was #pragma simd
     588#endif
     589#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     590  cilk_for(int i = 0; i < number; i++)
     591  {
     592    int k = index[i];
     593    regionTo[k] = 0.0;
    596594  }
    597595}
    598596// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]]
    599 void
    600 CoinAbcScatterToList(const double * regionFrom, double * regionTo,
    601                    const int * indexList, const int * indexScatter ,int numberIn)
    602 {
    603   int number=numberIn;
     597void CoinAbcScatterToList(const double *regionFrom, double *regionTo,
     598  const int *indexList, const int *indexScatter, int numberIn)
     599{
     600  int number = numberIn;
    604601  //printf("n\n");
    605602#ifndef INTEL_COMPILER
    606603// was #pragma simd
    607604#endif
    608      /*cilk_*/ for (int i=0;i<number;i++) {
    609     int j=indexList[i];
    610     double value=regionFrom[j];
    611     int k=indexScatter[j];
    612     regionTo[k]=value;
    613   }
    614 }
    615 void
    616 CoinAbcInverseSqrts(double * array, int nIn)
    617 {
    618   int n=nIn;
    619      // used printf("o\n");
    620 #ifndef INTEL_COMPILER
    621 // was #pragma simd
    622 #endif
    623 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    624   cilk_for (int i = 0; i < n; i++)
    625     array[i] = 1.0 / sqrt(array[i]);
    626 }
    627 void
    628 CoinAbcReciprocal(double * array, int nIn, const double *input)
    629 {
    630   int n=nIn;
    631      // used printf("p\n");
    632 #ifndef INTEL_COMPILER
    633 // was #pragma simd
    634 #endif
    635 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
    636   cilk_for (int i = 0; i < n; i++)
    637     array[i] = 1.0 / input[i];
    638 }
    639 void
    640 CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size)
    641 {
    642   memcpy(array,arrayFrom,size*sizeof(double));
    643 }
    644 void
    645 CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size)
    646 {
    647   memcpy(array,arrayFrom,size*sizeof(int));
    648 }
    649 void
    650 CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size)
    651 {
    652   memcpy(array,arrayFrom,size*sizeof(unsigned char));
    653 }
    654 void
    655 CoinAbcMemset0Long(double * array,int size)
    656 {
    657   memset(array,0,size*sizeof(double));
    658 }
    659 void
    660 CoinAbcMemset0Long(int * array,int size)
    661 {
    662   memset(array,0,size*sizeof(int));
    663 }
    664 void
    665 CoinAbcMemset0Long(unsigned char * array,int size)
    666 {
    667   memset(array,0,size*sizeof(unsigned char));
    668 }
    669 void
    670 CoinAbcMemmove(double * array,const double * arrayFrom,int size)
    671 {
    672   memmove(array,arrayFrom,size*sizeof(double));
    673 }
    674 void CoinAbcMemmove(int * array,const int * arrayFrom,int size)
    675 {
    676   memmove(array,arrayFrom,size*sizeof(int));
    677 }
    678 void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size)
    679 {
    680   memmove(array,arrayFrom,size*sizeof(unsigned char));
     605  /*cilk_*/ for (int i = 0; i < number; i++) {
     606    int j = indexList[i];
     607    double value = regionFrom[j];
     608    int k = indexScatter[j];
     609    regionTo[k] = value;
     610  }
     611}
     612void CoinAbcInverseSqrts(double *array, int nIn)
     613{
     614  int n = nIn;
     615  // used printf("o\n");
     616#ifndef INTEL_COMPILER
     617// was #pragma simd
     618#endif
     619#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     620  cilk_for(int i = 0; i < n; i++)
     621    array[i]
     622    = 1.0 / sqrt(array[i]);
     623}
     624void CoinAbcReciprocal(double *array, int nIn, const double *input)
     625{
     626  int n = nIn;
     627  // used printf("p\n");
     628#ifndef INTEL_COMPILER
     629// was #pragma simd
     630#endif
     631#pragma cilk grainsize = CILK_FOR_GRAINSIZE
     632  cilk_for(int i = 0; i < n; i++)
     633    array[i]
     634    = 1.0 / input[i];
     635}
     636void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size)
     637{
     638  memcpy(array, arrayFrom, size * sizeof(double));
     639}
     640void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size)
     641{
     642  memcpy(array, arrayFrom, size * sizeof(int));
     643}
     644void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size)
     645{
     646  memcpy(array, arrayFrom, size * sizeof(unsigned char));
     647}
     648void CoinAbcMemset0Long(double *array, int size)
     649{
     650  memset(array, 0, size * sizeof(double));
     651}
     652void CoinAbcMemset0Long(int *array, int size)
     653{
     654  memset(array, 0, size * sizeof(int));
     655}
     656void CoinAbcMemset0Long(unsigned char *array, int size)
     657{
     658  memset(array, 0, size * sizeof(unsigned char));
     659}
     660void CoinAbcMemmove(double *array, const double *arrayFrom, int size)
     661{
     662  memmove(array, arrayFrom, size * sizeof(double));
     663}
     664void CoinAbcMemmove(int *array, const int *arrayFrom, int size)
     665{
     666  memmove(array, arrayFrom, size * sizeof(int));
     667}
     668void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size)
     669{
     670  memmove(array, arrayFrom, size * sizeof(unsigned char));
    681671}
    682672// This moves down and zeroes out end
    683 void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size)
    684 {
    685   assert(arrayFrom-array>=0);
    686   memmove(array,arrayFrom,size*sizeof(double));
    687   double * endArray = array+size;
    688   double * endArrayFrom = arrayFrom+size;
    689   double * startZero = CoinMax(arrayFrom,endArray);
    690   memset(startZero,0,(endArrayFrom-startZero)*sizeof(double));
     673void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size)
     674{
     675  assert(arrayFrom - array >= 0);
     676  memmove(array, arrayFrom, size * sizeof(double));
     677  double *endArray = array + size;
     678  double *endArrayFrom = arrayFrom + size;
     679  double *startZero = CoinMax(arrayFrom, endArray);
     680  memset(startZero, 0, (endArrayFrom - startZero) * sizeof(double));
    691681}
    692682// This compacts several sections and zeroes out end
    693 int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths)
    694 {
    695   int totalLength=alreadyDone;
    696   for (int i=0;i<numberSections;i++) {
     683int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths)
     684{
     685  int totalLength = alreadyDone;
     686  for (int i = 0; i < numberSections; i++) {
    697687    totalLength += lengths[i];
    698688  }
    699   for (int i=0;i<numberSections;i++) {
    700     int start=starts[i];
    701     int length=lengths[i];
    702     int end=start+length;
    703     memmove(array+alreadyDone,array+start,length*sizeof(double));
    704     if (end>totalLength) {
    705       start=CoinMax(start,totalLength);
    706       memset(array+start,0,(end-start)*sizeof(double));
     689  for (int i = 0; i < numberSections; i++) {
     690    int start = starts[i];
     691    int length = lengths[i];
     692    int end = start + length;
     693    memmove(array + alreadyDone, array + start, length * sizeof(double));
     694    if (end > totalLength) {
     695      start = CoinMax(start, totalLength);
     696      memset(array + start, 0, (end - start) * sizeof(double));
    707697    }
    708698    alreadyDone += length;
    709699  }
    710700  return totalLength;
    711 } 
     701}
    712702// This compacts several sections
    713 int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths)
    714 {
    715   for (int i=0;i<numberSections;i++) {
    716     memmove(array+alreadyDone,array+starts[i],lengths[i]*sizeof(int));
     703int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths)
     704{
     705  for (int i = 0; i < numberSections; i++) {
     706    memmove(array + alreadyDone, array + starts[i], lengths[i] * sizeof(int));
    717707    alreadyDone += lengths[i];
    718708  }
    719709  return alreadyDone;
    720710}
    721 void CoinAbcScatterUpdate0(int number,CoinFactorizationDouble /*pivotValue*/,
    722                            const CoinFactorizationDouble *  COIN_RESTRICT /*thisElement*/,
    723                            const int *  COIN_RESTRICT /*thisIndex*/,
    724                            CoinFactorizationDouble *  COIN_RESTRICT /*region*/)
    725 {
    726   assert (number==0);
    727 }
    728 void CoinAbcScatterUpdate1(int number,CoinFactorizationDouble pivotValue,
    729                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    730                           const int *  COIN_RESTRICT thisIndex,
    731                           CoinFactorizationDouble * COIN_RESTRICT region)
    732 {
    733   assert (number==1);
     711void CoinAbcScatterUpdate0(int number, CoinFactorizationDouble /*pivotValue*/,
     712  const CoinFactorizationDouble *COIN_RESTRICT /*thisElement*/,
     713  const int *COIN_RESTRICT /*thisIndex*/,
     714  CoinFactorizationDouble *COIN_RESTRICT /*region*/)
     715{
     716  assert(number == 0);
     717}
     718void CoinAbcScatterUpdate1(int number, CoinFactorizationDouble pivotValue,
     719  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     720  const int *COIN_RESTRICT thisIndex,
     721  CoinFactorizationDouble *COIN_RESTRICT region)
     722{
     723  assert(number == 1);
    734724  int iRow0 = thisIndex[0];
    735725  CoinFactorizationDouble regionValue0 = region[iRow0];
    736726  region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
    737727}
    738 void CoinAbcScatterUpdate2(int number,CoinFactorizationDouble pivotValue,
    739                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    740                           const int *  COIN_RESTRICT thisIndex,
    741                           CoinFactorizationDouble * COIN_RESTRICT region)
    742 {
    743   assert (number==2);
     728void CoinAbcScatterUpdate2(int number, CoinFactorizationDouble pivotValue,
     729  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     730  const int *COIN_RESTRICT thisIndex,
     731  CoinFactorizationDouble *COIN_RESTRICT region)
     732{
     733  assert(number == 2);
    744734  int iRow0 = thisIndex[0];
    745735  int iRow1 = thisIndex[1];
     
    749739  region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
    750740}
    751 void CoinAbcScatterUpdate3(int number,CoinFactorizationDouble pivotValue,
    752                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    753                           const int *  COIN_RESTRICT thisIndex,
    754                           CoinFactorizationDouble * COIN_RESTRICT region)
    755 {
    756   assert (number==3);
     741void CoinAbcScatterUpdate3(int number, CoinFactorizationDouble pivotValue,
     742  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     743  const int *COIN_RESTRICT thisIndex,
     744  CoinFactorizationDouble *COIN_RESTRICT region)
     745{
     746  assert(number == 3);
    757747  int iRow0 = thisIndex[0];
    758748  int iRow1 = thisIndex[1];
     
    765755  region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
    766756}
    767 void CoinAbcScatterUpdate4(int number,CoinFactorizationDouble pivotValue,
    768                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    769                           const int *  COIN_RESTRICT thisIndex,
    770                           CoinFactorizationDouble * COIN_RESTRICT region)
    771 {
    772   assert (number==4);
     757void CoinAbcScatterUpdate4(int number, CoinFactorizationDouble pivotValue,
     758  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     759  const int *COIN_RESTRICT thisIndex,
     760  CoinFactorizationDouble *COIN_RESTRICT region)
     761{
     762  assert(number == 4);
    773763  int iRow0 = thisIndex[0];
    774764  int iRow1 = thisIndex[1];
     
    784774  region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
    785775}
    786 void CoinAbcScatterUpdate5(int number,CoinFactorizationDouble pivotValue,
    787                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    788                           const int *  COIN_RESTRICT thisIndex,
    789                           CoinFactorizationDouble * COIN_RESTRICT region)
    790 {
    791   assert (number==5);
     776void CoinAbcScatterUpdate5(int number, CoinFactorizationDouble pivotValue,
     777  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     778  const int *COIN_RESTRICT thisIndex,
     779  CoinFactorizationDouble *COIN_RESTRICT region)
     780{
     781  assert(number == 5);
    792782  int iRow0 = thisIndex[0];
    793783  int iRow1 = thisIndex[1];
     
    806796  region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
    807797}
    808 void CoinAbcScatterUpdate6(int number,CoinFactorizationDouble pivotValue,
    809                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    810                           const int *  COIN_RESTRICT thisIndex,
    811                           CoinFactorizationDouble * COIN_RESTRICT region)
    812 {
    813   assert (number==6);
     798void CoinAbcScatterUpdate6(int number, CoinFactorizationDouble pivotValue,
     799  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     800  const int *COIN_RESTRICT thisIndex,
     801  CoinFactorizationDouble *COIN_RESTRICT region)
     802{
     803  assert(number == 6);
    814804  int iRow0 = thisIndex[0];
    815805  int iRow1 = thisIndex[1];
     
    831821  region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
    832822}
    833 void CoinAbcScatterUpdate7(int number,CoinFactorizationDouble pivotValue,
    834                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    835                           const int *  COIN_RESTRICT thisIndex,
    836                           CoinFactorizationDouble * COIN_RESTRICT region)
    837 {
    838   assert (number==7);
     823void CoinAbcScatterUpdate7(int number, CoinFactorizationDouble pivotValue,
     824  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     825  const int *COIN_RESTRICT thisIndex,
     826  CoinFactorizationDouble *COIN_RESTRICT region)
     827{
     828  assert(number == 7);
    839829  int iRow0 = thisIndex[0];
    840830  int iRow1 = thisIndex[1];
     
    859849  region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
    860850}
    861 void CoinAbcScatterUpdate8(int number,CoinFactorizationDouble pivotValue,
    862                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    863                           const int *  COIN_RESTRICT thisIndex,
    864                           CoinFactorizationDouble * COIN_RESTRICT region)
    865 {
    866   assert (number==8);
     851void CoinAbcScatterUpdate8(int number, CoinFactorizationDouble pivotValue,
     852  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     853  const int *COIN_RESTRICT thisIndex,
     854  CoinFactorizationDouble *COIN_RESTRICT region)
     855{
     856  assert(number == 8);
    867857  int iRow0 = thisIndex[0];
    868858  int iRow1 = thisIndex[1];
     
    890880  region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
    891881}
    892 void CoinAbcScatterUpdate4N(int number,CoinFactorizationDouble pivotValue,
    893                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    894                           const int *  COIN_RESTRICT thisIndex,
    895                           CoinFactorizationDouble * COIN_RESTRICT region)
    896 {
    897   assert ((number&3)==0);
    898   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     882void CoinAbcScatterUpdate4N(int number, CoinFactorizationDouble pivotValue,
     883  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     884  const int *COIN_RESTRICT thisIndex,
     885  CoinFactorizationDouble *COIN_RESTRICT region)
     886{
     887  assert((number & 3) == 0);
     888  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
    899889    CoinSimplexInt iRow0 = thisIndex[j];
    900     CoinSimplexInt iRow1 = thisIndex[j-1];
     890    CoinSimplexInt iRow1 = thisIndex[j - 1];
    901891    CoinFactorizationDouble regionValue0 = region[iRow0];
    902892    CoinFactorizationDouble regionValue1 = region[iRow1];
    903893    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    904     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    905     iRow0 = thisIndex[j-2];
    906     iRow1 = thisIndex[j-3];
     894    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     895    iRow0 = thisIndex[j - 2];
     896    iRow1 = thisIndex[j - 3];
    907897    regionValue0 = region[iRow0];
    908898    regionValue1 = region[iRow1];
    909     region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
    910     region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
    911   }
    912 }
    913 void CoinAbcScatterUpdate4NPlus1(int number,CoinFactorizationDouble pivotValue,
    914                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    915                           const int *  COIN_RESTRICT thisIndex,
    916                           CoinFactorizationDouble * COIN_RESTRICT region)
    917 {
    918   assert ((number&3)==1);
    919   int iRow0 = thisIndex[number-1];
     899    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
     900    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
     901  }
     902}
     903void CoinAbcScatterUpdate4NPlus1(int number, CoinFactorizationDouble pivotValue,
     904  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     905  const int *COIN_RESTRICT thisIndex,
     906  CoinFactorizationDouble *COIN_RESTRICT region)
     907{
     908  assert((number & 3) == 1);
     909  int iRow0 = thisIndex[number - 1];
    920910  CoinFactorizationDouble regionValue0 = region[iRow0];
    921   region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
     911  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
    922912  number -= 1;
    923   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     913  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
    924914    CoinSimplexInt iRow0 = thisIndex[j];
    925     CoinSimplexInt iRow1 = thisIndex[j-1];
     915    CoinSimplexInt iRow1 = thisIndex[j - 1];
    926916    CoinFactorizationDouble regionValue0 = region[iRow0];
    927917    CoinFactorizationDouble regionValue1 = region[iRow1];
    928918    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    929     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    930     iRow0 = thisIndex[j-2];
    931     iRow1 = thisIndex[j-3];
     919    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     920    iRow0 = thisIndex[j - 2];
     921    iRow1 = thisIndex[j - 3];
    932922    regionValue0 = region[iRow0];
    933923    regionValue1 = region[iRow1];
    934     region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
    935     region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
    936   }
    937 }
    938 void CoinAbcScatterUpdate4NPlus2(int number,CoinFactorizationDouble pivotValue,
    939                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    940                           const int *  COIN_RESTRICT thisIndex,
    941                           CoinFactorizationDouble * COIN_RESTRICT region)
    942 {
    943   assert ((number&3)==2);
    944   int iRow0 = thisIndex[number-1];
    945   int iRow1 = thisIndex[number-2];
     924    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
     925    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
     926  }
     927}
     928void CoinAbcScatterUpdate4NPlus2(int number, CoinFactorizationDouble pivotValue,
     929  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     930  const int *COIN_RESTRICT thisIndex,
     931  CoinFactorizationDouble *COIN_RESTRICT region)
     932{
     933  assert((number & 3) == 2);
     934  int iRow0 = thisIndex[number - 1];
     935  int iRow1 = thisIndex[number - 2];
    946936  CoinFactorizationDouble regionValue0 = region[iRow0];
    947937  CoinFactorizationDouble regionValue1 = region[iRow1];
    948   region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
    949   region[iRow1] = regionValue1 - thisElement[number-2] * pivotValue;
     938  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
     939  region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
    950940  number -= 2;
    951   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     941  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
    952942    CoinSimplexInt iRow0 = thisIndex[j];
    953     CoinSimplexInt iRow1 = thisIndex[j-1];
     943    CoinSimplexInt iRow1 = thisIndex[j - 1];
    954944    CoinFactorizationDouble regionValue0 = region[iRow0];
    955945    CoinFactorizationDouble regionValue1 = region[iRow1];
    956946    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    957     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    958     iRow0 = thisIndex[j-2];
    959     iRow1 = thisIndex[j-3];
     947    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     948    iRow0 = thisIndex[j - 2];
     949    iRow1 = thisIndex[j - 3];
    960950    regionValue0 = region[iRow0];
    961951    regionValue1 = region[iRow1];
    962     region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
    963     region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
    964   }
    965 }
    966 void CoinAbcScatterUpdate4NPlus3(int number,CoinFactorizationDouble pivotValue,
    967                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
    968                           const int *  COIN_RESTRICT thisIndex,
    969                           CoinFactorizationDouble * COIN_RESTRICT region)
    970 {
    971   assert ((number&3)==3);
    972   int iRow0 = thisIndex[number-1];
    973   int iRow1 = thisIndex[number-2];
     952    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
     953    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
     954  }
     955}
     956void CoinAbcScatterUpdate4NPlus3(int number, CoinFactorizationDouble pivotValue,
     957  const CoinFactorizationDouble *COIN_RESTRICT thisElement,
     958  const int *COIN_RESTRICT thisIndex,
     959  CoinFactorizationDouble *COIN_RESTRICT region)
     960{
     961  assert((number & 3) == 3);
     962  int iRow0 = thisIndex[number - 1];
     963  int iRow1 = thisIndex[number - 2];
    974964  CoinFactorizationDouble regionValue0 = region[iRow0];
    975965  CoinFactorizationDouble regionValue1 = region[iRow1];
    976   region[iRow0] = regionValue0 - thisElement[number-1] * pivotValue;
    977   region[iRow1] = regionValue1 - thisElement[number-2] * pivotValue;
    978   iRow0 = thisIndex[number-3];
     966  region[iRow0] = regionValue0 - thisElement[number - 1] * pivotValue;
     967  region[iRow1] = regionValue1 - thisElement[number - 2] * pivotValue;
     968  iRow0 = thisIndex[number - 3];
    979969  regionValue0 = region[iRow0];
    980   region[iRow0] = regionValue0 - thisElement[number-3] * pivotValue;
     970  region[iRow0] = regionValue0 - thisElement[number - 3] * pivotValue;
    981971  number -= 3;
    982   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
     972  for (CoinBigIndex j = number - 1; j >= 0; j -= 4) {
    983973    CoinSimplexInt iRow0 = thisIndex[j];
    984     CoinSimplexInt iRow1 = thisIndex[j-1];
     974    CoinSimplexInt iRow1 = thisIndex[j - 1];
    985975    CoinFactorizationDouble regionValue0 = region[iRow0];
    986976    CoinFactorizationDouble regionValue1 = region[iRow1];
    987977    region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
    988     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
    989     iRow0 = thisIndex[j-2];
    990     iRow1 = thisIndex[j-3];
     978    region[iRow1] = regionValue1 - thisElement[j - 1] * pivotValue;
     979    iRow0 = thisIndex[j - 2];
     980    iRow1 = thisIndex[j - 3];
    991981    regionValue0 = region[iRow0];
    992982    regionValue1 = region[iRow1];
    993     region[iRow0] = regionValue0 - thisElement[j-2] * pivotValue;
    994     region[iRow1] = regionValue1 - thisElement[j-3] * pivotValue;
     983    region[iRow0] = regionValue0 - thisElement[j - 2] * pivotValue;
     984    region[iRow1] = regionValue1 - thisElement[j - 3] * pivotValue;
    995985  }
    996986}
    997987void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble /*multiplier*/,
    998                            const CoinFactorizationDouble *  COIN_RESTRICT element,
    999                            CoinFactorizationDouble *  COIN_RESTRICT /*region*/)
     988  const CoinFactorizationDouble *COIN_RESTRICT element,
     989  CoinFactorizationDouble *COIN_RESTRICT /*region*/)
    1000990{
    1001991#ifndef NDEBUG
    1002   assert (numberIn==0);
     992  assert(numberIn == 0);
    1003993#endif
    1004994}
     
    10271017#endif
    10281018  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
    1029 #if NEW_CHUNK_SIZE==2
     1019#if NEW_CHUNK_SIZE == 2
    10301020  int nFull=2&(~(NEW_CHUNK_SIZE-1));
    10311021  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
     
    10431033  }
    10441034#endif
    1045 #if NEW_CHUNK_SIZE==4
     1035#if NEW_CHUNK_SIZE == 4
    10461036  element++;
    10471037  int iColumn0=thisColumn[0];
     
    10631053#endif
    10641054  const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element);
    1065 #if NEW_CHUNK_SIZE==2
     1055#if NEW_CHUNK_SIZE == 2
    10661056  int nFull=3&(~(NEW_CHUNK_SIZE-1));
    10671057  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
     
    10791069  }
    10801070#endif
    1081 #if NEW_CHUNK_SIZE==2
     1071#if NEW_CHUNK_SIZE == 2
    10821072  element++;
    10831073  int iColumn0=thisColumn[0];
     
    11121102  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    11131103    //coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
    1114 #if NEW_CHUNK_SIZE==2
     1104#if NEW_CHUNK_SIZE == 2
    11151105    int iColumn0=thisColumn[0];
    11161106    int iColumn1=thisColumn[1];
     
    11211111    region[iColumn0]=value0;
    11221112    region[iColumn1]=value1;
    1123 #elif NEW_CHUNK_SIZE==4
     1113#elif NEW_CHUNK_SIZE == 4
    11241114    int iColumn0=thisColumn[0];
    11251115    int iColumn1=thisColumn[1];
     
    11561146  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    11571147    coin_prefetch_const(element+6);
    1158 #if NEW_CHUNK_SIZE==2
     1148#if NEW_CHUNK_SIZE == 2
    11591149    int iColumn0=thisColumn[0];
    11601150    int iColumn1=thisColumn[1];
     
    11651155    region[iColumn0]=value0;
    11661156    region[iColumn1]=value1;
    1167 #elif NEW_CHUNK_SIZE==4
     1157#elif NEW_CHUNK_SIZE == 4
    11681158    int iColumn0=thisColumn[0];
    11691159    int iColumn1=thisColumn[1];
     
    12051195  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    12061196    coin_prefetch_const(element+6);
    1207 #if NEW_CHUNK_SIZE==2
     1197#if NEW_CHUNK_SIZE == 2
    12081198    int iColumn0=thisColumn[0];
    12091199    int iColumn1=thisColumn[1];
     
    12141204    region[iColumn0]=value0;
    12151205    region[iColumn1]=value1;
    1216 #elif NEW_CHUNK_SIZE==4
     1206#elif NEW_CHUNK_SIZE == 4
    12171207    int iColumn0=thisColumn[0];
    12181208    int iColumn1=thisColumn[1];
     
    12371227    thisColumn = reinterpret_cast<const int *>(element);
    12381228  }
    1239 #if NEW_CHUNK_SIZE==4
     1229#if NEW_CHUNK_SIZE == 4
    12401230  element++;
    12411231  int iColumn0=thisColumn[0];
     
    12601250  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    12611251    coin_prefetch_const(element+6);
    1262 #if NEW_CHUNK_SIZE==2
     1252#if NEW_CHUNK_SIZE == 2
    12631253    int iColumn0=thisColumn[0];
    12641254    int iColumn1=thisColumn[1];
     
    12691259    region[iColumn0]=value0;
    12701260    region[iColumn1]=value1;
    1271 #elif NEW_CHUNK_SIZE==4
     1261#elif NEW_CHUNK_SIZE == 4
    12721262    int iColumn0=thisColumn[0];
    12731263    int iColumn1=thisColumn[1];
     
    12921282    thisColumn = reinterpret_cast<const int *>(element);
    12931283  }
    1294 #if NEW_CHUNK_SIZE==2
     1284#if NEW_CHUNK_SIZE == 2
    12951285  element++;
    12961286  int iColumn0=thisColumn[0];
     
    13251315  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    13261316    coin_prefetch_const(element+6);
    1327 #if NEW_CHUNK_SIZE==2
     1317#if NEW_CHUNK_SIZE == 2
    13281318    int iColumn0=thisColumn[0];
    13291319    int iColumn1=thisColumn[1];
     
    13341324    region[iColumn0]=value0;
    13351325    region[iColumn1]=value1;
    1336 #elif NEW_CHUNK_SIZE==4
     1326#elif NEW_CHUNK_SIZE == 4
    13371327    int iColumn0=thisColumn[0];
    13381328    int iColumn1=thisColumn[1];
     
    13671357  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    13681358    coin_prefetch_const(element+6);
    1369 #if NEW_CHUNK_SIZE==2
     1359#if NEW_CHUNK_SIZE == 2
    13701360    int iColumn0=thisColumn[0];
    13711361    int iColumn1=thisColumn[1];
     
    13761366    region[iColumn0]=value0;
    13771367    region[iColumn1]=value1;
    1378 #elif NEW_CHUNK_SIZE==4
     1368#elif NEW_CHUNK_SIZE == 4
    13791369    int iColumn0=thisColumn[0];
    13801370    int iColumn1=thisColumn[1];
     
    14091399  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    14101400    coin_prefetch_const(element+6);
    1411 #if NEW_CHUNK_SIZE==2
     1401#if NEW_CHUNK_SIZE == 2
    14121402    int iColumn0=thisColumn[0];
    14131403    int iColumn1=thisColumn[1];
     
    14181408    region[iColumn0]=value0;
    14191409    region[iColumn1]=value1;
    1420 #elif NEW_CHUNK_SIZE==4
     1410#elif NEW_CHUNK_SIZE == 4
    14211411    int iColumn0=thisColumn[0];
    14221412    int iColumn1=thisColumn[1];
     
    14561446  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    14571447    coin_prefetch_const(element+6);
    1458 #if NEW_CHUNK_SIZE==2
     1448#if NEW_CHUNK_SIZE == 2
    14591449    int iColumn0=thisColumn[0];
    14601450    int iColumn1=thisColumn[1];
     
    14651455    region[iColumn0]=value0;
    14661456    region[iColumn1]=value1;
    1467 #elif NEW_CHUNK_SIZE==4
     1457#elif NEW_CHUNK_SIZE == 4
    14681458    int iColumn0=thisColumn[0];
    14691459    int iColumn1=thisColumn[1];
     
    14881478    thisColumn = reinterpret_cast<const int *>(element);
    14891479  }
    1490 #if NEW_CHUNK_SIZE==4
     1480#if NEW_CHUNK_SIZE == 4
    14911481  element++;
    14921482  int iColumn0=thisColumn[0];
     
    15091499  for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
    15101500    coin_prefetch_const(element+6);
    1511 #if NEW_CHUNK_SIZE==2
     1501#if NEW_CHUNK_SIZE == 2
    15121502    int iColumn0=thisColumn[0];
    15131503    int iColumn1=thisColumn[1];
     
    15181508    region[iColumn0]=value0;
    15191509    region[iColumn1]=value1;
    1520 #elif NEW_CHUNK_SIZE==4
     1510#elif NEW_CHUNK_SIZE == 4
    15211511    int iColumn0=thisColumn[0];
    15221512    int iColumn1=thisColumn[1];
     
    15411531    thisColumn = reinterpret_cast<const int *>(element);
    15421532  }
    1543 #if NEW_CHUNK_SIZE==2
     1533#if NEW_CHUNK_SIZE == 2
    15441534  element++;
    15451535  int iColumn0=thisColumn[0];
     
    15801570#define functionName(zname) CoinAbc##zname##Add
    15811571#include "CoinAbcHelperFunctions.hpp"
    1582 scatterUpdate AbcScatterLow[]={
     1572scatterUpdate AbcScatterLow[] = {
    15831573  &CoinAbcScatterUpdate0,
    15841574  &CoinAbcScatterUpdate1,
     
    15891579  &CoinAbcScatterUpdate6,
    15901580  &CoinAbcScatterUpdate7,
    1591   &CoinAbcScatterUpdate8};
    1592 scatterUpdate AbcScatterHigh[]={
     1581  &CoinAbcScatterUpdate8
     1582};
     1583scatterUpdate AbcScatterHigh[] = {
    15931584  &CoinAbcScatterUpdate4N,
    15941585  &CoinAbcScatterUpdate4NPlus1,
    15951586  &CoinAbcScatterUpdate4NPlus2,
    1596   &CoinAbcScatterUpdate4NPlus3};
    1597 scatterUpdate AbcScatterLowSubtract[]={
     1587  &CoinAbcScatterUpdate4NPlus3
     1588};
     1589scatterUpdate AbcScatterLowSubtract[] = {
    15981590  &CoinAbcScatterUpdate0,
    15991591  &CoinAbcScatterUpdate1Subtract,
     
    16041596  &CoinAbcScatterUpdate6Subtract,
    16051597  &CoinAbcScatterUpdate7Subtract,
    1606   &CoinAbcScatterUpdate8Subtract};
    1607 scatterUpdate AbcScatterHighSubtract[]={
     1598  &CoinAbcScatterUpdate8Subtract
     1599};
     1600scatterUpdate AbcScatterHighSubtract[] = {
    16081601  &CoinAbcScatterUpdate4NSubtract,
    16091602  &CoinAbcScatterUpdate4NPlus1Subtract,
    16101603  &CoinAbcScatterUpdate4NPlus2Subtract,
    1611   &CoinAbcScatterUpdate4NPlus3Subtract};
    1612 scatterUpdate AbcScatterLowAdd[]={
     1604  &CoinAbcScatterUpdate4NPlus3Subtract
     1605};
     1606scatterUpdate AbcScatterLowAdd[] = {
    16131607  &CoinAbcScatterUpdate0,
    16141608  &CoinAbcScatterUpdate1Add,
     
    16191613  &CoinAbcScatterUpdate6Add,
    16201614  &CoinAbcScatterUpdate7Add,
    1621   &CoinAbcScatterUpdate8Add};
    1622 scatterUpdate AbcScatterHighAdd[]={
     1615  &CoinAbcScatterUpdate8Add
     1616};
     1617scatterUpdate AbcScatterHighAdd[] = {
    16231618  &CoinAbcScatterUpdate4NAdd,
    16241619  &CoinAbcScatterUpdate4NPlus1Add,
    16251620  &CoinAbcScatterUpdate4NPlus2Add,
    1626   &CoinAbcScatterUpdate4NPlus3Add};
     1621  &CoinAbcScatterUpdate4NPlus3Add
     1622};
    16271623#endif
    16281624#include "CoinPragma.hpp"
     
    16371633#include "CoinAbcCommonFactorization.hpp"
    16381634#if 1
    1639 #if AVX2==1
     1635#if AVX2 == 1
    16401636#include "emmintrin.h"
    1641 #elif AVX2==2
     1637#elif AVX2 == 2
    16421638#include <immintrin.h>
    1643 #elif AVX2==3
     1639#elif AVX2 == 3
    16441640#include "avx2intrin.h"
    16451641#endif
     
    16471643// cilk seems a bit fragile
    16481644#define CILK_FRAGILE 0
    1649 #if CILK_FRAGILE>1
     1645#if CILK_FRAGILE > 1
    16501646#undef cilk_spawn
    16511647#undef cilk_sync
     
    16531649#define cilk_sync
    16541650#define ONWARD 0
    1655 #elif CILK_FRAGILE==1
     1651#elif CILK_FRAGILE == 1
    16561652#define ONWARD 0
    16571653#else
    1658 #define ONWARD 1 
     1654#define ONWARD 1
    16591655#endif
    16601656#define BLOCKING1 8 // factorization strip
    16611657#define BLOCKING2 8 // dgemm recursive
    16621658#define BLOCKING3 32 // dgemm parallel
    1663 void CoinAbcDgemm(int m, int n, int k, double * COIN_RESTRICT a,int lda,
    1664                           double * COIN_RESTRICT b,double * COIN_RESTRICT c
    1665 #if ABC_PARALLEL==2
    1666                           ,int parallelMode
     1659void CoinAbcDgemm(int m, int n, int k, double *COIN_RESTRICT a, int lda,
     1660  double *COIN_RESTRICT b, double *COIN_RESTRICT c
     1661#if ABC_PARALLEL == 2
     1662  ,
     1663  int parallelMode
    16671664#endif
    16681665)
    16691666{
    1670   assert ((m&(BLOCKING8-1))==0&&(n&(BLOCKING8-1))==0&&(k&(BLOCKING8-1))==0);
     1667  assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
    16711668  /* entry for column j and row i (when multiple of BLOCKING8)
    16721669     is at aBlocked+j*m+i*BLOCKING8
    16731670  */
    1674   // k is always smallish 
    1675   if (m<=BLOCKING8&&n<=BLOCKING8) {
    1676     assert (m==BLOCKING8&&n==BLOCKING8&&k==BLOCKING8);
    1677     double * COIN_RESTRICT aBase2=a;
    1678     double * COIN_RESTRICT bBase2=b;
    1679     double * COIN_RESTRICT cBase2=c;
    1680     for (int j=0;j<BLOCKING8;j++) {
    1681       double * COIN_RESTRICT aBase=aBase2;
    1682 #if AVX2!=2
     1671  // k is always smallish
     1672  if (m <= BLOCKING8 && n <= BLOCKING8) {
     1673    assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
     1674    double *COIN_RESTRICT aBase2 = a;
     1675    double *COIN_RESTRICT bBase2 = b;
     1676    double *COIN_RESTRICT cBase2 = c;
     1677    for (int j = 0; j < BLOCKING8; j++) {
     1678      double *COIN_RESTRICT aBase = aBase2;
     1679#if AVX2 != 2
    16831680#if 1
    1684       double c0=cBase2[0];
    1685       double c1=cBase2[1];
    1686       double c2=cBase2[2];
    1687       double c3=cBase2[3];
    1688       double c4=cBase2[4];
    1689       double c5=cBase2[5];
    1690       double c6=cBase2[6];
    1691       double c7=cBase2[7];
    1692       for (int l=0;l<BLOCKING8;l++) {
    1693         double bValue = bBase2[l];
    1694         if (bValue) {
    1695           c0 -= bValue*aBase[0];
    1696           c1 -= bValue*aBase[1];
    1697           c2 -= bValue*aBase[2];
    1698           c3 -= bValue*aBase[3];
    1699           c4 -= bValue*aBase[4];
    1700           c5 -= bValue*aBase[5];
    1701           c6 -= bValue*aBase[6];
    1702           c7 -= bValue*aBase[7];
    1703         }
    1704         aBase += BLOCKING8;
     1681      double c0 = cBase2[0];
     1682      double c1 = cBase2[1];
     1683      double c2 = cBase2[2];
     1684      double c3 = cBase2[3];
     1685      double c4 = cBase2[4];
     1686      double c5 = cBase2[5];
     1687      double c6 = cBase2[6];
     1688      double c7 = cBase2[7];
     1689      for (int l = 0; l < BLOCKING8; l++) {
     1690        double bValue = bBase2[l];
     1691        if (bValue) {
     1692          c0 -= bValue * aBase[0];
     1693          c1 -= bValue * aBase[1];
     1694          c2 -= bValue * aBase[2];
     1695          c3 -= bValue * aBase[3];
     1696          c4 -= bValue * aBase[4];
     1697          c5 -= bValue * aBase[5];
     1698          c6 -= bValue * aBase[6];
     1699          c7 -= bValue * aBase[7];
     1700        }
     1701        aBase += BLOCKING8;
    17051702      }
    1706       cBase2[0]=c0;
    1707       cBase2[1]=c1;
    1708       cBase2[2]=c2;
    1709       cBase2[3]=c3;
    1710       cBase2[4]=c4;
    1711       cBase2[5]=c5;
    1712       cBase2[6]=c6;
    1713       cBase2[7]=c7;
    1714 #else
    1715       for (int l=0;l<BLOCKING8;l++) {
    1716         double bValue = bBase2[l];
    1717         if (bValue) {
    1718           for (int i=0;i<BLOCKING8;i++) {
    1719             cBase2[i] -= bValue*aBase[i];
    1720           }
    1721         }
    1722         aBase += BLOCKING8;
     1703      cBase2[0] = c0;
     1704      cBase2[1] = c1;
     1705      cBase2[2] = c2;
     1706      cBase2[3] = c3;
     1707      cBase2[4] = c4;
     1708      cBase2[5] = c5;
     1709      cBase2[6] = c6;
     1710      cBase2[7] = c7;
     1711#else
     1712      for (int l = 0; l < BLOCKING8; l++) {
     1713        double bValue = bBase2[l];
     1714        if (bValue) {
     1715          for (int i = 0; i < BLOCKING8; i++) {
     1716            cBase2[i] -= bValue * aBase[i];
     1717          }
     1718        }
     1719        aBase += BLOCKING8;
    17231720      }
    17241721#endif
    17251722#else
    17261723      //__m256d c0=_mm256_load_pd(cBase2);
    1727       __m256d c0=*reinterpret_cast<__m256d *>(cBase2);
     1724      __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
    17281725      //__m256d c1=_mm256_load_pd(cBase2+4);
    1729       __m256d c1=*reinterpret_cast<__m256d *>(cBase2+4);
    1730       for (int l=0;l<BLOCKING8;l++) {
    1731         //__m256d bb = _mm256_broadcast_sd(bBase2+l);
    1732         __m256d bb = static_cast<__m256d> (__builtin_ia32_vbroadcastsd256 (bBase2+l));
    1733         //__m256d a0 = _mm256_load_pd(aBase);
    1734         __m256d a0=*reinterpret_cast<__m256d *>(aBase);
    1735         //__m256d a1 = _mm256_load_pd(aBase+4);
    1736         __m256d a1=*reinterpret_cast<__m256d *>(aBase+4);
    1737         c0 -= bb*a0;
    1738         c1 -= bb*a1;
    1739         aBase += BLOCKING8;
     1726      __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
     1727      for (int l = 0; l < BLOCKING8; l++) {
     1728        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
     1729        __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
     1730        //__m256d a0 = _mm256_load_pd(aBase);
     1731        __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
     1732        //__m256d a1 = _mm256_load_pd(aBase+4);
     1733        __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
     1734        c0 -= bb * a0;
     1735        c1 -= bb * a1;
     1736        aBase += BLOCKING8;
    17401737      }
    17411738      //_mm256_store_pd (cBase2, c0);
    1742       *reinterpret_cast<__m256d *>(cBase2)=c0;
     1739      *reinterpret_cast< __m256d * >(cBase2) = c0;
    17431740      //_mm256_store_pd (cBase2+4, c1);
    1744       *reinterpret_cast<__m256d *>(cBase2+4)=c1;
     1741      *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
    17451742#endif
    17461743      bBase2 += BLOCKING8;
    17471744      cBase2 += BLOCKING8;
    17481745    }
    1749   } else if (m>n) {
     1746  } else if (m > n) {
    17501747    // make sure mNew1 multiple of BLOCKING8
    1751 #if BLOCKING8==8
    1752     int mNew1=((m+15)>>4)<<3;
    1753 #elif BLOCKING8==4
    1754     int mNew1=((m+7)>>3)<<2;
    1755 #elif BLOCKING8==2
    1756     int mNew1=((m+3)>>2)<<1;
     1748#if BLOCKING8 == 8
     1749    int mNew1 = ((m + 15) >> 4) << 3;
     1750#elif BLOCKING8 == 4
     1751    int mNew1 = ((m + 7) >> 3) << 2;
     1752#elif BLOCKING8 == 2
     1753    int mNew1 = ((m + 3) >> 2) << 1;
    17571754#else
    17581755    abort();
    17591756#endif
    1760     assert (mNew1>0&&m-mNew1>0);
    1761 #if ABC_PARALLEL==2
    1762     if (mNew1<=BLOCKING3||!parallelMode) {
     1757    assert(mNew1 > 0 && m - mNew1 > 0);
     1758#if ABC_PARALLEL == 2
     1759    if (mNew1 <= BLOCKING3 || !parallelMode) {
    17631760#endif
    17641761      //printf("splitMa mNew1 %d\n",mNew1);
    1765       CoinAbcDgemm(mNew1,n,k,a,lda,b,c
    1766 #if ABC_PARALLEL==2
    1767                           ,0
    1768 #endif
    1769 );
     1762      CoinAbcDgemm(mNew1, n, k, a, lda, b, c
     1763#if ABC_PARALLEL == 2
     1764        ,
     1765        0
     1766#endif
     1767      );
    17701768      //printf("splitMb mNew1 %d\n",mNew1);
    1771       CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8
    1772 #if ABC_PARALLEL==2
    1773                           ,0
    1774 #endif
    1775 );
    1776 #if ABC_PARALLEL==2
     1769      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
     1770#if ABC_PARALLEL == 2
     1771        ,
     1772        0
     1773#endif
     1774      );
     1775#if ABC_PARALLEL == 2
    17771776    } else {
    17781777      //printf("splitMa mNew1 %d\n",mNew1);
    1779       cilk_spawn CoinAbcDgemm(mNew1,n,k,a,lda,b,c,ONWARD);
     1778      cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
    17801779      //printf("splitMb mNew1 %d\n",mNew1);
    1781       CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8,ONWARD);
     1780      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
    17821781      cilk_sync;
    17831782    }
     
    17851784  } else {
    17861785    // make sure nNew1 multiple of BLOCKING8
    1787 #if BLOCKING8==8
    1788     int nNew1=((n+15)>>4)<<3;
    1789 #elif BLOCKING8==4
    1790     int nNew1=((n+7)>>3)<<2;
    1791 #elif BLOCKING8==2
    1792     int nNew1=((n+3)>>2)<<1;
     1786#if BLOCKING8 == 8
     1787    int nNew1 = ((n + 15) >> 4) << 3;
     1788#elif BLOCKING8 == 4
     1789    int nNew1 = ((n + 7) >> 3) << 2;
     1790#elif BLOCKING8 == 2
     1791    int nNew1 = ((n + 3) >> 2) << 1;
    17931792#else
    17941793    abort();
    17951794#endif
    1796     assert (nNew1>0&&n-nNew1>0);
    1797 #if ABC_PARALLEL==2
    1798     if (nNew1<=BLOCKING3||!parallelMode) {
     1795    assert(nNew1 > 0 && n - nNew1 > 0);
     1796#if ABC_PARALLEL == 2
     1797    if (nNew1 <= BLOCKING3 || !parallelMode) {
    17991798#endif
    18001799      //printf("splitNa nNew1 %d\n",nNew1);
    1801       CoinAbcDgemm(m,nNew1,k,a,lda,b,c
    1802 #if ABC_PARALLEL==2
    1803                           ,0
    1804 #endif
    1805                     );
     1800      CoinAbcDgemm(m, nNew1, k, a, lda, b, c
     1801#if ABC_PARALLEL == 2
     1802        ,
     1803        0
     1804#endif
     1805      );
    18061806      //printf("splitNb nNew1 %d\n",nNew1);
    1807       CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1
    1808 #if ABC_PARALLEL==2
    1809                           ,0
    1810 #endif
    1811                     );
    1812 #if ABC_PARALLEL==2
     1807      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
     1808#if ABC_PARALLEL == 2
     1809        ,
     1810        0
     1811#endif
     1812      );
     1813#if ABC_PARALLEL == 2
    18131814    } else {
    18141815      //printf("splitNa nNew1 %d\n",nNew1);
    1815       cilk_spawn CoinAbcDgemm(m,nNew1,k,a,lda,b,c,ONWARD);
     1816      cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
    18161817      //printf("splitNb nNew1 %d\n",nNew1);
    1817       CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1,ONWARD);
     1818      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
    18181819      cilk_sync;
    18191820    }
     
    18231824#ifdef ABC_LONG_FACTORIZATION
    18241825// Start long double version
    1825 void CoinAbcDgemm(int m, int n, int k, long double * COIN_RESTRICT a,int lda,
    1826                           long double * COIN_RESTRICT b,long double * COIN_RESTRICT c
    1827 #if ABC_PARALLEL==2
    1828                           ,int parallelMode
     1826void CoinAbcDgemm(int m, int n, int k, long double *COIN_RESTRICT a, int lda,
     1827  long double *COIN_RESTRICT b, long double *COIN_RESTRICT c
     1828#if ABC_PARALLEL == 2
     1829  ,
     1830  int parallelMode
    18291831#endif
    18301832)
    18311833{
    1832   assert ((m&(BLOCKING8-1))==0&&(n&(BLOCKING8-1))==0&&(k&(BLOCKING8-1))==0);
     1834  assert((m & (BLOCKING8 - 1)) == 0 && (n & (BLOCKING8 - 1)) == 0 && (k & (BLOCKING8 - 1)) == 0);
    18331835  /* entry for column j and row i (when multiple of BLOCKING8)
    18341836     is at aBlocked+j*m+i*BLOCKING8
    18351837  */
    1836   // k is always smallish 
    1837   if (m<=BLOCKING8&&n<=BLOCKING8) {
    1838     assert (m==BLOCKING8&&n==BLOCKING8&&k==BLOCKING8);
    1839     long double * COIN_RESTRICT aBase2=a;
    1840     long double * COIN_RESTRICT bBase2=b;
    1841     long double * COIN_RESTRICT cBase2=c;
    1842     for (int j=0;j<BLOCKING8;j++) {
    1843       long double * COIN_RESTRICT aBase=aBase2;
    1844 #if AVX2!=2
     1838  // k is always smallish
     1839  if (m <= BLOCKING8 && n <= BLOCKING8) {
     1840    assert(m == BLOCKING8 && n == BLOCKING8 && k == BLOCKING8);
     1841    long double *COIN_RESTRICT aBase2 = a;
     1842    long double *COIN_RESTRICT bBase2 = b;
     1843    long double *COIN_RESTRICT cBase2 = c;
     1844    for (int j = 0; j < BLOCKING8; j++) {
     1845      long double *COIN_RESTRICT aBase = aBase2;
     1846#if AVX2 != 2
    18451847#if 1
    1846       long double c0=cBase2[0];
    1847       long double c1=cBase2[1];
    1848       long double c2=cBase2[2];
    1849       long double c3=cBase2[3];
    1850       long double c4=cBase2[4];
    1851       long double c5=cBase2[5];
    1852       long double c6=cBase2[6];
    1853       long double c7=cBase2[7];
    1854       for (int l=0;l<BLOCKING8;l++) {
    1855         long double bValue = bBase2[l];
    1856         if (bValue) {
    1857           c0 -= bValue*aBase[0];
    1858           c1 -= bValue*aBase[1];
    1859           c2 -= bValue*aBase[2];
    1860           c3 -= bValue*aBase[3];
    1861           c4 -= bValue*aBase[4];
    1862           c5 -= bValue*aBase[5];
    1863           c6 -= bValue*aBase[6];
    1864           c7 -= bValue*aBase[7];
    1865         }
    1866         aBase += BLOCKING8;
     1848      long double c0 = cBase2[0];
     1849      long double c1 = cBase2[1];
     1850      long double c2 = cBase2[2];
     1851      long double c3 = cBase2[3];
     1852      long double c4 = cBase2[4];
     1853      long double c5 = cBase2[5];
     1854      long double c6 = cBase2[6];
     1855      long double c7 = cBase2[7];
     1856      for (int l = 0; l < BLOCKING8; l++) {
     1857        long double bValue = bBase2[l];
     1858        if (bValue) {
     1859          c0 -= bValue * aBase[0];
     1860          c1 -= bValue * aBase[1];
     1861          c2 -= bValue * aBase[2];
     1862          c3 -= bValue * aBase[3];
     1863          c4 -= bValue * aBase[4];
     1864          c5 -= bValue * aBase[5];
     1865          c6 -= bValue * aBase[6];
     1866          c7 -= bValue * aBase[7];
     1867        }
     1868        aBase += BLOCKING8;
    18671869      }
    1868       cBase2[0]=c0;
    1869       cBase2[1]=c1;
    1870       cBase2[2]=c2;
    1871       cBase2[3]=c3;
    1872       cBase2[4]=c4;
    1873       cBase2[5]=c5;
    1874       cBase2[6]=c6;
    1875       cBase2[7]=c7;
    1876 #else
    1877       for (int l=0;l<BLOCKING8;l++) {
    1878         long double bValue = bBase2[l];
    1879         if (bValue) {
    1880           for (int i=0;i<BLOCKING8;i++) {
    1881             cBase2[i] -= bValue*aBase[i];
    1882           }
    1883         }
    1884         aBase += BLOCKING8;
     1870      cBase2[0] = c0;
     1871      cBase2[1] = c1;
     1872      cBase2[2] = c2;
     1873      cBase2[3] = c3;
     1874      cBase2[4] = c4;
     1875      cBase2[5] = c5;
     1876      cBase2[6] = c6;
     1877      cBase2[7] = c7;
     1878#else
     1879      for (int l = 0; l < BLOCKING8; l++) {
     1880        long double bValue = bBase2[l];
     1881        if (bValue) {
     1882          for (int i = 0; i < BLOCKING8; i++) {
     1883            cBase2[i] -= bValue * aBase[i];
     1884          }
     1885        }
     1886        aBase += BLOCKING8;
    18851887      }
    18861888#endif
    18871889#else
    18881890      //__m256d c0=_mm256_load_pd(cBase2);
    1889       __m256d c0=*reinterpret_cast<__m256d *>(cBase2);
     1891      __m256d c0 = *reinterpret_cast< __m256d * >(cBase2);
    18901892      //__m256d c1=_mm256_load_pd(cBase2+4);
    1891       __m256d c1=*reinterpret_cast<__m256d *>(cBase2+4);
    1892       for (int l=0;l<BLOCKING8;l++) {
    1893         //__m256d bb = _mm256_broadcast_sd(bBase2+l);
    1894         __m256d bb = static_cast<__m256d> (__builtin_ia32_vbroadcastsd256 (bBase2+l));
    1895         //__m256d a0 = _mm256_load_pd(aBase);
    1896         __m256d a0=*reinterpret_cast<__m256d *>(aBase);
    1897         //__m256d a1 = _mm256_load_pd(aBase+4);
    1898         __m256d a1=*reinterpret_cast<__m256d *>(aBase+4);
    1899         c0 -= bb*a0;
    1900         c1 -= bb*a1;
    1901         aBase += BLOCKING8;
     1893      __m256d c1 = *reinterpret_cast< __m256d * >(cBase2 + 4);
     1894      for (int l = 0; l < BLOCKING8; l++) {
     1895        //__m256d bb = _mm256_broadcast_sd(bBase2+l);
     1896        __m256d bb = static_cast< __m256d >(__builtin_ia32_vbroadcastsd256(bBase2 + l));
     1897        //__m256d a0 = _mm256_load_pd(aBase);
     1898        __m256d a0 = *reinterpret_cast< __m256d * >(aBase);
     1899        //__m256d a1 = _mm256_load_pd(aBase+4);
     1900        __m256d a1 = *reinterpret_cast< __m256d * >(aBase + 4);
     1901        c0 -= bb * a0;
     1902        c1 -= bb * a1;
     1903        aBase += BLOCKING8;
    19021904      }
    19031905      //_mm256_store_pd (cBase2, c0);
    1904       *reinterpret_cast<__m256d *>(cBase2)=c0;
     1906      *reinterpret_cast< __m256d * >(cBase2) = c0;
    19051907      //_mm256_store_pd (cBase2+4, c1);
    1906       *reinterpret_cast<__m256d *>(cBase2+4)=c1;
     1908      *reinterpret_cast< __m256d * >(cBase2 + 4) = c1;
    19071909#endif
    19081910      bBase2 += BLOCKING8;
    19091911      cBase2 += BLOCKING8;
    19101912    }
    1911   } else if (m>n) {
     1913  } else if (m > n) {
    19121914    // make sure mNew1 multiple of BLOCKING8
    1913 #if BLOCKING8==8
    1914     int mNew1=((m+15)>>4)<<3;
    1915 #elif BLOCKING8==4
    1916     int mNew1=((m+7)>>3)<<2;
    1917 #elif BLOCKING8==2
    1918     int mNew1=((m+3)>>2)<<1;
     1915#if BLOCKING8 == 8
     1916    int mNew1 = ((m + 15) >> 4) << 3;
     1917#elif BLOCKING8 == 4
     1918    int mNew1 = ((m + 7) >> 3) << 2;
     1919#elif BLOCKING8 == 2
     1920    int mNew1 = ((m + 3) >> 2) << 1;
    19191921#else
    19201922    abort();
    19211923#endif
    1922     assert (mNew1>0&&m-mNew1>0);
    1923 #if ABC_PARALLEL==2
    1924     if (mNew1<=BLOCKING3||!parallelMode) {
     1924    assert(mNew1 > 0 && m - mNew1 > 0);
     1925#if ABC_PARALLEL == 2
     1926    if (mNew1 <= BLOCKING3 || !parallelMode) {
    19251927#endif
    19261928      //printf("splitMa mNew1 %d\n",mNew1);
    1927       CoinAbcDgemm(mNew1,n,k,a,lda,b,c
    1928 #if ABC_PARALLEL==2
    1929                           ,0
    1930 #endif
    1931 );
     1929      CoinAbcDgemm(mNew1, n, k, a, lda, b, c
     1930#if ABC_PARALLEL == 2
     1931        ,
     1932        0
     1933#endif
     1934      );
    19321935      //printf("splitMb mNew1 %d\n",mNew1);
    1933       CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8
    1934 #if ABC_PARALLEL==2
    1935                           ,0
    1936 #endif
    1937 );
    1938 #if ABC_PARALLEL==2
     1936      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8
     1937#if ABC_PARALLEL == 2
     1938        ,
     1939        0
     1940#endif
     1941      );
     1942#if ABC_PARALLEL == 2
    19391943    } else {
    19401944      //printf("splitMa mNew1 %d\n",mNew1);
    1941       cilk_spawn CoinAbcDgemm(mNew1,n,k,a,lda,b,c,ONWARD);
     1945      cilk_spawn CoinAbcDgemm(mNew1, n, k, a, lda, b, c, ONWARD);
    19421946      //printf("splitMb mNew1 %d\n",mNew1);
    1943       CoinAbcDgemm(m-mNew1,n,k,a+mNew1*BLOCKING8,lda,b,c+mNew1*BLOCKING8,ONWARD);
     1947      CoinAbcDgemm(m - mNew1, n, k, a + mNew1 * BLOCKING8, lda, b, c + mNew1 * BLOCKING8, ONWARD);
    19441948      cilk_sync;
    19451949    }
     
    19471951  } else {
    19481952    // make sure nNew1 multiple of BLOCKING8
    1949 #if BLOCKING8==8
    1950     int nNew1=((n+15)>>4)<<3;
    1951 #elif BLOCKING8==4
    1952     int nNew1=((n+7)>>3)<<2;
    1953 #elif BLOCKING8==2
    1954     int nNew1=((n+3)>>2)<<1;
     1953#if BLOCKING8 == 8
     1954    int nNew1 = ((n + 15) >> 4) << 3;
     1955#elif BLOCKING8 == 4
     1956    int nNew1 = ((n + 7) >> 3) << 2;
     1957#elif BLOCKING8 == 2
     1958    int nNew1 = ((n + 3) >> 2) << 1;
    19551959#else
    19561960    abort();
    19571961#endif
    1958     assert (nNew1>0&&n-nNew1>0);
    1959 #if ABC_PARALLEL==2
    1960     if (nNew1<=BLOCKING3||!parallelMode) {
     1962    assert(nNew1 > 0 && n - nNew1 > 0);
     1963#if ABC_PARALLEL == 2
     1964    if (nNew1 <= BLOCKING3 || !parallelMode) {
    19611965#endif
    19621966      //printf("splitNa nNew1 %d\n",nNew1);
    1963       CoinAbcDgemm(m,nNew1,k,a,lda,b,c
    1964 #if ABC_PARALLEL==2
    1965                           ,0
    1966 #endif
    1967                     );
     1967      CoinAbcDgemm(m, nNew1, k, a, lda, b, c
     1968#if ABC_PARALLEL == 2
     1969        ,
     1970        0
     1971#endif
     1972      );
    19681973      //printf("splitNb nNew1 %d\n",nNew1);
    1969       CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1
    1970 #if ABC_PARALLEL==2
    1971                           ,0
    1972 #endif
    1973                     );
    1974 #if ABC_PARALLEL==2
     1974      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1
     1975#if ABC_PARALLEL == 2
     1976        ,
     1977        0
     1978#endif
     1979      );
     1980#if ABC_PARALLEL == 2
    19751981    } else {
    19761982      //printf("splitNa nNew1 %d\n",nNew1);
    1977       cilk_spawn CoinAbcDgemm(m,nNew1,k,a,lda,b,c,ONWARD);
     1983      cilk_spawn CoinAbcDgemm(m, nNew1, k, a, lda, b, c, ONWARD);
    19781984      //printf("splitNb nNew1 %d\n",nNew1);
    1979       CoinAbcDgemm(m,n-nNew1,k,a,lda,b+lda*nNew1,c+lda*nNew1,ONWARD);
     1985      CoinAbcDgemm(m, n - nNew1, k, a, lda, b + lda * nNew1, c + lda * nNew1, ONWARD);
    19801986      cilk_sync;
    19811987    }
     
    19861992#if 0
    19871993// From Intel site
    1988 // get AVX intrinsics 
     1994// get AVX intrinsics
    19891995#include <immintrin.h> 
    19901996// get CPUID capability 
    1991 //#include <intrin.h> 
    1992 #define cpuid(func,ax,bx,cx,dx)\
    1993         __asm__ __volatile__ ("cpuid":\
    1994         "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func)); 
    1995 // written for clarity, not conciseness 
    1996 #define OSXSAVEFlag (1UL<<27) 
    1997 #define AVXFlag     ((1UL<<28)|OSXSAVEFlag) 
    1998 #define VAESFlag    ((1UL<<25)|AVXFlag|OSXSAVEFlag) 
    1999 #define FMAFlag     ((1UL<<12)|AVXFlag|OSXSAVEFlag) 
    2000 #define CLMULFlag   ((1UL<< 1)|AVXFlag|OSXSAVEFlag) 
     1997//#include <intrin.h>
     1998#define cpuid(func, ax, bx, cx, dx)                             \
     1999  __asm__ __volatile__("cpuid"                                  \
     2000                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
     2001                       : "a"(func)); 
     2002// written for clarity, not conciseness
     2003#define OSXSAVEFlag (1UL << 27)
     2004#define AVXFlag ((1UL << 28) | OSXSAVEFlag)
     2005#define VAESFlag ((1UL << 25) | AVXFlag | OSXSAVEFlag)
     2006#define FMAFlag ((1UL << 12) | AVXFlag | OSXSAVEFlag)
     2007#define CLMULFlag ((1UL << 1) | AVXFlag | OSXSAVEFlag) 
    20012008   
    20022009bool DetectFeature(unsigned int feature) 
     
    20072014  unsigned int ECX = CPUInfo[2];  // the output of CPUID in the ECX register.   
    20082015  if ((ECX & feature) != feature) // Missing feature   
    2009     return false;   
     2016    return false;
    20102017#if 0
    20112018  long int val = _xgetbv(0);       // read XFEATURE_ENABLED_MASK register 
    20122019  if ((val&6) != 6)               // check OS has enabled both XMM and YMM support. 
    2013     return false;   
     2020    return false;
    20142021#endif
    20152022  return true; 
    2016 }
    2017 #endif
     2023}
     2024#endif
     2025
     2026/* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2
     2027*/
Note: See TracChangeset for help on using the changeset viewer.