Changeset 2385 for trunk/Clp/src/CoinAbcHelperFunctions.hpp
 Timestamp:
 Jan 6, 2019 2:43:06 PM (3 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/Clp/src/CoinAbcHelperFunctions.hpp
r2042 r2385 9 9 #include "ClpConfig.h" 10 10 #ifdef HAVE_CMATH 11 # 12 #else 13 # 14 # 15 # 16 # 17 # 11 #include <cmath> 12 #else 13 #ifdef HAVE_MATH_H 14 #include <math.h> 15 #else 16 #include <cmath> 17 #endif 18 18 #endif 19 19 #include "CoinAbcCommon.hpp" 20 20 #ifndef abc_assert 21 #define abc_assert(condition) \ 22 { if (!condition) {printf("abc_assert in %s at line %d  %s is false\n", \ 23 __FILE__, __LINE__, __STRING(condition)); abort();} } 21 #define abc_assert(condition) \ 22 { \ 23 if (!condition) { \ 24 printf("abc_assert in %s at line %d  %s is false\n", \ 25 __FILE__, __LINE__, __STRING(condition)); \ 26 abort(); \ 27 } \ 28 } 24 29 #endif 25 30 // cilk_for granularity. 26 31 #define CILK_FOR_GRAINSIZE 128 27 32 //#define AVX2 2 28 #if AVX2 ==133 #if AVX2 == 1 29 34 #include "emmintrin.h" 30 #elif AVX2 ==235 #elif AVX2 == 2 31 36 #include <immintrin.h> 32 #elif AVX2 ==337 #elif AVX2 == 3 33 38 #include "avx2intrin.h" 34 39 #endif … … 43 48 #define UNROLL_SCATTER 2 44 49 #define INLINE_SCATTER 1 45 #if INLINE_SCATTER ==046 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,47 const CoinFactorizationDouble *COIN_RESTRICT thisElement,48 const int *COIN_RESTRICT thisIndex,49 CoinFactorizationDouble *COIN_RESTRICT region);50 #else 51 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,52 const CoinFactorizationDouble *COIN_RESTRICT thisElement,53 const int *COIN_RESTRICT thisIndex,54 CoinFactorizationDouble *COIN_RESTRICT region)50 #if INLINE_SCATTER == 0 51 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue, 52 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 53 const int *COIN_RESTRICT thisIndex, 54 CoinFactorizationDouble *COIN_RESTRICT region); 55 #else 56 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue, 57 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 58 const int *COIN_RESTRICT thisIndex, 59 CoinFactorizationDouble *COIN_RESTRICT region) 55 60 { 56 #if UNROLL_SCATTER ==057 for (CoinBigIndex j =number1 ; j >=0; j) {61 #if UNROLL_SCATTER == 0 62 for (CoinBigIndex j = number  1; j >= 0; j) { 58 63 CoinSimplexInt iRow = thisIndex[j]; 59 64 CoinFactorizationDouble regionValue = region[iRow]; 60 65 CoinFactorizationDouble value = thisElement[j]; 61 assert 66 assert(value); 62 67 region[iRow] = regionValue  value * pivotValue; 63 68 } 64 #elif UNROLL_SCATTER ==165 if ((number &1)!=0) {69 #elif UNROLL_SCATTER == 1 70 if ((number & 1) != 0) { 66 71 number; 67 72 CoinSimplexInt iRow = thisIndex[number]; … … 70 75 region[iRow] = regionValue  value * pivotValue; 71 76 } 72 for (CoinBigIndex j =number1 ; j >=0; j=2) {77 for (CoinBigIndex j = number  1; j >= 0; j = 2) { 73 78 CoinSimplexInt iRow0 = thisIndex[j]; 74 CoinSimplexInt iRow1 = thisIndex[j 1];79 CoinSimplexInt iRow1 = thisIndex[j  1]; 75 80 CoinFactorizationDouble regionValue0 = region[iRow0]; 76 81 CoinFactorizationDouble regionValue1 = region[iRow1]; 77 82 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 78 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;79 } 80 #elif UNROLL_SCATTER ==281 if ((number &1)!=0) {83 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 84 } 85 #elif UNROLL_SCATTER == 2 86 if ((number & 1) != 0) { 82 87 number; 83 88 CoinSimplexInt iRow = thisIndex[number]; 84 89 CoinFactorizationDouble regionValue = region[iRow]; 85 CoinFactorizationDouble value = thisElement[number]; 90 CoinFactorizationDouble value = thisElement[number]; 86 91 region[iRow] = regionValue  value * pivotValue; 87 92 } 88 if ((number &2)!=0) {89 CoinSimplexInt iRow0 = thisIndex[number 1];93 if ((number & 2) != 0) { 94 CoinSimplexInt iRow0 = thisIndex[number  1]; 90 95 CoinFactorizationDouble regionValue0 = region[iRow0]; 91 CoinFactorizationDouble value0 = thisElement[number 1];92 CoinSimplexInt iRow1 = thisIndex[number 2];96 CoinFactorizationDouble value0 = thisElement[number  1]; 97 CoinSimplexInt iRow1 = thisIndex[number  2]; 93 98 CoinFactorizationDouble regionValue1 = region[iRow1]; 94 CoinFactorizationDouble value1 = thisElement[number 2];99 CoinFactorizationDouble value1 = thisElement[number  2]; 95 100 region[iRow0] = regionValue0  value0 * pivotValue; 96 101 region[iRow1] = regionValue1  value1 * pivotValue; 97 number=2; 98 } 99 #pragma cilk grainsize=CILK_FOR_GRAINSIZE 100 cilk_for (CoinBigIndex j=number1 ; j >=0; j=4 ) { 102 number = 2; 103 } 104 #pragma cilk grainsize = CILK_FOR_GRAINSIZE 105 cilk_for(CoinBigIndex j = number  1; j >= 0; j = 4) 106 { 101 107 CoinSimplexInt iRow0 = thisIndex[j]; 102 CoinSimplexInt iRow1 = thisIndex[j 1];108 CoinSimplexInt iRow1 = thisIndex[j  1]; 103 109 CoinFactorizationDouble regionValue0 = region[iRow0]; 104 110 CoinFactorizationDouble regionValue1 = region[iRow1]; 105 111 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 106 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;107 CoinSimplexInt iRow2 = thisIndex[j 2];108 CoinSimplexInt iRow3 = thisIndex[j 3];112 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 113 CoinSimplexInt iRow2 = thisIndex[j  2]; 114 CoinSimplexInt iRow3 = thisIndex[j  3]; 109 115 CoinFactorizationDouble regionValue2 = region[iRow2]; 110 116 CoinFactorizationDouble regionValue3 = region[iRow3]; 111 region[iRow2] = regionValue2  thisElement[j 2] * pivotValue;112 region[iRow3] = regionValue3  thisElement[j 3] * pivotValue;113 } 114 #elif UNROLL_SCATTER ==3117 region[iRow2] = regionValue2  thisElement[j  2] * pivotValue; 118 region[iRow3] = regionValue3  thisElement[j  3] * pivotValue; 119 } 120 #elif UNROLL_SCATTER == 3 115 121 CoinSimplexInt iRow0; 116 122 CoinSimplexInt iRow1; 117 123 CoinFactorizationDouble regionValue0; 118 124 CoinFactorizationDouble regionValue1; 119 switch (static_cast<unsigned int>(number)) {125 switch (static_cast< unsigned int >(number)) { 120 126 case 0: 121 127 break; … … 245 251 break; 246 252 default: 247 if ((number &1)!=0) {253 if ((number & 1) != 0) { 248 254 number; 249 255 CoinSimplexInt iRow = thisIndex[number]; … … 252 258 region[iRow] = regionValue  value * pivotValue; 253 259 } 254 for (CoinBigIndex j =number1 ; j >=0; j=2) {260 for (CoinBigIndex j = number  1; j >= 0; j = 2) { 255 261 CoinSimplexInt iRow0 = thisIndex[j]; 256 CoinSimplexInt iRow1 = thisIndex[j 1];262 CoinSimplexInt iRow1 = thisIndex[j  1]; 257 263 CoinFactorizationDouble regionValue0 = region[iRow0]; 258 264 CoinFactorizationDouble regionValue1 = region[iRow1]; 259 265 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 260 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;266 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 261 267 } 262 268 break; … … 264 270 #endif 265 271 } 266 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,267 const CoinFactorizationDouble *COIN_RESTRICT thisElement,268 CoinFactorizationDouble *COIN_RESTRICT region)272 void ABC_INLINE inline CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue, 273 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 274 CoinFactorizationDouble *COIN_RESTRICT region) 269 275 { 270 #if UNROLL_SCATTER ==0271 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);272 for (CoinBigIndex j =number1 ; j >=0; j) {276 #if UNROLL_SCATTER == 0 277 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number); 278 for (CoinBigIndex j = number  1; j >= 0; j) { 273 279 CoinSimplexInt iRow = thisIndex[j]; 274 280 CoinFactorizationDouble regionValue = region[iRow]; 275 281 CoinFactorizationDouble value = thisElement[j]; 276 assert 282 assert(value); 277 283 region[iRow] = regionValue  value * pivotValue; 278 284 } 279 #elif UNROLL_SCATTER ==1280 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);281 if ((number &1)!=0) {285 #elif UNROLL_SCATTER == 1 286 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number); 287 if ((number & 1) != 0) { 282 288 number; 283 289 CoinSimplexInt iRow = thisIndex[number]; … … 286 292 region[iRow] = regionValue  value * pivotValue; 287 293 } 288 for (CoinBigIndex j =number1 ; j >=0; j=2) {294 for (CoinBigIndex j = number  1; j >= 0; j = 2) { 289 295 CoinSimplexInt iRow0 = thisIndex[j]; 290 CoinSimplexInt iRow1 = thisIndex[j 1];296 CoinSimplexInt iRow1 = thisIndex[j  1]; 291 297 CoinFactorizationDouble regionValue0 = region[iRow0]; 292 298 CoinFactorizationDouble regionValue1 = region[iRow1]; 293 299 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 294 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;295 } 296 #elif UNROLL_SCATTER ==2297 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);298 if ((number &1)!=0) {300 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 301 } 302 #elif UNROLL_SCATTER == 2 303 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number); 304 if ((number & 1) != 0) { 299 305 number; 300 306 CoinSimplexInt iRow = thisIndex[number]; 301 307 CoinFactorizationDouble regionValue = region[iRow]; 302 CoinFactorizationDouble value = thisElement[number]; 308 CoinFactorizationDouble value = thisElement[number]; 303 309 region[iRow] = regionValue  value * pivotValue; 304 310 } 305 if ((number &2)!=0) {306 CoinSimplexInt iRow0 = thisIndex[number 1];311 if ((number & 2) != 0) { 312 CoinSimplexInt iRow0 = thisIndex[number  1]; 307 313 CoinFactorizationDouble regionValue0 = region[iRow0]; 308 CoinFactorizationDouble value0 = thisElement[number 1];309 CoinSimplexInt iRow1 = thisIndex[number 2];314 CoinFactorizationDouble value0 = thisElement[number  1]; 315 CoinSimplexInt iRow1 = thisIndex[number  2]; 310 316 CoinFactorizationDouble regionValue1 = region[iRow1]; 311 CoinFactorizationDouble value1 = thisElement[number 2];317 CoinFactorizationDouble value1 = thisElement[number  2]; 312 318 region[iRow0] = regionValue0  value0 * pivotValue; 313 319 region[iRow1] = regionValue1  value1 * pivotValue; 314 number =2;315 } 316 #if AVX2 ==22317 CoinFactorizationDouble temp[4] __attribute__ ((aligned(32)));320 number = 2; 321 } 322 #if AVX2 == 22 323 CoinFactorizationDouble temp[4] __attribute__((aligned(32))); 318 324 __m256d pv = _mm256_broadcast_sd(&pivotValue); 319 for (CoinBigIndex j =number1 ; j >=0; j=4) {320 __m256d elements =_mm256_loadu_pd(thisElement+j3);321 CoinSimplexInt iRow0 = thisIndex[j 3];322 CoinSimplexInt iRow1 = thisIndex[j 2];323 CoinSimplexInt iRow2 = thisIndex[j 1];324 CoinSimplexInt iRow3 = thisIndex[j 0];325 for (CoinBigIndex j = number  1; j >= 0; j = 4) { 326 __m256d elements = _mm256_loadu_pd(thisElement + j  3); 327 CoinSimplexInt iRow0 = thisIndex[j  3]; 328 CoinSimplexInt iRow1 = thisIndex[j  2]; 329 CoinSimplexInt iRow2 = thisIndex[j  1]; 330 CoinSimplexInt iRow3 = thisIndex[j  0]; 325 331 temp[0] = region[iRow0]; 326 332 temp[1] = region[iRow1]; 327 333 temp[2] = region[iRow2]; 328 334 temp[3] = region[iRow3]; 329 __m256d t0 =_mm256_load_pd(temp);330 t0 = pv *elements;331 _mm256_store_pd 335 __m256d t0 = _mm256_load_pd(temp); 336 t0 = pv * elements; 337 _mm256_store_pd(temp, t0); 332 338 region[iRow0] = temp[0]; 333 339 region[iRow1] = temp[1]; … … 336 342 } 337 343 #else 338 #pragma cilk grainsize=CILK_FOR_GRAINSIZE 339 cilk_for (CoinBigIndex j=number1 ; j >=0; j=4 ) { 344 #pragma cilk grainsize = CILK_FOR_GRAINSIZE 345 cilk_for(CoinBigIndex j = number  1; j >= 0; j = 4) 346 { 340 347 CoinSimplexInt iRow0 = thisIndex[j]; 341 CoinSimplexInt iRow1 = thisIndex[j 1];348 CoinSimplexInt iRow1 = thisIndex[j  1]; 342 349 CoinFactorizationDouble regionValue0 = region[iRow0]; 343 350 CoinFactorizationDouble regionValue1 = region[iRow1]; 344 351 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 345 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;346 CoinSimplexInt iRow2 = thisIndex[j 2];347 CoinSimplexInt iRow3 = thisIndex[j 3];352 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 353 CoinSimplexInt iRow2 = thisIndex[j  2]; 354 CoinSimplexInt iRow3 = thisIndex[j  3]; 348 355 CoinFactorizationDouble regionValue2 = region[iRow2]; 349 356 CoinFactorizationDouble regionValue3 = region[iRow3]; 350 region[iRow2] = regionValue2  thisElement[j 2] * pivotValue;351 region[iRow3] = regionValue3  thisElement[j 3] * pivotValue;352 } 353 #endif 354 #elif UNROLL_SCATTER ==3355 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);357 region[iRow2] = regionValue2  thisElement[j  2] * pivotValue; 358 region[iRow3] = regionValue3  thisElement[j  3] * pivotValue; 359 } 360 #endif 361 #elif UNROLL_SCATTER == 3 362 const int *COIN_RESTRICT thisIndex = reinterpret_cast< const int * >(thisElement + number); 356 363 CoinSimplexInt iRow0; 357 364 CoinSimplexInt iRow1; 358 365 CoinFactorizationDouble regionValue0; 359 366 CoinFactorizationDouble regionValue1; 360 switch (static_cast<unsigned int>(number)) {367 switch (static_cast< unsigned int >(number)) { 361 368 case 0: 362 369 break; … … 486 493 break; 487 494 default: 488 if ((number &1)!=0) {495 if ((number & 1) != 0) { 489 496 number; 490 497 CoinSimplexInt iRow = thisIndex[number]; … … 493 500 region[iRow] = regionValue  value * pivotValue; 494 501 } 495 for (CoinBigIndex j =number1 ; j >=0; j=2) {502 for (CoinBigIndex j = number  1; j >= 0; j = 2) { 496 503 CoinSimplexInt iRow0 = thisIndex[j]; 497 CoinSimplexInt iRow1 = thisIndex[j 1];504 CoinSimplexInt iRow1 = thisIndex[j  1]; 498 505 CoinFactorizationDouble regionValue0 = region[iRow0]; 499 506 CoinFactorizationDouble regionValue1 = region[iRow1]; 500 507 region[iRow0] = regionValue0  thisElement[j] * pivotValue; 501 region[iRow1] = regionValue1  thisElement[j 1] * pivotValue;508 region[iRow1] = regionValue1  thisElement[j  1] * pivotValue; 502 509 } 503 510 break; … … 509 516 #ifdef COIN_PREFETCH 510 517 #if 1 511 #define coin_prefetch(mem) \ 512 __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<char *>(mem)))) 513 #define coin_prefetch_const(mem) \ 514 __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<const char *>(mem)))) 515 #else 516 #define coin_prefetch(mem) \ 517 __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<char *>(mem)))) 518 #define coin_prefetch_const(mem) \ 519 __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<const char *>(mem)))) 518 #define coin_prefetch(mem) \ 519 __asm__ __volatile__("prefetchnta %0" \ 520 : \ 521 : "m"(*(reinterpret_cast< char * >(mem)))) 522 #define coin_prefetch_const(mem) \ 523 __asm__ __volatile__("prefetchnta %0" \ 524 : \ 525 : "m"(*(reinterpret_cast< const char * >(mem)))) 526 #else 527 #define coin_prefetch(mem) \ 528 __asm__ __volatile__("prefetch %0" \ 529 : \ 530 : "m"(*(reinterpret_cast< char * >(mem)))) 531 #define coin_prefetch_const(mem) \ 532 __asm__ __volatile__("prefetch %0" \ 533 : \ 534 : "m"(*(reinterpret_cast< const char * >(mem)))) 520 535 #endif 521 536 #else … … 525 540 #endif 526 541 #define NEW_CHUNK_SIZE 4 527 #define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE +NEW_CHUNK_SIZE/2);528 #define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE /2)529 // leaf, pure, nothrow and hot give warnings 542 #define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE + NEW_CHUNK_SIZE / 2); 543 #define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE / 2) 544 // leaf, pure, nothrow and hot give warnings 530 545 // fastcall and sseregparm give wrong results 531 546 //#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot)) 532 #define SCATTER_ATTRIBUTE 533 typedef void (*scatterUpdate) (int,CoinFactorizationDouble,const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE;547 #define SCATTER_ATTRIBUTE 548 typedef void (*scatterUpdate)(int, CoinFactorizationDouble, const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE; 534 549 typedef struct { 535 550 scatterUpdate functionPointer; … … 538 553 } scatterStruct; 539 554 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier, 540 const CoinFactorizationDouble *COIN_RESTRICT thisElement,541 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;555 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 556 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 542 557 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier, 543 const CoinFactorizationDouble *COIN_RESTRICT thisElement,544 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;558 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 559 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 545 560 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier, 546 const CoinFactorizationDouble *COIN_RESTRICT thisElement,547 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;561 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 562 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 548 563 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier, 549 const CoinFactorizationDouble *COIN_RESTRICT thisElement,550 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;564 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 565 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 551 566 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier, 552 const CoinFactorizationDouble *COIN_RESTRICT thisElement,553 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;567 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 568 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 554 569 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier, 555 const CoinFactorizationDouble *COIN_RESTRICT thisElement,556 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;570 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 571 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 557 572 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier, 558 const CoinFactorizationDouble *COIN_RESTRICT thisElement,559 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;573 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 574 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 560 575 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier, 561 const CoinFactorizationDouble *COIN_RESTRICT thisElement,562 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;576 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 577 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 563 578 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier, 564 const CoinFactorizationDouble *COIN_RESTRICT thisElement,565 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;579 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 580 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 566 581 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier, 567 const CoinFactorizationDouble *COIN_RESTRICT thisElement,568 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;582 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 583 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 569 584 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier, 570 const CoinFactorizationDouble *COIN_RESTRICT thisElement,571 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;585 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 586 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 572 587 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier, 573 const CoinFactorizationDouble *COIN_RESTRICT thisElement,574 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;588 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 589 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 575 590 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier, 576 const CoinFactorizationDouble *COIN_RESTRICT thisElement,577 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;591 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 592 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 578 593 void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier, 579 const CoinFactorizationDouble *COIN_RESTRICT thisElement,580 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;594 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 595 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 581 596 void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier, 582 const CoinFactorizationDouble *COIN_RESTRICT thisElement,583 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;597 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 598 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 584 599 void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier, 585 const CoinFactorizationDouble *COIN_RESTRICT thisElement,586 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;600 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 601 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 587 602 void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier, 588 const CoinFactorizationDouble *COIN_RESTRICT thisElement,589 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;603 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 604 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 590 605 void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier, 591 const CoinFactorizationDouble *COIN_RESTRICT thisElement,592 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;606 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 607 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 593 608 void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier, 594 const CoinFactorizationDouble *COIN_RESTRICT thisElement,595 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;609 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 610 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 596 611 void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier, 597 const CoinFactorizationDouble *COIN_RESTRICT thisElement,598 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;612 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 613 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 599 614 void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier, 600 const CoinFactorizationDouble *COIN_RESTRICT thisElement,601 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;615 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 616 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 602 617 void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier, 603 const CoinFactorizationDouble *COIN_RESTRICT thisElement,604 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;618 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 619 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 605 620 void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier, 606 const CoinFactorizationDouble *COIN_RESTRICT thisElement,607 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;621 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 622 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 608 623 void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier, 609 const CoinFactorizationDouble *COIN_RESTRICT thisElement,610 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;624 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 625 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 611 626 void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier, 612 const CoinFactorizationDouble *COIN_RESTRICT thisElement,613 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;627 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 628 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 614 629 void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier, 615 const CoinFactorizationDouble *COIN_RESTRICT thisElement,616 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;630 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 631 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 617 632 void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier, 618 const CoinFactorizationDouble *COIN_RESTRICT thisElement,619 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;633 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 634 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 620 635 void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier, 621 const CoinFactorizationDouble *COIN_RESTRICT thisElement,622 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;636 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 637 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 623 638 void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier, 624 const CoinFactorizationDouble *COIN_RESTRICT thisElement,625 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;639 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 640 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 626 641 void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier, 627 const CoinFactorizationDouble *COIN_RESTRICT thisElement,628 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;642 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 643 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 629 644 void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier, 630 const CoinFactorizationDouble *COIN_RESTRICT thisElement,631 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;645 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 646 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 632 647 void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier, 633 const CoinFactorizationDouble *COIN_RESTRICT thisElement,634 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;648 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 649 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 635 650 void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier, 636 const CoinFactorizationDouble *COIN_RESTRICT thisElement,637 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;651 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 652 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 638 653 void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier, 639 const CoinFactorizationDouble *COIN_RESTRICT thisElement,640 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;654 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 655 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 641 656 void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier, 642 const CoinFactorizationDouble *COIN_RESTRICT thisElement,643 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;657 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 658 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 644 659 void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier, 645 const CoinFactorizationDouble *COIN_RESTRICT thisElement,646 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;660 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 661 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 647 662 void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier, 648 const CoinFactorizationDouble *COIN_RESTRICT thisElement,649 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE;650 #if INLINE_SCATTER ==0651 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue,652 const CoinFactorizationDouble *COIN_RESTRICT thisElement,653 const int *COIN_RESTRICT thisIndex,654 CoinFactorizationDouble *COIN_RESTRICT region,655 double *COIN_RESTRICT work);663 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 664 CoinFactorizationDouble *COIN_RESTRICT region) SCATTER_ATTRIBUTE; 665 #if INLINE_SCATTER == 0 666 void CoinAbcScatterUpdate(int number, CoinFactorizationDouble pivotValue, 667 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 668 const int *COIN_RESTRICT thisIndex, 669 CoinFactorizationDouble *COIN_RESTRICT region, 670 double *COIN_RESTRICT work); 656 671 #else 657 672 #if 0 … … 662 677 double * COIN_RESTRICT /*work*/) 663 678 { 664 #if UNROLL_SCATTER ==0679 #if UNROLL_SCATTER == 0 665 680 for (CoinBigIndex j=number1 ; j >=0; j ) { 666 681 CoinSimplexInt iRow = thisIndex[j]; … … 670 685 region[iRow] = regionValue  value * pivotValue; 671 686 } 672 #elif UNROLL_SCATTER ==1687 #elif UNROLL_SCATTER == 1 673 688 if ((number&1)!=0) { 674 689 CoinSimplexInt iRow = thisIndex[0]; … … 706 721 #define UNROLL_GATHER 0 707 722 #define INLINE_GATHER 1 708 #if INLINE_GATHER ==0723 #if INLINE_GATHER == 0 709 724 CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number, 710 const CoinFactorizationDouble *COIN_RESTRICT thisElement,711 const int *COIN_RESTRICT thisIndex,712 CoinFactorizationDouble *COIN_RESTRICT region);725 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 726 const int *COIN_RESTRICT thisIndex, 727 CoinFactorizationDouble *COIN_RESTRICT region); 713 728 #else 714 729 CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number, 715 const CoinFactorizationDouble *COIN_RESTRICT thisElement,716 const int *COIN_RESTRICT thisIndex,717 CoinFactorizationDouble *COIN_RESTRICT region)730 const CoinFactorizationDouble *COIN_RESTRICT thisElement, 731 const int *COIN_RESTRICT thisIndex, 732 CoinFactorizationDouble *COIN_RESTRICT region) 718 733 { 719 #if UNROLL_GATHER ==0720 CoinFactorizationDouble pivotValue =0.0;721 for (CoinBigIndex j = 0; j < number; j ++) {734 #if UNROLL_GATHER == 0 735 CoinFactorizationDouble pivotValue = 0.0; 736 for (CoinBigIndex j = 0; j < number; j++) { 722 737 CoinFactorizationDouble value = thisElement[j]; 723 738 CoinSimplexInt jRow = thisIndex[j]; … … 733 748 #define UNROLL_MULTIPLY_INDEXED 0 734 749 #define INLINE_MULTIPLY_INDEXED 0 735 #if INLINE_MULTIPLY_INDEXED ==0750 #if INLINE_MULTIPLY_INDEXED == 0 736 751 void CoinAbcMultiplyIndexed(int number, 737 const double *COIN_RESTRICT multiplier,738 const int *COIN_RESTRICT thisIndex,739 CoinFactorizationDouble *COIN_RESTRICT region);752 const double *COIN_RESTRICT multiplier, 753 const int *COIN_RESTRICT thisIndex, 754 CoinFactorizationDouble *COIN_RESTRICT region); 740 755 void CoinAbcMultiplyIndexed(int number, 741 const long double *COIN_RESTRICT multiplier,742 const int *COIN_RESTRICT thisIndex,743 long double *COIN_RESTRICT region);756 const long double *COIN_RESTRICT multiplier, 757 const int *COIN_RESTRICT thisIndex, 758 long double *COIN_RESTRICT region); 744 759 #else 745 760 void ABC_INLINE inline CoinAbcMultiplyIndexed(int number, 746 const double *COIN_RESTRICT multiplier,747 const int *COIN_RESTRICT thisIndex,748 CoinFactorizationDouble *COIN_RESTRICT region)761 const double *COIN_RESTRICT multiplier, 762 const int *COIN_RESTRICT thisIndex, 763 CoinFactorizationDouble *COIN_RESTRICT region) 749 764 { 750 765 } 751 766 #endif 752 double CoinAbcMaximumAbsElement(const double * 753 void CoinAbcMinMaxAbsElement(const double * region, int size,double & minimum , double &maximum);754 void CoinAbcMinMaxAbsNormalValues(const double * region, int size,double & minimum , double &maximum);755 void CoinAbcScale(double * region, double multiplier,int size);756 void CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int size);767 double CoinAbcMaximumAbsElement(const double *region, int size); 768 void CoinAbcMinMaxAbsElement(const double *region, int size, double &minimum, double &maximum); 769 void CoinAbcMinMaxAbsNormalValues(const double *region, int size, double &minimum, double &maximum); 770 void CoinAbcScale(double *region, double multiplier, int size); 771 void CoinAbcScaleNormalValues(double *region, double multiplier, double killIfLessThanThis, int size); 757 772 /// maximum fabs(region[i]) and then region[i]*=multiplier 758 double CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int size);759 void CoinAbcSetElements(double * 760 void CoinAbcMultiplyAdd(const double * 761 double *regionChanged, double multiplier2);762 double CoinAbcInnerProduct(const double * region1, int size, const double *region2);763 void CoinAbcGetNorms(const double * region, int size, double & norm1, double &norm2);773 double CoinAbcMaximumAbsElementAndScale(double *region, double multiplier, int size); 774 void CoinAbcSetElements(double *region, int size, double value); 775 void CoinAbcMultiplyAdd(const double *region1, int size, double multiplier1, 776 double *regionChanged, double multiplier2); 777 double CoinAbcInnerProduct(const double *region1, int size, const double *region2); 778 void CoinAbcGetNorms(const double *region, int size, double &norm1, double &norm2); 764 779 /// regionTo[index[i]]=regionFrom[i] 765 void CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int number);780 void CoinAbcScatterTo(const double *regionFrom, double *regionTo, const int *index, int number); 766 781 /// regionTo[i]=regionFrom[index[i]] 767 void CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int number);782 void CoinAbcGatherFrom(const double *regionFrom, double *regionTo, const int *index, int number); 768 783 /// regionTo[index[i]]=0.0 769 void CoinAbcScatterZeroTo(double * regionTo, const int * index,int number);784 void CoinAbcScatterZeroTo(double *regionTo, const int *index, int number); 770 785 /// regionTo[indexScatter[indexList[i]]]=regionFrom[indexList[i]] 771 void CoinAbcScatterToList(const double * regionFrom, double * regionTo,772 const int * indexList, const int * indexScatter ,int number);786 void CoinAbcScatterToList(const double *regionFrom, double *regionTo, 787 const int *indexList, const int *indexScatter, int number); 773 788 /// array[i]=1.0/sqrt(array[i]) 774 void CoinAbcInverseSqrts(double * 775 void CoinAbcReciprocal(double * 776 void CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size);777 void CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size);778 void CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size);779 void CoinAbcMemset0Long(double * array,int size);780 void CoinAbcMemset0Long(int * array,int size);781 void CoinAbcMemset0Long(unsigned char * array,int size);782 void CoinAbcMemmove(double * array,const double * arrayFrom,int size);783 void CoinAbcMemmove(int * array,const int * arrayFrom,int size);784 void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size);789 void CoinAbcInverseSqrts(double *array, int n); 790 void CoinAbcReciprocal(double *array, int n, const double *input); 791 void CoinAbcMemcpyLong(double *array, const double *arrayFrom, int size); 792 void CoinAbcMemcpyLong(int *array, const int *arrayFrom, int size); 793 void CoinAbcMemcpyLong(unsigned char *array, const unsigned char *arrayFrom, int size); 794 void CoinAbcMemset0Long(double *array, int size); 795 void CoinAbcMemset0Long(int *array, int size); 796 void CoinAbcMemset0Long(unsigned char *array, int size); 797 void CoinAbcMemmove(double *array, const double *arrayFrom, int size); 798 void CoinAbcMemmove(int *array, const int *arrayFrom, int size); 799 void CoinAbcMemmove(unsigned char *array, const unsigned char *arrayFrom, int size); 785 800 /// This moves down and zeroes out end 786 void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size);801 void CoinAbcMemmoveAndZero(double *array, double *arrayFrom, int size); 787 802 /// This compacts several sections and zeroes out end (returns number) 788 int CoinAbcCompact(int numberSections, int alreadyDone,double * array,const int * starts, const int * lengths);803 int CoinAbcCompact(int numberSections, int alreadyDone, double *array, const int *starts, const int *lengths); 789 804 /// This compacts several sections (returns number) 790 int CoinAbcCompact(int numberSections, int alreadyDone,int * array,const int * starts, const int * lengths);805 int CoinAbcCompact(int numberSections, int alreadyDone, int *array, const int *starts, const int *lengths); 791 806 #endif 792 807 #if ABC_CREATE_SCATTER_FUNCTION 793 808 SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier, 794 const CoinFactorizationDouble *COIN_RESTRICT element,795 CoinFactorizationDouble *COIN_RESTRICT region)809 const CoinFactorizationDouble *COIN_RESTRICT element, 810 CoinFactorizationDouble *COIN_RESTRICT region) 796 811 { 797 812 #ifndef NDEBUG 798 assert (numberIn==1);799 #endif 800 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+1);801 int iColumn0 =thisColumn[0];802 double value0 =region[iColumn0];803 value0 OPERATION multiplier *element[0];804 region[iColumn0] =value0;813 assert(numberIn == 1); 814 #endif 815 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 1); 816 int iColumn0 = thisColumn[0]; 817 double value0 = region[iColumn0]; 818 value0 OPERATION multiplier *element[0]; 819 region[iColumn0] = value0; 805 820 } 806 821 SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier, 807 const CoinFactorizationDouble *COIN_RESTRICT element,808 CoinFactorizationDouble *COIN_RESTRICT region)822 const CoinFactorizationDouble *COIN_RESTRICT element, 823 CoinFactorizationDouble *COIN_RESTRICT region) 809 824 { 810 825 #ifndef NDEBUG 811 assert (numberIn==2);812 #endif 813 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+2);814 #if NEW_CHUNK_SIZE ==2815 int nFull =2&(~(NEW_CHUNK_SIZE1));816 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {817 coin_prefetch(element +NEW_CHUNK_SIZE_INCREMENT);818 int iColumn0 =thisColumn[0];819 int iColumn1 =thisColumn[1];820 CoinFactorizationDouble value0 =region[iColumn0];821 CoinFactorizationDouble value1 =region[iColumn1];822 value0 OPERATION multiplier *element[0+NEW_CHUNK_SIZE_OFFSET];823 value1 OPERATION multiplier *element[1+NEW_CHUNK_SIZE_OFFSET];824 region[iColumn0] =value0;825 region[iColumn1] =value1;826 element +=NEW_CHUNK_SIZE_INCREMENT;827 thisColumn = reinterpret_cast< const int *>(element);828 } 829 #endif 830 #if NEW_CHUNK_SIZE ==4831 int iColumn0 =thisColumn[0];832 int iColumn1 =thisColumn[1];833 CoinFactorizationDouble value0 =region[iColumn0];834 CoinFactorizationDouble value1 =region[iColumn1];835 value0 OPERATION multiplier *element[0];836 value1 OPERATION multiplier *element[1];837 region[iColumn0] =value0;838 region[iColumn1] =value1;826 assert(numberIn == 2); 827 #endif 828 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 2); 829 #if NEW_CHUNK_SIZE == 2 830 int nFull = 2 & (~(NEW_CHUNK_SIZE  1)); 831 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 832 coin_prefetch(element + NEW_CHUNK_SIZE_INCREMENT); 833 int iColumn0 = thisColumn[0]; 834 int iColumn1 = thisColumn[1]; 835 CoinFactorizationDouble value0 = region[iColumn0]; 836 CoinFactorizationDouble value1 = region[iColumn1]; 837 value0 OPERATION multiplier *element[0 + NEW_CHUNK_SIZE_OFFSET]; 838 value1 OPERATION multiplier *element[1 + NEW_CHUNK_SIZE_OFFSET]; 839 region[iColumn0] = value0; 840 region[iColumn1] = value1; 841 element += NEW_CHUNK_SIZE_INCREMENT; 842 thisColumn = reinterpret_cast< const int * >(element); 843 } 844 #endif 845 #if NEW_CHUNK_SIZE == 4 846 int iColumn0 = thisColumn[0]; 847 int iColumn1 = thisColumn[1]; 848 CoinFactorizationDouble value0 = region[iColumn0]; 849 CoinFactorizationDouble value1 = region[iColumn1]; 850 value0 OPERATION multiplier *element[0]; 851 value1 OPERATION multiplier *element[1]; 852 region[iColumn0] = value0; 853 region[iColumn1] = value1; 839 854 #endif 840 855 } 841 856 SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier, 842 const CoinFactorizationDouble *COIN_RESTRICT element,843 CoinFactorizationDouble *COIN_RESTRICT region)857 const CoinFactorizationDouble *COIN_RESTRICT element, 858 CoinFactorizationDouble *COIN_RESTRICT region) 844 859 { 845 860 #ifndef NDEBUG 846 assert (numberIn==3);847 #endif 848 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+3);849 #if AVX2 ==1861 assert(numberIn == 3); 862 #endif 863 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 3); 864 #if AVX2 == 1 850 865 double temp[2]; 851 866 #endif 852 #if NEW_CHUNK_SIZE ==2853 int nFull =3&(~(NEW_CHUNK_SIZE1));854 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {867 #if NEW_CHUNK_SIZE == 2 868 int nFull = 3 & (~(NEW_CHUNK_SIZE  1)); 869 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 855 870 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT); 856 int iColumn0 =thisColumn[0];857 int iColumn1 =thisColumn[1];858 CoinFactorizationDouble value0 =region[iColumn0];859 CoinFactorizationDouble value1 =region[iColumn1];860 value0 OPERATION multiplier *element[0];861 value1 OPERATION multiplier *element[1];862 region[iColumn0] =value0;863 region[iColumn1] =value1;864 element +=NEW_CHUNK_SIZE;865 thisColumn + = NEW_CHUNK_SIZE;866 } 867 #endif 868 #if NEW_CHUNK_SIZE ==2869 int iColumn0 =thisColumn[0];870 double value0 =region[iColumn0];871 value0 OPERATION multiplier *element[0];872 region[iColumn0] =value0;873 #else 874 int iColumn0 =thisColumn[0];875 int iColumn1 =thisColumn[1];876 int iColumn2 =thisColumn[2];877 #if AVX2 ==1871 int iColumn0 = thisColumn[0]; 872 int iColumn1 = thisColumn[1]; 873 CoinFactorizationDouble value0 = region[iColumn0]; 874 CoinFactorizationDouble value1 = region[iColumn1]; 875 value0 OPERATION multiplier *element[0]; 876 value1 OPERATION multiplier *element[1]; 877 region[iColumn0] = value0; 878 region[iColumn1] = value1; 879 element += NEW_CHUNK_SIZE; 880 thisColumn + = NEW_CHUNK_SIZE; 881 } 882 #endif 883 #if NEW_CHUNK_SIZE == 2 884 int iColumn0 = thisColumn[0]; 885 double value0 = region[iColumn0]; 886 value0 OPERATION multiplier *element[0]; 887 region[iColumn0] = value0; 888 #else 889 int iColumn0 = thisColumn[0]; 890 int iColumn1 = thisColumn[1]; 891 int iColumn2 = thisColumn[2]; 892 #if AVX2 == 1 878 893 __v2df bb; 879 double value2 =region[iColumn2];880 value2 OPERATION multiplier *element[2];881 set_const_v2df(bb, multiplier);882 temp[0] =region[iColumn0];883 temp[1] =region[iColumn1];884 region[iColumn2] =value2;885 __v2df v0 = __builtin_ia32_loadupd 886 __v2df a = __builtin_ia32_loadupd 894 double value2 = region[iColumn2]; 895 value2 OPERATION multiplier *element[2]; 896 set_const_v2df(bb, multiplier); 897 temp[0] = region[iColumn0]; 898 temp[1] = region[iColumn1]; 899 region[iColumn2] = value2; 900 __v2df v0 = __builtin_ia32_loadupd(temp); 901 __v2df a = __builtin_ia32_loadupd(element); 887 902 a *= bb; 888 903 v0 OPERATION a; 889 __builtin_ia32_storeupd 890 region[iColumn0] =temp[0];891 region[iColumn1] =temp[1];892 #else 893 double value0 =region[iColumn0];894 double value1 =region[iColumn1];895 double value2 =region[iColumn2];896 value0 OPERATION multiplier *element[0];897 value1 OPERATION multiplier *element[1];898 value2 OPERATION multiplier *element[2];899 region[iColumn0] =value0;900 region[iColumn1] =value1;901 region[iColumn2] =value2;904 __builtin_ia32_storeupd(temp, v0); 905 region[iColumn0] = temp[0]; 906 region[iColumn1] = temp[1]; 907 #else 908 double value0 = region[iColumn0]; 909 double value1 = region[iColumn1]; 910 double value2 = region[iColumn2]; 911 value0 OPERATION multiplier *element[0]; 912 value1 OPERATION multiplier *element[1]; 913 value2 OPERATION multiplier *element[2]; 914 region[iColumn0] = value0; 915 region[iColumn1] = value1; 916 region[iColumn2] = value2; 902 917 #endif 903 918 #endif 904 919 } 905 920 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier, 906 const CoinFactorizationDouble *COIN_RESTRICT element,907 CoinFactorizationDouble *COIN_RESTRICT region)921 const CoinFactorizationDouble *COIN_RESTRICT element, 922 CoinFactorizationDouble *COIN_RESTRICT region) 908 923 { 909 924 #ifndef NDEBUG 910 assert (numberIn==4);911 #endif 912 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+4);913 int nFull =4&(~(NEW_CHUNK_SIZE1));914 #if AVX2 ==1925 assert(numberIn == 4); 926 #endif 927 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 4); 928 int nFull = 4 & (~(NEW_CHUNK_SIZE  1)); 929 #if AVX2 == 1 915 930 double temp[4]; 916 931 #endif 917 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {932 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 918 933 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT); 919 #if NEW_CHUNK_SIZE ==2920 int iColumn0 =thisColumn[0];921 int iColumn1 =thisColumn[1];922 double value0 =region[iColumn0];923 double value1 =region[iColumn1];924 value0 OPERATION multiplier *element[0];925 value1 OPERATION multiplier *element[1];926 region[iColumn0] =value0;927 region[iColumn1] =value1;928 #elif NEW_CHUNK_SIZE ==4929 int iColumn0 =thisColumn[0];930 int iColumn1 =thisColumn[1];931 int iColumn2 =thisColumn[2];932 int iColumn3 =thisColumn[3];933 #if AVX2 ==1934 #if NEW_CHUNK_SIZE == 2 935 int iColumn0 = thisColumn[0]; 936 int iColumn1 = thisColumn[1]; 937 double value0 = region[iColumn0]; 938 double value1 = region[iColumn1]; 939 value0 OPERATION multiplier *element[0]; 940 value1 OPERATION multiplier *element[1]; 941 region[iColumn0] = value0; 942 region[iColumn1] = value1; 943 #elif NEW_CHUNK_SIZE == 4 944 int iColumn0 = thisColumn[0]; 945 int iColumn1 = thisColumn[1]; 946 int iColumn2 = thisColumn[2]; 947 int iColumn3 = thisColumn[3]; 948 #if AVX2 == 1 934 949 __v2df bb; 935 set_const_v2df(bb, multiplier);936 temp[0] =region[iColumn0];937 temp[1] =region[iColumn1];938 temp[2] =region[iColumn2];939 temp[3] =region[iColumn3];940 __v2df v0 = __builtin_ia32_loadupd 941 __v2df v1 = __builtin_ia32_loadupd (temp+2);942 __v2df a = __builtin_ia32_loadupd 950 set_const_v2df(bb, multiplier); 951 temp[0] = region[iColumn0]; 952 temp[1] = region[iColumn1]; 953 temp[2] = region[iColumn2]; 954 temp[3] = region[iColumn3]; 955 __v2df v0 = __builtin_ia32_loadupd(temp); 956 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 957 __v2df a = __builtin_ia32_loadupd(element); 943 958 a *= bb; 944 959 v0 OPERATION a; 945 a = __builtin_ia32_loadupd (element+2);960 a = __builtin_ia32_loadupd(element + 2); 946 961 a *= bb; 947 962 v1 OPERATION a; 948 __builtin_ia32_storeupd 949 __builtin_ia32_storeupd (temp+2, v1);950 region[iColumn0] =temp[0];951 region[iColumn1] =temp[1];952 region[iColumn2] =temp[2];953 region[iColumn3] =temp[3];954 #else 955 double value0 =region[iColumn0];956 double value1 =region[iColumn1];957 double value2 =region[iColumn2];958 double value3 =region[iColumn3];959 value0 OPERATION multiplier *element[0];960 value1 OPERATION multiplier *element[1];961 value2 OPERATION multiplier *element[2];962 value3 OPERATION multiplier *element[3];963 region[iColumn0] =value0;964 region[iColumn1] =value1;965 region[iColumn2] =value2;966 region[iColumn3] =value3;963 __builtin_ia32_storeupd(temp, v0); 964 __builtin_ia32_storeupd(temp + 2, v1); 965 region[iColumn0] = temp[0]; 966 region[iColumn1] = temp[1]; 967 region[iColumn2] = temp[2]; 968 region[iColumn3] = temp[3]; 969 #else 970 double value0 = region[iColumn0]; 971 double value1 = region[iColumn1]; 972 double value2 = region[iColumn2]; 973 double value3 = region[iColumn3]; 974 value0 OPERATION multiplier *element[0]; 975 value1 OPERATION multiplier *element[1]; 976 value2 OPERATION multiplier *element[2]; 977 value3 OPERATION multiplier *element[3]; 978 region[iColumn0] = value0; 979 region[iColumn1] = value1; 980 region[iColumn2] = value2; 981 region[iColumn3] = value3; 967 982 #endif 968 983 #else 969 984 abort(); 970 985 #endif 971 element +=NEW_CHUNK_SIZE;986 element += NEW_CHUNK_SIZE; 972 987 thisColumn += NEW_CHUNK_SIZE; 973 988 } 974 989 } 975 990 SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier, 976 const CoinFactorizationDouble *COIN_RESTRICT element,977 CoinFactorizationDouble *COIN_RESTRICT region)991 const CoinFactorizationDouble *COIN_RESTRICT element, 992 CoinFactorizationDouble *COIN_RESTRICT region) 978 993 { 979 994 #ifndef NDEBUG 980 assert (numberIn==5);981 #endif 982 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+5);983 int nFull =5&(~(NEW_CHUNK_SIZE1));984 #if AVX2 ==1995 assert(numberIn == 5); 996 #endif 997 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 5); 998 int nFull = 5 & (~(NEW_CHUNK_SIZE  1)); 999 #if AVX2 == 1 985 1000 double temp[4]; 986 1001 #endif 987 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1002 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 988 1003 //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT); 989 #if NEW_CHUNK_SIZE ==2990 int iColumn0 =thisColumn[0];991 int iColumn1 =thisColumn[1];992 double value0 =region[iColumn0];993 double value1 =region[iColumn1];994 value0 OPERATION multiplier *element[0];995 value1 OPERATION multiplier *element[1];996 region[iColumn0] =value0;997 region[iColumn1] =value1;998 #elif NEW_CHUNK_SIZE ==4999 int iColumn0 =thisColumn[0];1000 int iColumn1 =thisColumn[1];1001 int iColumn2 =thisColumn[2];1002 int iColumn3 =thisColumn[3];1003 #if AVX2 ==11004 #if NEW_CHUNK_SIZE == 2 1005 int iColumn0 = thisColumn[0]; 1006 int iColumn1 = thisColumn[1]; 1007 double value0 = region[iColumn0]; 1008 double value1 = region[iColumn1]; 1009 value0 OPERATION multiplier *element[0]; 1010 value1 OPERATION multiplier *element[1]; 1011 region[iColumn0] = value0; 1012 region[iColumn1] = value1; 1013 #elif NEW_CHUNK_SIZE == 4 1014 int iColumn0 = thisColumn[0]; 1015 int iColumn1 = thisColumn[1]; 1016 int iColumn2 = thisColumn[2]; 1017 int iColumn3 = thisColumn[3]; 1018 #if AVX2 == 1 1004 1019 __v2df bb; 1005 set_const_v2df(bb, multiplier);1006 temp[0] =region[iColumn0];1007 temp[1] =region[iColumn1];1008 temp[2] =region[iColumn2];1009 temp[3] =region[iColumn3];1010 __v2df v0 = __builtin_ia32_loadupd 1011 __v2df v1 = __builtin_ia32_loadupd (temp+2);1012 __v2df a = __builtin_ia32_loadupd 1020 set_const_v2df(bb, multiplier); 1021 temp[0] = region[iColumn0]; 1022 temp[1] = region[iColumn1]; 1023 temp[2] = region[iColumn2]; 1024 temp[3] = region[iColumn3]; 1025 __v2df v0 = __builtin_ia32_loadupd(temp); 1026 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1027 __v2df a = __builtin_ia32_loadupd(element); 1013 1028 a *= bb; 1014 1029 v0 OPERATION a; 1015 a = __builtin_ia32_loadupd (element+2);1030 a = __builtin_ia32_loadupd(element + 2); 1016 1031 a *= bb; 1017 1032 v1 OPERATION a; 1018 __builtin_ia32_storeupd 1019 __builtin_ia32_storeupd (temp+2, v1);1020 region[iColumn0] =temp[0];1021 region[iColumn1] =temp[1];1022 region[iColumn2] =temp[2];1023 region[iColumn3] =temp[3];1024 #else 1025 double value0 =region[iColumn0];1026 double value1 =region[iColumn1];1027 double value2 =region[iColumn2];1028 double value3 =region[iColumn3];1029 value0 OPERATION multiplier *element[0];1030 value1 OPERATION multiplier *element[1];1031 value2 OPERATION multiplier *element[2];1032 value3 OPERATION multiplier *element[3];1033 region[iColumn0] =value0;1034 region[iColumn1] =value1;1035 region[iColumn2] =value2;1036 region[iColumn3] =value3;1033 __builtin_ia32_storeupd(temp, v0); 1034 __builtin_ia32_storeupd(temp + 2, v1); 1035 region[iColumn0] = temp[0]; 1036 region[iColumn1] = temp[1]; 1037 region[iColumn2] = temp[2]; 1038 region[iColumn3] = temp[3]; 1039 #else 1040 double value0 = region[iColumn0]; 1041 double value1 = region[iColumn1]; 1042 double value2 = region[iColumn2]; 1043 double value3 = region[iColumn3]; 1044 value0 OPERATION multiplier *element[0]; 1045 value1 OPERATION multiplier *element[1]; 1046 value2 OPERATION multiplier *element[2]; 1047 value3 OPERATION multiplier *element[3]; 1048 region[iColumn0] = value0; 1049 region[iColumn1] = value1; 1050 region[iColumn2] = value2; 1051 region[iColumn3] = value3; 1037 1052 #endif 1038 1053 #else 1039 1054 abort(); 1040 1055 #endif 1041 element +=NEW_CHUNK_SIZE;1056 element += NEW_CHUNK_SIZE; 1042 1057 thisColumn += NEW_CHUNK_SIZE; 1043 1058 } 1044 int iColumn0 =thisColumn[0];1045 double value0 =region[iColumn0];1046 value0 OPERATION multiplier *element[0];1047 region[iColumn0] =value0;1059 int iColumn0 = thisColumn[0]; 1060 double value0 = region[iColumn0]; 1061 value0 OPERATION multiplier *element[0]; 1062 region[iColumn0] = value0; 1048 1063 } 1049 1064 SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier, 1050 const CoinFactorizationDouble *COIN_RESTRICT element,1051 CoinFactorizationDouble *COIN_RESTRICT region)1065 const CoinFactorizationDouble *COIN_RESTRICT element, 1066 CoinFactorizationDouble *COIN_RESTRICT region) 1052 1067 { 1053 1068 #ifndef NDEBUG 1054 assert (numberIn==6);1055 #endif 1056 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+6);1057 int nFull =6&(~(NEW_CHUNK_SIZE1));1058 #if AVX2 ==11069 assert(numberIn == 6); 1070 #endif 1071 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 6); 1072 int nFull = 6 & (~(NEW_CHUNK_SIZE  1)); 1073 #if AVX2 == 1 1059 1074 double temp[4]; 1060 1075 #endif 1061 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1062 coin_prefetch_const(element +6);1063 #if NEW_CHUNK_SIZE ==21064 int iColumn0 =thisColumn[0];1065 int iColumn1 =thisColumn[1];1066 double value0 =region[iColumn0];1067 double value1 =region[iColumn1];1068 value0 OPERATION multiplier *element[0];1069 value1 OPERATION multiplier *element[1];1070 region[iColumn0] =value0;1071 region[iColumn1] =value1;1072 #elif NEW_CHUNK_SIZE ==41073 int iColumn0 =thisColumn[0];1074 int iColumn1 =thisColumn[1];1075 int iColumn2 =thisColumn[2];1076 int iColumn3 =thisColumn[3];1077 #if AVX2 ==11076 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1077 coin_prefetch_const(element + 6); 1078 #if NEW_CHUNK_SIZE == 2 1079 int iColumn0 = thisColumn[0]; 1080 int iColumn1 = thisColumn[1]; 1081 double value0 = region[iColumn0]; 1082 double value1 = region[iColumn1]; 1083 value0 OPERATION multiplier *element[0]; 1084 value1 OPERATION multiplier *element[1]; 1085 region[iColumn0] = value0; 1086 region[iColumn1] = value1; 1087 #elif NEW_CHUNK_SIZE == 4 1088 int iColumn0 = thisColumn[0]; 1089 int iColumn1 = thisColumn[1]; 1090 int iColumn2 = thisColumn[2]; 1091 int iColumn3 = thisColumn[3]; 1092 #if AVX2 == 1 1078 1093 __v2df bb; 1079 set_const_v2df(bb, multiplier);1080 temp[0] =region[iColumn0];1081 temp[1] =region[iColumn1];1082 temp[2] =region[iColumn2];1083 temp[3] =region[iColumn3];1084 __v2df v0 = __builtin_ia32_loadupd 1085 __v2df v1 = __builtin_ia32_loadupd (temp+2);1086 __v2df a = __builtin_ia32_loadupd 1094 set_const_v2df(bb, multiplier); 1095 temp[0] = region[iColumn0]; 1096 temp[1] = region[iColumn1]; 1097 temp[2] = region[iColumn2]; 1098 temp[3] = region[iColumn3]; 1099 __v2df v0 = __builtin_ia32_loadupd(temp); 1100 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1101 __v2df a = __builtin_ia32_loadupd(element); 1087 1102 a *= bb; 1088 1103 v0 OPERATION a; 1089 a = __builtin_ia32_loadupd (element+2);1104 a = __builtin_ia32_loadupd(element + 2); 1090 1105 a *= bb; 1091 1106 v1 OPERATION a; 1092 __builtin_ia32_storeupd 1093 __builtin_ia32_storeupd (temp+2, v1);1094 region[iColumn0] =temp[0];1095 region[iColumn1] =temp[1];1096 region[iColumn2] =temp[2];1097 region[iColumn3] =temp[3];1098 #else 1099 double value0 =region[iColumn0];1100 double value1 =region[iColumn1];1101 double value2 =region[iColumn2];1102 double value3 =region[iColumn3];1103 value0 OPERATION multiplier *element[0];1104 value1 OPERATION multiplier *element[1];1105 value2 OPERATION multiplier *element[2];1106 value3 OPERATION multiplier *element[3];1107 region[iColumn0] =value0;1108 region[iColumn1] =value1;1109 region[iColumn2] =value2;1110 region[iColumn3] =value3;1107 __builtin_ia32_storeupd(temp, v0); 1108 __builtin_ia32_storeupd(temp + 2, v1); 1109 region[iColumn0] = temp[0]; 1110 region[iColumn1] = temp[1]; 1111 region[iColumn2] = temp[2]; 1112 region[iColumn3] = temp[3]; 1113 #else 1114 double value0 = region[iColumn0]; 1115 double value1 = region[iColumn1]; 1116 double value2 = region[iColumn2]; 1117 double value3 = region[iColumn3]; 1118 value0 OPERATION multiplier *element[0]; 1119 value1 OPERATION multiplier *element[1]; 1120 value2 OPERATION multiplier *element[2]; 1121 value3 OPERATION multiplier *element[3]; 1122 region[iColumn0] = value0; 1123 region[iColumn1] = value1; 1124 region[iColumn2] = value2; 1125 region[iColumn3] = value3; 1111 1126 #endif 1112 1127 #else 1113 1128 abort(); 1114 1129 #endif 1115 element +=NEW_CHUNK_SIZE;1130 element += NEW_CHUNK_SIZE; 1116 1131 thisColumn += NEW_CHUNK_SIZE; 1117 1132 } 1118 #if NEW_CHUNK_SIZE ==41119 int iColumn0 =thisColumn[0];1120 int iColumn1 =thisColumn[1];1121 double value0 =region[iColumn0];1122 double value1 =region[iColumn1];1123 value0 OPERATION multiplier *element[0];1124 value1 OPERATION multiplier *element[1];1125 region[iColumn0] =value0;1126 region[iColumn1] =value1;1133 #if NEW_CHUNK_SIZE == 4 1134 int iColumn0 = thisColumn[0]; 1135 int iColumn1 = thisColumn[1]; 1136 double value0 = region[iColumn0]; 1137 double value1 = region[iColumn1]; 1138 value0 OPERATION multiplier *element[0]; 1139 value1 OPERATION multiplier *element[1]; 1140 region[iColumn0] = value0; 1141 region[iColumn1] = value1; 1127 1142 #endif 1128 1143 } 1129 1144 SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier, 1130 const CoinFactorizationDouble *COIN_RESTRICT element,1131 CoinFactorizationDouble *COIN_RESTRICT region)1145 const CoinFactorizationDouble *COIN_RESTRICT element, 1146 CoinFactorizationDouble *COIN_RESTRICT region) 1132 1147 { 1133 1148 #ifndef NDEBUG 1134 assert (numberIn==7);1135 #endif 1136 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+7);1137 int nFull =7&(~(NEW_CHUNK_SIZE1));1138 #if AVX2 ==11149 assert(numberIn == 7); 1150 #endif 1151 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 7); 1152 int nFull = 7 & (~(NEW_CHUNK_SIZE  1)); 1153 #if AVX2 == 1 1139 1154 double temp[4]; 1140 1155 #endif 1141 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1142 coin_prefetch_const(element +6);1143 #if NEW_CHUNK_SIZE ==21144 int iColumn0 =thisColumn[0];1145 int iColumn1 =thisColumn[1];1146 double value0 =region[iColumn0];1147 double value1 =region[iColumn1];1148 value0 OPERATION multiplier *element[0];1149 value1 OPERATION multiplier *element[1];1150 region[iColumn0] =value0;1151 region[iColumn1] =value1;1152 #elif NEW_CHUNK_SIZE ==41153 int iColumn0 =thisColumn[0];1154 int iColumn1 =thisColumn[1];1155 int iColumn2 =thisColumn[2];1156 int iColumn3 =thisColumn[3];1157 #if AVX2 ==11156 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1157 coin_prefetch_const(element + 6); 1158 #if NEW_CHUNK_SIZE == 2 1159 int iColumn0 = thisColumn[0]; 1160 int iColumn1 = thisColumn[1]; 1161 double value0 = region[iColumn0]; 1162 double value1 = region[iColumn1]; 1163 value0 OPERATION multiplier *element[0]; 1164 value1 OPERATION multiplier *element[1]; 1165 region[iColumn0] = value0; 1166 region[iColumn1] = value1; 1167 #elif NEW_CHUNK_SIZE == 4 1168 int iColumn0 = thisColumn[0]; 1169 int iColumn1 = thisColumn[1]; 1170 int iColumn2 = thisColumn[2]; 1171 int iColumn3 = thisColumn[3]; 1172 #if AVX2 == 1 1158 1173 __v2df bb; 1159 set_const_v2df(bb, multiplier);1160 temp[0] =region[iColumn0];1161 temp[1] =region[iColumn1];1162 temp[2] =region[iColumn2];1163 temp[3] =region[iColumn3];1164 __v2df v0 = __builtin_ia32_loadupd 1165 __v2df v1 = __builtin_ia32_loadupd (temp+2);1166 __v2df a = __builtin_ia32_loadupd 1174 set_const_v2df(bb, multiplier); 1175 temp[0] = region[iColumn0]; 1176 temp[1] = region[iColumn1]; 1177 temp[2] = region[iColumn2]; 1178 temp[3] = region[iColumn3]; 1179 __v2df v0 = __builtin_ia32_loadupd(temp); 1180 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1181 __v2df a = __builtin_ia32_loadupd(element); 1167 1182 a *= bb; 1168 1183 v0 OPERATION a; 1169 a = __builtin_ia32_loadupd (element+2);1184 a = __builtin_ia32_loadupd(element + 2); 1170 1185 a *= bb; 1171 1186 v1 OPERATION a; 1172 __builtin_ia32_storeupd 1173 __builtin_ia32_storeupd (temp+2, v1);1174 region[iColumn0] =temp[0];1175 region[iColumn1] =temp[1];1176 region[iColumn2] =temp[2];1177 region[iColumn3] =temp[3];1178 #else 1179 double value0 =region[iColumn0];1180 double value1 =region[iColumn1];1181 double value2 =region[iColumn2];1182 double value3 =region[iColumn3];1183 value0 OPERATION multiplier *element[0];1184 value1 OPERATION multiplier *element[1];1185 value2 OPERATION multiplier *element[2];1186 value3 OPERATION multiplier *element[3];1187 region[iColumn0] =value0;1188 region[iColumn1] =value1;1189 region[iColumn2] =value2;1190 region[iColumn3] =value3;1187 __builtin_ia32_storeupd(temp, v0); 1188 __builtin_ia32_storeupd(temp + 2, v1); 1189 region[iColumn0] = temp[0]; 1190 region[iColumn1] = temp[1]; 1191 region[iColumn2] = temp[2]; 1192 region[iColumn3] = temp[3]; 1193 #else 1194 double value0 = region[iColumn0]; 1195 double value1 = region[iColumn1]; 1196 double value2 = region[iColumn2]; 1197 double value3 = region[iColumn3]; 1198 value0 OPERATION multiplier *element[0]; 1199 value1 OPERATION multiplier *element[1]; 1200 value2 OPERATION multiplier *element[2]; 1201 value3 OPERATION multiplier *element[3]; 1202 region[iColumn0] = value0; 1203 region[iColumn1] = value1; 1204 region[iColumn2] = value2; 1205 region[iColumn3] = value3; 1191 1206 #endif 1192 1207 #else 1193 1208 abort(); 1194 1209 #endif 1195 element +=NEW_CHUNK_SIZE;1210 element += NEW_CHUNK_SIZE; 1196 1211 thisColumn += NEW_CHUNK_SIZE; 1197 1212 } 1198 #if NEW_CHUNK_SIZE ==21199 int iColumn0 =thisColumn[0];1200 double value0 =region[iColumn0];1201 value0 OPERATION multiplier *element[0];1202 region[iColumn0] =value0;1203 #else 1204 int iColumn0 =thisColumn[0];1205 int iColumn1 =thisColumn[1];1206 int iColumn2 =thisColumn[2];1207 double value0 =region[iColumn0];1208 double value1 =region[iColumn1];1209 double value2 =region[iColumn2];1210 value0 OPERATION multiplier *element[0];1211 value1 OPERATION multiplier *element[1];1212 value2 OPERATION multiplier *element[2];1213 region[iColumn0] =value0;1214 region[iColumn1] =value1;1215 region[iColumn2] =value2;1213 #if NEW_CHUNK_SIZE == 2 1214 int iColumn0 = thisColumn[0]; 1215 double value0 = region[iColumn0]; 1216 value0 OPERATION multiplier *element[0]; 1217 region[iColumn0] = value0; 1218 #else 1219 int iColumn0 = thisColumn[0]; 1220 int iColumn1 = thisColumn[1]; 1221 int iColumn2 = thisColumn[2]; 1222 double value0 = region[iColumn0]; 1223 double value1 = region[iColumn1]; 1224 double value2 = region[iColumn2]; 1225 value0 OPERATION multiplier *element[0]; 1226 value1 OPERATION multiplier *element[1]; 1227 value2 OPERATION multiplier *element[2]; 1228 region[iColumn0] = value0; 1229 region[iColumn1] = value1; 1230 region[iColumn2] = value2; 1216 1231 #endif 1217 1232 } 1218 1233 SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier, 1219 const CoinFactorizationDouble *COIN_RESTRICT element,1220 CoinFactorizationDouble *COIN_RESTRICT region)1234 const CoinFactorizationDouble *COIN_RESTRICT element, 1235 CoinFactorizationDouble *COIN_RESTRICT region) 1221 1236 { 1222 1237 #ifndef NDEBUG 1223 assert (numberIn==8);1224 #endif 1225 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+8);1226 int nFull =8&(~(NEW_CHUNK_SIZE1));1227 #if AVX2 ==11238 assert(numberIn == 8); 1239 #endif 1240 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + 8); 1241 int nFull = 8 & (~(NEW_CHUNK_SIZE  1)); 1242 #if AVX2 == 1 1228 1243 double temp[4]; 1229 1244 #endif 1230 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1231 coin_prefetch_const(element +6);1232 #if NEW_CHUNK_SIZE ==21233 int iColumn0 =thisColumn[0];1234 int iColumn1 =thisColumn[1];1235 double value0 =region[iColumn0];1236 double value1 =region[iColumn1];1237 value0 OPERATION multiplier *element[0];1238 value1 OPERATION multiplier *element[1];1239 region[iColumn0] =value0;1240 region[iColumn1] =value1;1241 #elif NEW_CHUNK_SIZE ==41242 int iColumn0 =thisColumn[0];1243 int iColumn1 =thisColumn[1];1244 int iColumn2 =thisColumn[2];1245 int iColumn3 =thisColumn[3];1246 #if AVX2 ==11245 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1246 coin_prefetch_const(element + 6); 1247 #if NEW_CHUNK_SIZE == 2 1248 int iColumn0 = thisColumn[0]; 1249 int iColumn1 = thisColumn[1]; 1250 double value0 = region[iColumn0]; 1251 double value1 = region[iColumn1]; 1252 value0 OPERATION multiplier *element[0]; 1253 value1 OPERATION multiplier *element[1]; 1254 region[iColumn0] = value0; 1255 region[iColumn1] = value1; 1256 #elif NEW_CHUNK_SIZE == 4 1257 int iColumn0 = thisColumn[0]; 1258 int iColumn1 = thisColumn[1]; 1259 int iColumn2 = thisColumn[2]; 1260 int iColumn3 = thisColumn[3]; 1261 #if AVX2 == 1 1247 1262 __v2df bb; 1248 set_const_v2df(bb, multiplier);1249 temp[0] =region[iColumn0];1250 temp[1] =region[iColumn1];1251 temp[2] =region[iColumn2];1252 temp[3] =region[iColumn3];1253 __v2df v0 = __builtin_ia32_loadupd 1254 __v2df v1 = __builtin_ia32_loadupd (temp+2);1255 __v2df a = __builtin_ia32_loadupd 1263 set_const_v2df(bb, multiplier); 1264 temp[0] = region[iColumn0]; 1265 temp[1] = region[iColumn1]; 1266 temp[2] = region[iColumn2]; 1267 temp[3] = region[iColumn3]; 1268 __v2df v0 = __builtin_ia32_loadupd(temp); 1269 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1270 __v2df a = __builtin_ia32_loadupd(element); 1256 1271 a *= bb; 1257 1272 v0 OPERATION a; 1258 a = __builtin_ia32_loadupd (element+2);1273 a = __builtin_ia32_loadupd(element + 2); 1259 1274 a *= bb; 1260 1275 v1 OPERATION a; 1261 __builtin_ia32_storeupd 1262 __builtin_ia32_storeupd (temp+2, v1);1263 region[iColumn0] =temp[0];1264 region[iColumn1] =temp[1];1265 region[iColumn2] =temp[2];1266 region[iColumn3] =temp[3];1267 #else 1268 double value0 =region[iColumn0];1269 double value1 =region[iColumn1];1270 double value2 =region[iColumn2];1271 double value3 =region[iColumn3];1272 value0 OPERATION multiplier *element[0];1273 value1 OPERATION multiplier *element[1];1274 value2 OPERATION multiplier *element[2];1275 value3 OPERATION multiplier *element[3];1276 region[iColumn0] =value0;1277 region[iColumn1] =value1;1278 region[iColumn2] =value2;1279 region[iColumn3] =value3;1276 __builtin_ia32_storeupd(temp, v0); 1277 __builtin_ia32_storeupd(temp + 2, v1); 1278 region[iColumn0] = temp[0]; 1279 region[iColumn1] = temp[1]; 1280 region[iColumn2] = temp[2]; 1281 region[iColumn3] = temp[3]; 1282 #else 1283 double value0 = region[iColumn0]; 1284 double value1 = region[iColumn1]; 1285 double value2 = region[iColumn2]; 1286 double value3 = region[iColumn3]; 1287 value0 OPERATION multiplier *element[0]; 1288 value1 OPERATION multiplier *element[1]; 1289 value2 OPERATION multiplier *element[2]; 1290 value3 OPERATION multiplier *element[3]; 1291 region[iColumn0] = value0; 1292 region[iColumn1] = value1; 1293 region[iColumn2] = value2; 1294 region[iColumn3] = value3; 1280 1295 #endif 1281 1296 #else 1282 1297 abort(); 1283 1298 #endif 1284 element +=NEW_CHUNK_SIZE;1299 element += NEW_CHUNK_SIZE; 1285 1300 thisColumn += NEW_CHUNK_SIZE; 1286 1301 } 1287 1302 } 1288 1303 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier, 1289 const CoinFactorizationDouble *COIN_RESTRICT element,1290 CoinFactorizationDouble *COIN_RESTRICT region)1304 const CoinFactorizationDouble *COIN_RESTRICT element, 1305 CoinFactorizationDouble *COIN_RESTRICT region) 1291 1306 { 1292 assert ((numberIn&3)==0);1293 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);1294 int nFull =numberIn&(~(NEW_CHUNK_SIZE1));1295 #if AVX2 ==11307 assert((numberIn & 3) == 0); 1308 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn); 1309 int nFull = numberIn & (~(NEW_CHUNK_SIZE  1)); 1310 #if AVX2 == 1 1296 1311 double temp[4]; 1297 1312 #endif 1298 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1299 coin_prefetch_const(element +16);1300 coin_prefetch_const(thisColumn +32);1301 #if NEW_CHUNK_SIZE ==21302 int iColumn0 =thisColumn[0];1303 int iColumn1 =thisColumn[1];1304 double value0 =region[iColumn0];1305 double value1 =region[iColumn1];1306 value0 OPERATION multiplier *element[0];1307 value1 OPERATION multiplier *element[1];1308 region[iColumn0] =value0;1309 region[iColumn1] =value1;1310 #elif NEW_CHUNK_SIZE ==41311 int iColumn0 =thisColumn[0];1312 int iColumn1 =thisColumn[1];1313 int iColumn2 =thisColumn[2];1314 int iColumn3 =thisColumn[3];1315 #if AVX2 ==11313 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1314 coin_prefetch_const(element + 16); 1315 coin_prefetch_const(thisColumn + 32); 1316 #if NEW_CHUNK_SIZE == 2 1317 int iColumn0 = thisColumn[0]; 1318 int iColumn1 = thisColumn[1]; 1319 double value0 = region[iColumn0]; 1320 double value1 = region[iColumn1]; 1321 value0 OPERATION multiplier *element[0]; 1322 value1 OPERATION multiplier *element[1]; 1323 region[iColumn0] = value0; 1324 region[iColumn1] = value1; 1325 #elif NEW_CHUNK_SIZE == 4 1326 int iColumn0 = thisColumn[0]; 1327 int iColumn1 = thisColumn[1]; 1328 int iColumn2 = thisColumn[2]; 1329 int iColumn3 = thisColumn[3]; 1330 #if AVX2 == 1 1316 1331 __v2df bb; 1317 set_const_v2df(bb, multiplier);1318 temp[0] =region[iColumn0];1319 temp[1] =region[iColumn1];1320 temp[2] =region[iColumn2];1321 temp[3] =region[iColumn3];1322 __v2df v0 = __builtin_ia32_loadupd 1323 __v2df v1 = __builtin_ia32_loadupd (temp+2);1324 __v2df a = __builtin_ia32_loadupd 1332 set_const_v2df(bb, multiplier); 1333 temp[0] = region[iColumn0]; 1334 temp[1] = region[iColumn1]; 1335 temp[2] = region[iColumn2]; 1336 temp[3] = region[iColumn3]; 1337 __v2df v0 = __builtin_ia32_loadupd(temp); 1338 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1339 __v2df a = __builtin_ia32_loadupd(element); 1325 1340 a *= bb; 1326 1341 v0 OPERATION a; 1327 a = __builtin_ia32_loadupd (element+2);1342 a = __builtin_ia32_loadupd(element + 2); 1328 1343 a *= bb; 1329 1344 v1 OPERATION a; 1330 __builtin_ia32_storeupd 1331 __builtin_ia32_storeupd (temp+2, v1);1332 region[iColumn0] =temp[0];1333 region[iColumn1] =temp[1];1334 region[iColumn2] =temp[2];1335 region[iColumn3] =temp[3];1336 #else 1337 double value0 =region[iColumn0];1338 double value1 =region[iColumn1];1339 double value2 =region[iColumn2];1340 double value3 =region[iColumn3];1341 value0 OPERATION multiplier *element[0];1342 value1 OPERATION multiplier *element[1];1343 value2 OPERATION multiplier *element[2];1344 value3 OPERATION multiplier *element[3];1345 region[iColumn0] =value0;1346 region[iColumn1] =value1;1347 region[iColumn2] =value2;1348 region[iColumn3] =value3;1345 __builtin_ia32_storeupd(temp, v0); 1346 __builtin_ia32_storeupd(temp + 2, v1); 1347 region[iColumn0] = temp[0]; 1348 region[iColumn1] = temp[1]; 1349 region[iColumn2] = temp[2]; 1350 region[iColumn3] = temp[3]; 1351 #else 1352 double value0 = region[iColumn0]; 1353 double value1 = region[iColumn1]; 1354 double value2 = region[iColumn2]; 1355 double value3 = region[iColumn3]; 1356 value0 OPERATION multiplier *element[0]; 1357 value1 OPERATION multiplier *element[1]; 1358 value2 OPERATION multiplier *element[2]; 1359 value3 OPERATION multiplier *element[3]; 1360 region[iColumn0] = value0; 1361 region[iColumn1] = value1; 1362 region[iColumn2] = value2; 1363 region[iColumn3] = value3; 1349 1364 #endif 1350 1365 #else 1351 1366 abort(); 1352 1367 #endif 1353 element +=NEW_CHUNK_SIZE;1368 element += NEW_CHUNK_SIZE; 1354 1369 thisColumn += NEW_CHUNK_SIZE; 1355 1370 } 1356 1371 } 1357 1372 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier, 1358 const CoinFactorizationDouble *COIN_RESTRICT element,1359 CoinFactorizationDouble *COIN_RESTRICT region)1373 const CoinFactorizationDouble *COIN_RESTRICT element, 1374 CoinFactorizationDouble *COIN_RESTRICT region) 1360 1375 { 1361 assert ((numberIn&3)==1);1362 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);1363 int nFull =numberIn&(~(NEW_CHUNK_SIZE1));1364 #if AVX2 ==11376 assert((numberIn & 3) == 1); 1377 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn); 1378 int nFull = numberIn & (~(NEW_CHUNK_SIZE  1)); 1379 #if AVX2 == 1 1365 1380 double temp[4]; 1366 1381 #endif 1367 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1368 coin_prefetch_const(element +16);1369 coin_prefetch_const(thisColumn +32);1370 #if NEW_CHUNK_SIZE ==21371 int iColumn0 =thisColumn[0];1372 int iColumn1 =thisColumn[1];1373 double value0 =region[iColumn0];1374 double value1 =region[iColumn1];1375 value0 OPERATION multiplier *element[0];1376 value1 OPERATION multiplier *element[1];1377 region[iColumn0] =value0;1378 region[iColumn1] =value1;1379 #elif NEW_CHUNK_SIZE ==41380 int iColumn0 =thisColumn[0];1381 int iColumn1 =thisColumn[1];1382 int iColumn2 =thisColumn[2];1383 int iColumn3 =thisColumn[3];1384 #if AVX2 ==11382 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1383 coin_prefetch_const(element + 16); 1384 coin_prefetch_const(thisColumn + 32); 1385 #if NEW_CHUNK_SIZE == 2 1386 int iColumn0 = thisColumn[0]; 1387 int iColumn1 = thisColumn[1]; 1388 double value0 = region[iColumn0]; 1389 double value1 = region[iColumn1]; 1390 value0 OPERATION multiplier *element[0]; 1391 value1 OPERATION multiplier *element[1]; 1392 region[iColumn0] = value0; 1393 region[iColumn1] = value1; 1394 #elif NEW_CHUNK_SIZE == 4 1395 int iColumn0 = thisColumn[0]; 1396 int iColumn1 = thisColumn[1]; 1397 int iColumn2 = thisColumn[2]; 1398 int iColumn3 = thisColumn[3]; 1399 #if AVX2 == 1 1385 1400 __v2df bb; 1386 set_const_v2df(bb, multiplier);1387 temp[0] =region[iColumn0];1388 temp[1] =region[iColumn1];1389 temp[2] =region[iColumn2];1390 temp[3] =region[iColumn3];1391 __v2df v0 = __builtin_ia32_loadupd 1392 __v2df v1 = __builtin_ia32_loadupd (temp+2);1393 __v2df a = __builtin_ia32_loadupd 1401 set_const_v2df(bb, multiplier); 1402 temp[0] = region[iColumn0]; 1403 temp[1] = region[iColumn1]; 1404 temp[2] = region[iColumn2]; 1405 temp[3] = region[iColumn3]; 1406 __v2df v0 = __builtin_ia32_loadupd(temp); 1407 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1408 __v2df a = __builtin_ia32_loadupd(element); 1394 1409 a *= bb; 1395 1410 v0 OPERATION a; 1396 a = __builtin_ia32_loadupd (element+2);1411 a = __builtin_ia32_loadupd(element + 2); 1397 1412 a *= bb; 1398 1413 v1 OPERATION a; 1399 __builtin_ia32_storeupd 1400 __builtin_ia32_storeupd (temp+2, v1);1401 region[iColumn0] =temp[0];1402 region[iColumn1] =temp[1];1403 region[iColumn2] =temp[2];1404 region[iColumn3] =temp[3];1405 #else 1406 double value0 =region[iColumn0];1407 double value1 =region[iColumn1];1408 double value2 =region[iColumn2];1409 double value3 =region[iColumn3];1410 value0 OPERATION multiplier *element[0];1411 value1 OPERATION multiplier *element[1];1412 value2 OPERATION multiplier *element[2];1413 value3 OPERATION multiplier *element[3];1414 region[iColumn0] =value0;1415 region[iColumn1] =value1;1416 region[iColumn2] =value2;1417 region[iColumn3] =value3;1414 __builtin_ia32_storeupd(temp, v0); 1415 __builtin_ia32_storeupd(temp + 2, v1); 1416 region[iColumn0] = temp[0]; 1417 region[iColumn1] = temp[1]; 1418 region[iColumn2] = temp[2]; 1419 region[iColumn3] = temp[3]; 1420 #else 1421 double value0 = region[iColumn0]; 1422 double value1 = region[iColumn1]; 1423 double value2 = region[iColumn2]; 1424 double value3 = region[iColumn3]; 1425 value0 OPERATION multiplier *element[0]; 1426 value1 OPERATION multiplier *element[1]; 1427 value2 OPERATION multiplier *element[2]; 1428 value3 OPERATION multiplier *element[3]; 1429 region[iColumn0] = value0; 1430 region[iColumn1] = value1; 1431 region[iColumn2] = value2; 1432 region[iColumn3] = value3; 1418 1433 #endif 1419 1434 #else 1420 1435 abort(); 1421 1436 #endif 1422 element +=NEW_CHUNK_SIZE;1437 element += NEW_CHUNK_SIZE; 1423 1438 thisColumn += NEW_CHUNK_SIZE; 1424 1439 } 1425 int iColumn0 =thisColumn[0];1426 double value0 =region[iColumn0];1427 value0 OPERATION multiplier *element[0];1428 region[iColumn0] =value0;1440 int iColumn0 = thisColumn[0]; 1441 double value0 = region[iColumn0]; 1442 value0 OPERATION multiplier *element[0]; 1443 region[iColumn0] = value0; 1429 1444 } 1430 1445 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier, 1431 const CoinFactorizationDouble *COIN_RESTRICT element,1432 CoinFactorizationDouble *COIN_RESTRICT region)1446 const CoinFactorizationDouble *COIN_RESTRICT element, 1447 CoinFactorizationDouble *COIN_RESTRICT region) 1433 1448 { 1434 assert ((numberIn&3)==2);1435 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);1436 int nFull =numberIn&(~(NEW_CHUNK_SIZE1));1437 #if AVX2 ==11449 assert((numberIn & 3) == 2); 1450 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn); 1451 int nFull = numberIn & (~(NEW_CHUNK_SIZE  1)); 1452 #if AVX2 == 1 1438 1453 double temp[4]; 1439 1454 #endif 1440 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1441 coin_prefetch_const(element +16);1442 coin_prefetch_const(thisColumn +32);1443 #if NEW_CHUNK_SIZE ==21444 int iColumn0 =thisColumn[0];1445 int iColumn1 =thisColumn[1];1446 double value0 =region[iColumn0];1447 double value1 =region[iColumn1];1448 value0 OPERATION multiplier *element[0];1449 value1 OPERATION multiplier *element[1];1450 region[iColumn0] =value0;1451 region[iColumn1] =value1;1452 #elif NEW_CHUNK_SIZE ==41453 int iColumn0 =thisColumn[0];1454 int iColumn1 =thisColumn[1];1455 int iColumn2 =thisColumn[2];1456 int iColumn3 =thisColumn[3];1457 #if AVX2 ==11455 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1456 coin_prefetch_const(element + 16); 1457 coin_prefetch_const(thisColumn + 32); 1458 #if NEW_CHUNK_SIZE == 2 1459 int iColumn0 = thisColumn[0]; 1460 int iColumn1 = thisColumn[1]; 1461 double value0 = region[iColumn0]; 1462 double value1 = region[iColumn1]; 1463 value0 OPERATION multiplier *element[0]; 1464 value1 OPERATION multiplier *element[1]; 1465 region[iColumn0] = value0; 1466 region[iColumn1] = value1; 1467 #elif NEW_CHUNK_SIZE == 4 1468 int iColumn0 = thisColumn[0]; 1469 int iColumn1 = thisColumn[1]; 1470 int iColumn2 = thisColumn[2]; 1471 int iColumn3 = thisColumn[3]; 1472 #if AVX2 == 1 1458 1473 __v2df bb; 1459 set_const_v2df(bb, multiplier);1460 temp[0] =region[iColumn0];1461 temp[1] =region[iColumn1];1462 temp[2] =region[iColumn2];1463 temp[3] =region[iColumn3];1464 __v2df v0 = __builtin_ia32_loadupd 1465 __v2df v1 = __builtin_ia32_loadupd (temp+2);1466 __v2df a = __builtin_ia32_loadupd 1474 set_const_v2df(bb, multiplier); 1475 temp[0] = region[iColumn0]; 1476 temp[1] = region[iColumn1]; 1477 temp[2] = region[iColumn2]; 1478 temp[3] = region[iColumn3]; 1479 __v2df v0 = __builtin_ia32_loadupd(temp); 1480 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1481 __v2df a = __builtin_ia32_loadupd(element); 1467 1482 a *= bb; 1468 1483 v0 OPERATION a; 1469 a = __builtin_ia32_loadupd (element+2);1484 a = __builtin_ia32_loadupd(element + 2); 1470 1485 a *= bb; 1471 1486 v1 OPERATION a; 1472 __builtin_ia32_storeupd 1473 __builtin_ia32_storeupd (temp+2, v1);1474 region[iColumn0] =temp[0];1475 region[iColumn1] =temp[1];1476 region[iColumn2] =temp[2];1477 region[iColumn3] =temp[3];1478 #else 1479 double value0 =region[iColumn0];1480 double value1 =region[iColumn1];1481 double value2 =region[iColumn2];1482 double value3 =region[iColumn3];1483 value0 OPERATION multiplier *element[0];1484 value1 OPERATION multiplier *element[1];1485 value2 OPERATION multiplier *element[2];1486 value3 OPERATION multiplier *element[3];1487 region[iColumn0] =value0;1488 region[iColumn1] =value1;1489 region[iColumn2] =value2;1490 region[iColumn3] =value3;1487 __builtin_ia32_storeupd(temp, v0); 1488 __builtin_ia32_storeupd(temp + 2, v1); 1489 region[iColumn0] = temp[0]; 1490 region[iColumn1] = temp[1]; 1491 region[iColumn2] = temp[2]; 1492 region[iColumn3] = temp[3]; 1493 #else 1494 double value0 = region[iColumn0]; 1495 double value1 = region[iColumn1]; 1496 double value2 = region[iColumn2]; 1497 double value3 = region[iColumn3]; 1498 value0 OPERATION multiplier *element[0]; 1499 value1 OPERATION multiplier *element[1]; 1500 value2 OPERATION multiplier *element[2]; 1501 value3 OPERATION multiplier *element[3]; 1502 region[iColumn0] = value0; 1503 region[iColumn1] = value1; 1504 region[iColumn2] = value2; 1505 region[iColumn3] = value3; 1491 1506 #endif 1492 1507 #else 1493 1508 abort(); 1494 1509 #endif 1495 element +=NEW_CHUNK_SIZE;1510 element += NEW_CHUNK_SIZE; 1496 1511 thisColumn += NEW_CHUNK_SIZE; 1497 1512 } 1498 #if NEW_CHUNK_SIZE ==41499 int iColumn0 =thisColumn[0];1500 int iColumn1 =thisColumn[1];1501 double value0 =region[iColumn0];1502 double value1 =region[iColumn1];1503 value0 OPERATION multiplier *element[0];1504 value1 OPERATION multiplier *element[1];1505 region[iColumn0] =value0;1506 region[iColumn1] =value1;1513 #if NEW_CHUNK_SIZE == 4 1514 int iColumn0 = thisColumn[0]; 1515 int iColumn1 = thisColumn[1]; 1516 double value0 = region[iColumn0]; 1517 double value1 = region[iColumn1]; 1518 value0 OPERATION multiplier *element[0]; 1519 value1 OPERATION multiplier *element[1]; 1520 region[iColumn0] = value0; 1521 region[iColumn1] = value1; 1507 1522 #endif 1508 1523 } 1509 1524 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier, 1510 const CoinFactorizationDouble *COIN_RESTRICT element,1511 CoinFactorizationDouble *COIN_RESTRICT region)1525 const CoinFactorizationDouble *COIN_RESTRICT element, 1526 CoinFactorizationDouble *COIN_RESTRICT region) 1512 1527 { 1513 assert ((numberIn&3)==3);1514 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);1515 int nFull =numberIn&(~(NEW_CHUNK_SIZE1));1516 #if AVX2 ==11528 assert((numberIn & 3) == 3); 1529 const int *COIN_RESTRICT thisColumn = reinterpret_cast< const int * >(element + numberIn); 1530 int nFull = numberIn & (~(NEW_CHUNK_SIZE  1)); 1531 #if AVX2 == 1 1517 1532 double temp[4]; 1518 1533 #endif 1519 for (int j =0;j<nFull;j+=NEW_CHUNK_SIZE) {1520 coin_prefetch_const(element +16);1521 coin_prefetch_const(thisColumn +32);1522 #if NEW_CHUNK_SIZE ==21523 int iColumn0 =thisColumn[0];1524 int iColumn1 =thisColumn[1];1525 double value0 =region[iColumn0];1526 double value1 =region[iColumn1];1527 value0 OPERATION multiplier *element[0];1528 value1 OPERATION multiplier *element[1];1529 region[iColumn0] =value0;1530 region[iColumn1] =value1;1531 #elif NEW_CHUNK_SIZE ==41532 int iColumn0 =thisColumn[0];1533 int iColumn1 =thisColumn[1];1534 int iColumn2 =thisColumn[2];1535 int iColumn3 =thisColumn[3];1536 #if AVX2 ==11534 for (int j = 0; j < nFull; j += NEW_CHUNK_SIZE) { 1535 coin_prefetch_const(element + 16); 1536 coin_prefetch_const(thisColumn + 32); 1537 #if NEW_CHUNK_SIZE == 2 1538 int iColumn0 = thisColumn[0]; 1539 int iColumn1 = thisColumn[1]; 1540 double value0 = region[iColumn0]; 1541 double value1 = region[iColumn1]; 1542 value0 OPERATION multiplier *element[0]; 1543 value1 OPERATION multiplier *element[1]; 1544 region[iColumn0] = value0; 1545 region[iColumn1] = value1; 1546 #elif NEW_CHUNK_SIZE == 4 1547 int iColumn0 = thisColumn[0]; 1548 int iColumn1 = thisColumn[1]; 1549 int iColumn2 = thisColumn[2]; 1550 int iColumn3 = thisColumn[3]; 1551 #if AVX2 == 1 1537 1552 __v2df bb; 1538 set_const_v2df(bb, multiplier);1539 temp[0] =region[iColumn0];1540 temp[1] =region[iColumn1];1541 temp[2] =region[iColumn2];1542 temp[3] =region[iColumn3];1543 __v2df v0 = __builtin_ia32_loadupd 1544 __v2df v1 = __builtin_ia32_loadupd (temp+2);1545 __v2df a = __builtin_ia32_loadupd 1553 set_const_v2df(bb, multiplier); 1554 temp[0] = region[iColumn0]; 1555 temp[1] = region[iColumn1]; 1556 temp[2] = region[iColumn2]; 1557 temp[3] = region[iColumn3]; 1558 __v2df v0 = __builtin_ia32_loadupd(temp); 1559 __v2df v1 = __builtin_ia32_loadupd(temp + 2); 1560 __v2df a = __builtin_ia32_loadupd(element); 1546 1561 a *= bb; 1547 1562 v0 OPERATION a; 1548 a = __builtin_ia32_loadupd (element+2);1563 a = __builtin_ia32_loadupd(element + 2); 1549 1564 a *= bb; 1550 1565 v1 OPERATION a; 1551 __builtin_ia32_storeupd 1552 __builtin_ia32_storeupd (temp+2, v1);1553 region[iColumn0] =temp[0];1554 region[iColumn1] =temp[1];1555 region[iColumn2] =temp[2];1556 region[iColumn3] =temp[3];1557 #else 1558 double value0 =region[iColumn0];1559 double value1 =region[iColumn1];1560 double value2 =region[iColumn2];1561 double value3 =region[iColumn3];1562 value0 OPERATION multiplier *element[0];1563 value1 OPERATION multiplier *element[1];1564 value2 OPERATION multiplier *element[2];1565 value3 OPERATION multiplier *element[3];1566 region[iColumn0] =value0;1567 region[iColumn1] =value1;1568 region[iColumn2] =value2;1569 region[iColumn3] =value3;1566 __builtin_ia32_storeupd(temp, v0); 1567 __builtin_ia32_storeupd(temp + 2, v1); 1568 region[iColumn0] = temp[0]; 1569 region[iColumn1] = temp[1]; 1570 region[iColumn2] = temp[2]; 1571 region[iColumn3] = temp[3]; 1572 #else 1573 double value0 = region[iColumn0]; 1574 double value1 = region[iColumn1]; 1575 double value2 = region[iColumn2]; 1576 double value3 = region[iColumn3]; 1577 value0 OPERATION multiplier *element[0]; 1578 value1 OPERATION multiplier *element[1]; 1579 value2 OPERATION multiplier *element[2]; 1580 value3 OPERATION multiplier *element[3]; 1581 region[iColumn0] = value0; 1582 region[iColumn1] = value1; 1583 region[iColumn2] = value2; 1584 region[iColumn3] = value3; 1570 1585 #endif 1571 1586 #else 1572 1587 abort(); 1573 1588 #endif 1574 element +=NEW_CHUNK_SIZE;1589 element += NEW_CHUNK_SIZE; 1575 1590 thisColumn += NEW_CHUNK_SIZE; 1576 1591 } 1577 #if NEW_CHUNK_SIZE ==21578 int iColumn0 =thisColumn[0];1579 double value0 =region[iColumn0];1580 value0 OPERATION multiplier *element[0];1581 region[iColumn0] =value0;1582 #else 1583 int iColumn0 =thisColumn[0];1584 int iColumn1 =thisColumn[1];1585 int iColumn2 =thisColumn[2];1586 double value0 =region[iColumn0];1587 double value1 =region[iColumn1];1588 double value2 =region[iColumn2];1589 value0 OPERATION multiplier *element[0];1590 value1 OPERATION multiplier *element[1];1591 value2 OPERATION multiplier *element[2];1592 region[iColumn0] =value0;1593 region[iColumn1] =value1;1594 region[iColumn2] =value2;1592 #if NEW_CHUNK_SIZE == 2 1593 int iColumn0 = thisColumn[0]; 1594 double value0 = region[iColumn0]; 1595 value0 OPERATION multiplier *element[0]; 1596 region[iColumn0] = value0; 1597 #else 1598 int iColumn0 = thisColumn[0]; 1599 int iColumn1 = thisColumn[1]; 1600 int iColumn2 = thisColumn[2]; 1601 double value0 = region[iColumn0]; 1602 double value1 = region[iColumn1]; 1603 double value2 = region[iColumn2]; 1604 value0 OPERATION multiplier *element[0]; 1605 value1 OPERATION multiplier *element[1]; 1606 value2 OPERATION multiplier *element[2]; 1607 region[iColumn0] = value0; 1608 region[iColumn1] = value1; 1609 region[iColumn2] = value2; 1595 1610 #endif 1596 1611 } 1597 1612 #endif 1613 1614 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2 1615 */
Note: See TracChangeset
for help on using the changeset viewer.