Changeset 2385 for trunk/Clp/src/ClpCholeskyDense.cpp
 Timestamp:
 Jan 6, 2019 2:43:06 PM (4 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/Clp/src/ClpCholeskyDense.cpp
r2271 r2385 17 17 #include "CoinAbcCommon.hpp" 18 18 #endif 19 #if CLP_HAS_ABC <320 #undef cilk_for 19 #if CLP_HAS_ABC < 3 20 #undef cilk_for 21 21 #undef cilk_spawn 22 22 #undef cilk_sync 23 #define cilk_for for 23 #define cilk_for for 24 24 #define cilk_spawn 25 25 #define cilk_sync … … 33 33 /* Default Constructor */ 34 34 /**/ 35 ClpCholeskyDense::ClpCholeskyDense () 36 : ClpCholeskyBase(), 37 borrowSpace_(false) 38 { 39 type_ = 11;; 35 ClpCholeskyDense::ClpCholeskyDense() 36 : ClpCholeskyBase() 37 , borrowSpace_(false) 38 { 39 type_ = 11; 40 ; 40 41 } 41 42 … … 43 44 /* Copy constructor */ 44 45 /**/ 45 ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs) 46 : ClpCholeskyBase(rhs), 47 borrowSpace_(rhs.borrowSpace_) 48 { 49 assert(!rhs.borrowSpace_  !rhs.sizeFactor_); /* can't do if borrowing space*/ 50 } 51 46 ClpCholeskyDense::ClpCholeskyDense(const ClpCholeskyDense &rhs) 47 : ClpCholeskyBase(rhs) 48 , borrowSpace_(rhs.borrowSpace_) 49 { 50 assert(!rhs.borrowSpace_  !rhs.sizeFactor_); /* can't do if borrowing space*/ 51 } 52 52 53 53 /**/ 54 54 /* Destructor */ 55 55 /**/ 56 ClpCholeskyDense::~ClpCholeskyDense 57 { 58 59 60 61 62 63 56 ClpCholeskyDense::~ClpCholeskyDense() 57 { 58 if (borrowSpace_) { 59 /* set NULL*/ 60 sparseFactor_ = NULL; 61 workDouble_ = NULL; 62 diagonal_ = NULL; 63 } 64 64 } 65 65 … … 68 68 /**/ 69 69 ClpCholeskyDense & 70 ClpCholeskyDense::operator=(const ClpCholeskyDense &rhs)71 { 72 73 74 75 76 77 70 ClpCholeskyDense::operator=(const ClpCholeskyDense &rhs) 71 { 72 if (this != &rhs) { 73 assert(!rhs.borrowSpace_  !rhs.sizeFactor_); /* can't do if borrowing space*/ 74 ClpCholeskyBase::operator=(rhs); 75 borrowSpace_ = rhs.borrowSpace_; 76 } 77 return *this; 78 78 } 79 79 /**/ 80 80 /* Clone*/ 81 81 /**/ 82 ClpCholeskyBase * 83 { 84 82 ClpCholeskyBase *ClpCholeskyDense::clone() const 83 { 84 return new ClpCholeskyDense(*this); 85 85 } 86 86 /* If not power of 2 then need to redo a bit*/ … … 90 90 #define BLOCKUNROLL 91 91 92 #define BLOCKSQ ( BLOCK*BLOCK)93 #define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT)94 #define number_blocks(x) (((x) +BLOCK1)>>BLOCKSHIFT)95 #define number_rows(x) ((x) <<BLOCKSHIFT)96 #define number_entries(x) ((x) <<BLOCKSQSHIFT)92 #define BLOCKSQ (BLOCK * BLOCK) 93 #define BLOCKSQSHIFT (BLOCKSHIFT + BLOCKSHIFT) 94 #define number_blocks(x) (((x) + BLOCK  1) >> BLOCKSHIFT) 95 #define number_rows(x) ((x) << BLOCKSHIFT) 96 #define number_entries(x) ((x) << BLOCKSQSHIFT) 97 97 /* Gets space */ 98 int 99 ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows) 100 { 101 numberRows_ = numberRows; 102 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 103 /* allow one stripe extra*/ 104 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); 105 sizeFactor_ = numberBlocks * BLOCKSQ; 106 /*#define CHOL_COMPARE*/ 98 int ClpCholeskyDense::reserveSpace(const ClpCholeskyBase *factor, int numberRows) 99 { 100 numberRows_ = numberRows; 101 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 102 /* allow one stripe extra*/ 103 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); 104 sizeFactor_ = numberBlocks * BLOCKSQ; 105 /*#define CHOL_COMPARE*/ 107 106 #ifdef CHOL_COMPARE 108 109 #endif 110 111 sparseFactor_ = new longDouble[sizeFactor_];112 rowsDropped_ = new char[numberRows_];113 114 115 116 117 118 119 120 121 122 123 124 107 sizeFactor_ += 95000; 108 #endif 109 if (!factor) { 110 sparseFactor_ = new longDouble[sizeFactor_]; 111 rowsDropped_ = new char[numberRows_]; 112 memset(rowsDropped_, 0, numberRows_); 113 workDouble_ = new longDouble[numberRows_]; 114 diagonal_ = new longDouble[numberRows_]; 115 } else { 116 borrowSpace_ = true; 117 int numberFull = factor>numberRows(); 118 sparseFactor_ = factor>sparseFactor() + (factor>size()  sizeFactor_); 119 workDouble_ = factor>workDouble() + (numberFull  numberRows_); 120 diagonal_ = factor>diagonal() + (numberFull  numberRows_); 121 } 122 numberRowsDropped_ = 0; 123 return 0; 125 124 } 126 125 /* Returns space needed */ 127 int 128 ClpCholeskyDense::space( int numberRows) const 129 { 130 int numberBlocks = (numberRows + BLOCK  1) >> BLOCKSHIFT; 131 /* allow one stripe extra*/ 132 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); 133 int sizeFactor = numberBlocks * BLOCKSQ; 126 int ClpCholeskyDense::space(int numberRows) const 127 { 128 int numberBlocks = (numberRows + BLOCK  1) >> BLOCKSHIFT; 129 /* allow one stripe extra*/ 130 numberBlocks = numberBlocks + ((numberBlocks * (numberBlocks + 1)) / 2); 131 int sizeFactor = numberBlocks * BLOCKSQ; 134 132 #ifdef CHOL_COMPARE 135 136 #endif 137 133 sizeFactor += 95000; 134 #endif 135 return sizeFactor; 138 136 } 139 137 /* Orders rows and saves pointer to matrix.and model */ 140 int 141 ClpCholeskyDense::order(ClpInterior * model) 142 { 143 model_ = model; 144 int numberRows; 145 int numberRowsModel = model_>numberRows(); 146 int numberColumns = model_>numberColumns(); 147 if (!doKKT_) { 148 numberRows = numberRowsModel; 149 } else { 150 numberRows = 2 * numberRowsModel + numberColumns; 151 } 152 reserveSpace(NULL, numberRows); 153 rowCopy_ = model>clpMatrix()>reverseOrderedCopy(); 154 return 0; 138 int ClpCholeskyDense::order(ClpInterior *model) 139 { 140 model_ = model; 141 int numberRows; 142 int numberRowsModel = model_>numberRows(); 143 int numberColumns = model_>numberColumns(); 144 if (!doKKT_) { 145 numberRows = numberRowsModel; 146 } else { 147 numberRows = 2 * numberRowsModel + numberColumns; 148 } 149 reserveSpace(NULL, numberRows); 150 rowCopy_ = model>clpMatrix()>reverseOrderedCopy(); 151 return 0; 155 152 } 156 153 /* Does Symbolic factorization given permutation. … … 158 155 user must provide factorize and solve. Otherwise the default factorization is used 159 156 returns nonzero if not enough memory */ 160 int 161 ClpCholeskyDense::symbolic() 162 { 163 return 0; 157 int ClpCholeskyDense::symbolic() 158 { 159 return 0; 164 160 } 165 161 /* Factorize  filling in rowsDropped and returning number dropped */ 166 int 167 ClpCholeskyDense::factorize(const CoinWorkDouble * diagonal, int * rowsDropped) 168 { 169 assert (!borrowSpace_); 170 const CoinBigIndex * columnStart = model_>clpMatrix()>getVectorStarts(); 171 const int * columnLength = model_>clpMatrix()>getVectorLengths(); 172 const int * row = model_>clpMatrix()>getIndices(); 173 const double * element = model_>clpMatrix()>getElements(); 174 const CoinBigIndex * rowStart = rowCopy_>getVectorStarts(); 175 const int * rowLength = rowCopy_>getVectorLengths(); 176 const int * column = rowCopy_>getIndices(); 177 const double * elementByRow = rowCopy_>getElements(); 178 int numberColumns = model_>clpMatrix()>getNumCols(); 179 CoinZeroN(sparseFactor_, sizeFactor_); 180 /*perturbation*/ 181 CoinWorkDouble perturbation = model_>diagonalPerturbation() * model_>diagonalNorm(); 182 perturbation = perturbation * perturbation; 183 if (perturbation > 1.0) { 162 int ClpCholeskyDense::factorize(const CoinWorkDouble *diagonal, int *rowsDropped) 163 { 164 assert(!borrowSpace_); 165 const CoinBigIndex *columnStart = model_>clpMatrix()>getVectorStarts(); 166 const int *columnLength = model_>clpMatrix()>getVectorLengths(); 167 const int *row = model_>clpMatrix()>getIndices(); 168 const double *element = model_>clpMatrix()>getElements(); 169 const CoinBigIndex *rowStart = rowCopy_>getVectorStarts(); 170 const int *rowLength = rowCopy_>getVectorLengths(); 171 const int *column = rowCopy_>getIndices(); 172 const double *elementByRow = rowCopy_>getElements(); 173 int numberColumns = model_>clpMatrix()>getNumCols(); 174 CoinZeroN(sparseFactor_, sizeFactor_); 175 /*perturbation*/ 176 CoinWorkDouble perturbation = model_>diagonalPerturbation() * model_>diagonalNorm(); 177 perturbation = perturbation * perturbation; 178 if (perturbation > 1.0) { 184 179 #ifdef COIN_DEVELOP 185 /*if (model_>model()>logLevel()&4) */ 186 std::cout << "large perturbation " << perturbation << std::endl; 187 #endif 188 perturbation = CoinSqrt(perturbation);; 189 perturbation = 1.0; 190 } 191 int iRow; 192 int newDropped = 0; 193 CoinWorkDouble largest = 1.0; 194 CoinWorkDouble smallest = COIN_DBL_MAX; 195 CoinWorkDouble delta2 = model_>delta(); /* add delta*delta to diagonal*/ 196 delta2 *= delta2; 197 if (!doKKT_) { 198 longDouble * work = sparseFactor_; 199 work; /* skip diagonal*/ 200 int addOffset = numberRows_  1; 201 const CoinWorkDouble * diagonalSlack = diagonal + numberColumns; 202 /* largest in initial matrix*/ 203 CoinWorkDouble largest2 = 1.0e20; 204 for (iRow = 0; iRow < numberRows_; iRow++) { 205 if (!rowsDropped_[iRow]) { 206 CoinBigIndex startRow = rowStart[iRow]; 207 CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow]; 208 CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2; 209 for (CoinBigIndex k = startRow; k < endRow; k++) { 210 int iColumn = column[k]; 211 CoinBigIndex start = columnStart[iColumn]; 212 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 213 CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k]; 214 for (CoinBigIndex j = start; j < end; j++) { 215 int jRow = row[j]; 216 if (!rowsDropped_[jRow]) { 217 if (jRow > iRow) { 218 CoinWorkDouble value = element[j] * multiplier; 219 work[jRow] += value; 220 } else if (jRow == iRow) { 221 CoinWorkDouble value = element[j] * multiplier; 222 diagonalValue += value; 223 } 224 } 225 } 226 } 227 for (int j = iRow + 1; j < numberRows_; j++) 228 largest2 = CoinMax(largest2, CoinAbs(work[j])); 229 diagonal_[iRow] = diagonalValue; 230 largest2 = CoinMax(largest2, CoinAbs(diagonalValue)); 231 } else { 232 /* dropped*/ 233 diagonal_[iRow] = 1.0; 234 } 235 addOffset; 236 work += addOffset; 180 /*if (model_>model()>logLevel()&4) */ 181 std::cout << "large perturbation " << perturbation << std::endl; 182 #endif 183 perturbation = CoinSqrt(perturbation); 184 ; 185 perturbation = 1.0; 186 } 187 int iRow; 188 int newDropped = 0; 189 CoinWorkDouble largest = 1.0; 190 CoinWorkDouble smallest = COIN_DBL_MAX; 191 CoinWorkDouble delta2 = model_>delta(); /* add delta*delta to diagonal*/ 192 delta2 *= delta2; 193 if (!doKKT_) { 194 longDouble *work = sparseFactor_; 195 work; /* skip diagonal*/ 196 int addOffset = numberRows_  1; 197 const CoinWorkDouble *diagonalSlack = diagonal + numberColumns; 198 /* largest in initial matrix*/ 199 CoinWorkDouble largest2 = 1.0e20; 200 for (iRow = 0; iRow < numberRows_; iRow++) { 201 if (!rowsDropped_[iRow]) { 202 CoinBigIndex startRow = rowStart[iRow]; 203 CoinBigIndex endRow = rowStart[iRow] + rowLength[iRow]; 204 CoinWorkDouble diagonalValue = diagonalSlack[iRow] + delta2; 205 for (CoinBigIndex k = startRow; k < endRow; k++) { 206 int iColumn = column[k]; 207 CoinBigIndex start = columnStart[iColumn]; 208 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 209 CoinWorkDouble multiplier = diagonal[iColumn] * elementByRow[k]; 210 for (CoinBigIndex j = start; j < end; j++) { 211 int jRow = row[j]; 212 if (!rowsDropped_[jRow]) { 213 if (jRow > iRow) { 214 CoinWorkDouble value = element[j] * multiplier; 215 work[jRow] += value; 216 } else if (jRow == iRow) { 217 CoinWorkDouble value = element[j] * multiplier; 218 diagonalValue += value; 219 } 220 } 237 221 } 238 /*check sizes*/ 239 largest2 *= 1.0e20; 240 largest = CoinMin(largest2, CHOL_SMALL_VALUE); 241 int numberDroppedBefore = 0; 242 for (iRow = 0; iRow < numberRows_; iRow++) { 243 int dropped = rowsDropped_[iRow]; 244 /* Move to int array*/ 245 rowsDropped[iRow] = dropped; 246 if (!dropped) { 247 CoinWorkDouble diagonal = diagonal_[iRow]; 248 if (diagonal > largest2) { 249 diagonal_[iRow] = diagonal + perturbation; 250 } else { 251 diagonal_[iRow] = diagonal + perturbation; 252 rowsDropped[iRow] = 2; 253 numberDroppedBefore++; 254 } 255 } 222 } 223 for (int j = iRow + 1; j < numberRows_; j++) 224 largest2 = CoinMax(largest2, CoinAbs(work[j])); 225 diagonal_[iRow] = diagonalValue; 226 largest2 = CoinMax(largest2, CoinAbs(diagonalValue)); 227 } else { 228 /* dropped*/ 229 diagonal_[iRow] = 1.0; 230 } 231 addOffset; 232 work += addOffset; 233 } 234 /*check sizes*/ 235 largest2 *= 1.0e20; 236 largest = CoinMin(largest2, CHOL_SMALL_VALUE); 237 int numberDroppedBefore = 0; 238 for (iRow = 0; iRow < numberRows_; iRow++) { 239 int dropped = rowsDropped_[iRow]; 240 /* Move to int array*/ 241 rowsDropped[iRow] = dropped; 242 if (!dropped) { 243 CoinWorkDouble diagonal = diagonal_[iRow]; 244 if (diagonal > largest2) { 245 diagonal_[iRow] = diagonal + perturbation; 246 } else { 247 diagonal_[iRow] = diagonal + perturbation; 248 rowsDropped[iRow] = 2; 249 numberDroppedBefore++; 250 } 251 } 252 } 253 doubleParameters_[10] = CoinMax(1.0e20, largest); 254 integerParameters_[20] = 0; 255 doubleParameters_[3] = 0.0; 256 doubleParameters_[4] = COIN_DBL_MAX; 257 integerParameters_[34] = 0; /* say all must be positive*/ 258 #ifdef CHOL_COMPARE 259 if (numberRows_ < 200) 260 factorizePart3(rowsDropped); 261 #endif 262 factorizePart2(rowsDropped); 263 newDropped = integerParameters_[20] + numberDroppedBefore; 264 largest = doubleParameters_[3]; 265 smallest = doubleParameters_[4]; 266 if (model_>messageHandler()>logLevel() > 1) 267 std::cout << "Cholesky  largest " << largest << " smallest " << smallest << std::endl; 268 choleskyCondition_ = largest / smallest; 269 /*drop fresh makes some formADAT easier*/ 270 if (newDropped  numberRowsDropped_) { 271 newDropped = 0; 272 for (int i = 0; i < numberRows_; i++) { 273 char dropped = static_cast< char >(rowsDropped[i]); 274 rowsDropped_[i] = dropped; 275 if (dropped == 2) { 276 /*dropped this time*/ 277 rowsDropped[newDropped++] = i; 278 rowsDropped_[i] = 0; 279 } 280 } 281 numberRowsDropped_ = newDropped; 282 newDropped = (2 + newDropped); 283 } 284 } else { 285 /* KKT*/ 286 CoinPackedMatrix *quadratic = NULL; 287 ClpQuadraticObjective *quadraticObj = (dynamic_cast< ClpQuadraticObjective * >(model_>objectiveAsObject())); 288 if (quadraticObj) 289 quadratic = quadraticObj>quadraticObjective(); 290 /* matrix*/ 291 int numberRowsModel = model_>numberRows(); 292 int numberColumns = model_>numberColumns(); 293 int numberTotal = numberColumns + numberRowsModel; 294 longDouble *work = sparseFactor_; 295 work; /* skip diagonal*/ 296 int addOffset = numberRows_  1; 297 int iColumn; 298 if (!quadratic) { 299 for (iColumn = 0; iColumn < numberColumns; iColumn++) { 300 CoinWorkDouble value = diagonal[iColumn]; 301 if (CoinAbs(value) > 1.0e100) { 302 value = 1.0 / value; 303 largest = CoinMax(largest, CoinAbs(value)); 304 diagonal_[iColumn] = value; 305 CoinBigIndex start = columnStart[iColumn]; 306 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 307 for (CoinBigIndex j = start; j < end; j++) { 308 /*choleskyRow_[numberElements]=row[j]+numberTotal;*/ 309 /*sparseFactor_[numberElements++]=element[j];*/ 310 work[row[j] + numberTotal] = element[j]; 311 largest = CoinMax(largest, CoinAbs(element[j])); 256 312 } 257 doubleParameters_[10] = CoinMax(1.0e20, largest); 258 integerParameters_[20] = 0; 259 doubleParameters_[3] = 0.0; 260 doubleParameters_[4] = COIN_DBL_MAX; 261 integerParameters_[34] = 0; /* say all must be positive*/ 313 } else { 314 diagonal_[iColumn] = value; 315 } 316 addOffset; 317 work += addOffset; 318 } 319 } else { 320 /* Quadratic*/ 321 const int *columnQuadratic = quadratic>getIndices(); 322 const CoinBigIndex *columnQuadraticStart = quadratic>getVectorStarts(); 323 const int *columnQuadraticLength = quadratic>getVectorLengths(); 324 const double *quadraticElement = quadratic>getElements(); 325 for (iColumn = 0; iColumn < numberColumns; iColumn++) { 326 CoinWorkDouble value = diagonal[iColumn]; 327 CoinBigIndex j; 328 if (CoinAbs(value) > 1.0e100) { 329 value = 1.0 / value; 330 for (j = columnQuadraticStart[iColumn]; 331 j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) { 332 int jColumn = columnQuadratic[j]; 333 if (jColumn > iColumn) { 334 work[jColumn] = quadraticElement[j]; 335 } else if (iColumn == jColumn) { 336 value += quadraticElement[j]; 337 } 338 } 339 largest = CoinMax(largest, CoinAbs(value)); 340 diagonal_[iColumn] = value; 341 CoinBigIndex start = columnStart[iColumn]; 342 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 343 for (j = start; j < end; j++) { 344 work[row[j] + numberTotal] = element[j]; 345 largest = CoinMax(largest, CoinAbs(element[j])); 346 } 347 } else { 348 value = 1.0e100; 349 diagonal_[iColumn] = value; 350 } 351 addOffset; 352 work += addOffset; 353 } 354 } 355 /* slacks*/ 356 for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) { 357 CoinWorkDouble value = diagonal[iColumn]; 358 if (CoinAbs(value) > 1.0e100) { 359 value = 1.0 / value; 360 largest = CoinMax(largest, CoinAbs(value)); 361 } else { 362 value = 1.0e100; 363 } 364 diagonal_[iColumn] = value; 365 work[iColumn  numberColumns + numberTotal] = 1.0; 366 addOffset; 367 work += addOffset; 368 } 369 /* Finish diagonal*/ 370 for (iRow = 0; iRow < numberRowsModel; iRow++) { 371 diagonal_[iRow + numberTotal] = delta2; 372 } 373 /*check sizes*/ 374 largest *= 1.0e20; 375 largest = CoinMin(largest, CHOL_SMALL_VALUE); 376 doubleParameters_[10] = CoinMax(1.0e20, largest); 377 integerParameters_[20] = 0; 378 doubleParameters_[3] = 0.0; 379 doubleParameters_[4] = COIN_DBL_MAX; 380 /* Set up LDL cutoff*/ 381 integerParameters_[34] = numberTotal; 382 /* KKT*/ 383 int *rowsDropped2 = new int[numberRows_]; 384 CoinZeroN(rowsDropped2, numberRows_); 262 385 #ifdef CHOL_COMPARE 263 if (numberRows_ < 200) 264 factorizePart3(rowsDropped); 265 #endif 266 factorizePart2(rowsDropped); 267 newDropped = integerParameters_[20] + numberDroppedBefore; 268 largest = doubleParameters_[3]; 269 smallest = doubleParameters_[4]; 270 if (model_>messageHandler()>logLevel() > 1) 271 std::cout << "Cholesky  largest " << largest << " smallest " << smallest << std::endl; 272 choleskyCondition_ = largest / smallest; 273 /*drop fresh makes some formADAT easier*/ 274 if (newDropped  numberRowsDropped_) { 275 newDropped = 0; 276 for (int i = 0; i < numberRows_; i++) { 277 char dropped = static_cast<char>(rowsDropped[i]); 278 rowsDropped_[i] = dropped; 279 if (dropped == 2) { 280 /*dropped this time*/ 281 rowsDropped[newDropped++] = i; 282 rowsDropped_[i] = 0; 283 } 284 } 285 numberRowsDropped_ = newDropped; 286 newDropped = (2 + newDropped); 287 } 288 } else { 289 /* KKT*/ 290 CoinPackedMatrix * quadratic = NULL; 291 ClpQuadraticObjective * quadraticObj = 292 (dynamic_cast< ClpQuadraticObjective*>(model_>objectiveAsObject())); 293 if (quadraticObj) 294 quadratic = quadraticObj>quadraticObjective(); 295 /* matrix*/ 296 int numberRowsModel = model_>numberRows(); 297 int numberColumns = model_>numberColumns(); 298 int numberTotal = numberColumns + numberRowsModel; 299 longDouble * work = sparseFactor_; 300 work; /* skip diagonal*/ 301 int addOffset = numberRows_  1; 302 int iColumn; 303 if (!quadratic) { 304 for (iColumn = 0; iColumn < numberColumns; iColumn++) { 305 CoinWorkDouble value = diagonal[iColumn]; 306 if (CoinAbs(value) > 1.0e100) { 307 value = 1.0 / value; 308 largest = CoinMax(largest, CoinAbs(value)); 309 diagonal_[iColumn] = value; 310 CoinBigIndex start = columnStart[iColumn]; 311 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 312 for (CoinBigIndex j = start; j < end; j++) { 313 /*choleskyRow_[numberElements]=row[j]+numberTotal;*/ 314 /*sparseFactor_[numberElements++]=element[j];*/ 315 work[row[j] + numberTotal] = element[j]; 316 largest = CoinMax(largest, CoinAbs(element[j])); 317 } 318 } else { 319 diagonal_[iColumn] = value; 320 } 321 addOffset; 322 work += addOffset; 323 } 324 } else { 325 /* Quadratic*/ 326 const int * columnQuadratic = quadratic>getIndices(); 327 const CoinBigIndex * columnQuadraticStart = quadratic>getVectorStarts(); 328 const int * columnQuadraticLength = quadratic>getVectorLengths(); 329 const double * quadraticElement = quadratic>getElements(); 330 for (iColumn = 0; iColumn < numberColumns; iColumn++) { 331 CoinWorkDouble value = diagonal[iColumn]; 332 CoinBigIndex j; 333 if (CoinAbs(value) > 1.0e100) { 334 value = 1.0 / value; 335 for (j = columnQuadraticStart[iColumn]; 336 j < columnQuadraticStart[iColumn] + columnQuadraticLength[iColumn]; j++) { 337 int jColumn = columnQuadratic[j]; 338 if (jColumn > iColumn) { 339 work[jColumn] = quadraticElement[j]; 340 } else if (iColumn == jColumn) { 341 value += quadraticElement[j]; 342 } 343 } 344 largest = CoinMax(largest, CoinAbs(value)); 345 diagonal_[iColumn] = value; 346 CoinBigIndex start = columnStart[iColumn]; 347 CoinBigIndex end = columnStart[iColumn] + columnLength[iColumn]; 348 for (j = start; j < end; j++) { 349 work[row[j] + numberTotal] = element[j]; 350 largest = CoinMax(largest, CoinAbs(element[j])); 351 } 352 } else { 353 value = 1.0e100; 354 diagonal_[iColumn] = value; 355 } 356 addOffset; 357 work += addOffset; 358 } 359 } 360 /* slacks*/ 361 for (iColumn = numberColumns; iColumn < numberTotal; iColumn++) { 362 CoinWorkDouble value = diagonal[iColumn]; 363 if (CoinAbs(value) > 1.0e100) { 364 value = 1.0 / value; 365 largest = CoinMax(largest, CoinAbs(value)); 366 } else { 367 value = 1.0e100; 368 } 369 diagonal_[iColumn] = value; 370 work[iColumnnumberColumns+numberTotal] = 1.0; 371 addOffset; 372 work += addOffset; 373 } 374 /* Finish diagonal*/ 375 for (iRow = 0; iRow < numberRowsModel; iRow++) { 376 diagonal_[iRow+numberTotal] = delta2; 377 } 378 /*check sizes*/ 379 largest *= 1.0e20; 380 largest = CoinMin(largest, CHOL_SMALL_VALUE); 381 doubleParameters_[10] = CoinMax(1.0e20, largest); 382 integerParameters_[20] = 0; 383 doubleParameters_[3] = 0.0; 384 doubleParameters_[4] = COIN_DBL_MAX; 385 /* Set up LDL cutoff*/ 386 integerParameters_[34] = numberTotal; 387 /* KKT*/ 388 int * rowsDropped2 = new int[numberRows_]; 389 CoinZeroN(rowsDropped2, numberRows_); 390 #ifdef CHOL_COMPARE 391 if (numberRows_ < 200) 392 factorizePart3(rowsDropped2); 393 #endif 394 factorizePart2(rowsDropped2); 395 newDropped = integerParameters_[20]; 396 largest = doubleParameters_[3]; 397 smallest = doubleParameters_[4]; 398 if (model_>messageHandler()>logLevel() > 1) 399 COIN_DETAIL_PRINT(std::cout << "Cholesky  largest " << largest << " smallest " << smallest << std::endl); 400 choleskyCondition_ = largest / smallest; 401 /* Should save adjustments in ..R_*/ 402 int n1 = 0, n2 = 0; 403 CoinWorkDouble * primalR = model_>primalR(); 404 CoinWorkDouble * dualR = model_>dualR(); 405 for (iRow = 0; iRow < numberTotal; iRow++) { 406 if (rowsDropped2[iRow]) { 407 n1++; 408 /*printf("row region1 %d dropped\n",iRow);*/ 409 /*rowsDropped_[iRow]=1;*/ 410 rowsDropped_[iRow] = 0; 411 primalR[iRow] = doubleParameters_[20]; 412 } else { 413 rowsDropped_[iRow] = 0; 414 primalR[iRow] = 0.0; 415 } 416 } 417 for (; iRow < numberRows_; iRow++) { 418 if (rowsDropped2[iRow]) { 419 n2++; 420 /*printf("row region2 %d dropped\n",iRow);*/ 421 /*rowsDropped_[iRow]=1;*/ 422 rowsDropped_[iRow] = 0; 423 dualR[iRownumberTotal] = doubleParameters_[34]; 424 } else { 425 rowsDropped_[iRow] = 0; 426 dualR[iRownumberTotal] = 0.0; 427 } 428 } 429 } 430 return 0; 386 if (numberRows_ < 200) 387 factorizePart3(rowsDropped2); 388 #endif 389 factorizePart2(rowsDropped2); 390 newDropped = integerParameters_[20]; 391 largest = doubleParameters_[3]; 392 smallest = doubleParameters_[4]; 393 if (model_>messageHandler()>logLevel() > 1) 394 COIN_DETAIL_PRINT(std::cout << "Cholesky  largest " << largest << " smallest " << smallest << std::endl); 395 choleskyCondition_ = largest / smallest; 396 /* Should save adjustments in ..R_*/ 397 int n1 = 0, n2 = 0; 398 CoinWorkDouble *primalR = model_>primalR(); 399 CoinWorkDouble *dualR = model_>dualR(); 400 for (iRow = 0; iRow < numberTotal; iRow++) { 401 if (rowsDropped2[iRow]) { 402 n1++; 403 /*printf("row region1 %d dropped\n",iRow);*/ 404 /*rowsDropped_[iRow]=1;*/ 405 rowsDropped_[iRow] = 0; 406 primalR[iRow] = doubleParameters_[20]; 407 } else { 408 rowsDropped_[iRow] = 0; 409 primalR[iRow] = 0.0; 410 } 411 } 412 for (; iRow < numberRows_; iRow++) { 413 if (rowsDropped2[iRow]) { 414 n2++; 415 /*printf("row region2 %d dropped\n",iRow);*/ 416 /*rowsDropped_[iRow]=1;*/ 417 rowsDropped_[iRow] = 0; 418 dualR[iRow  numberTotal] = doubleParameters_[34]; 419 } else { 420 rowsDropped_[iRow] = 0; 421 dualR[iRow  numberTotal] = 0.0; 422 } 423 } 424 } 425 return 0; 431 426 } 432 427 /* Factorize  filling in rowsDropped and returning number dropped */ 433 void 434 ClpCholeskyDense::factorizePart3( int * rowsDropped) 435 { 436 int iColumn; 437 longDouble * xx = sparseFactor_; 438 longDouble * yy = diagonal_; 439 diagonal_ = sparseFactor_ + 40000; 440 sparseFactor_ = diagonal_ + numberRows_; 441 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ 442 CoinMemcpyN(xx, 40000, sparseFactor_); 443 CoinMemcpyN(yy, numberRows_, diagonal_); 444 int numberDropped = 0; 445 CoinWorkDouble largest = 0.0; 446 CoinWorkDouble smallest = COIN_DBL_MAX; 447 double dropValue = doubleParameters_[10]; 448 int firstPositive = integerParameters_[34]; 449 longDouble * work = sparseFactor_; 450 /* Allow for triangular*/ 451 int addOffset = numberRows_  1; 452 work; 453 for (iColumn = 0; iColumn < numberRows_; iColumn++) { 454 int iRow; 455 int addOffsetNow = numberRows_  1;; 456 longDouble * workNow = sparseFactor_  1 + iColumn; 457 CoinWorkDouble diagonalValue = diagonal_[iColumn]; 458 for (iRow = 0; iRow < iColumn; iRow++) { 459 double aj = *workNow; 460 addOffsetNow; 461 workNow += addOffsetNow; 462 diagonalValue = aj * aj * workDouble_[iRow]; 463 } 464 bool dropColumn = false; 465 if (iColumn < firstPositive) { 466 /* must be negative*/ 467 if (diagonalValue <= dropValue) { 468 smallest = CoinMin(smallest, diagonalValue); 469 largest = CoinMax(largest, diagonalValue); 470 workDouble_[iColumn] = diagonalValue; 471 diagonalValue = 1.0 / diagonalValue; 472 } else { 473 dropColumn = true; 474 workDouble_[iColumn] = 1.0e100; 475 diagonalValue = 0.0; 476 integerParameters_[20]++; 477 } 478 } else { 479 /* must be positive*/ 480 if (diagonalValue >= dropValue) { 481 smallest = CoinMin(smallest, diagonalValue); 482 largest = CoinMax(largest, diagonalValue); 483 workDouble_[iColumn] = diagonalValue; 484 diagonalValue = 1.0 / diagonalValue; 485 } else { 486 dropColumn = true; 487 workDouble_[iColumn] = 1.0e100; 488 diagonalValue = 0.0; 489 integerParameters_[20]++; 490 } 491 } 492 if (!dropColumn) { 493 diagonal_[iColumn] = diagonalValue; 494 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { 495 double value = work[iRow]; 496 workNow = sparseFactor_  1; 497 int addOffsetNow = numberRows_  1;; 498 for (int jColumn = 0; jColumn < iColumn; jColumn++) { 499 double aj = workNow[iColumn]; 500 double multiplier = workDouble_[jColumn]; 501 double ai = workNow[iRow]; 502 addOffsetNow; 503 workNow += addOffsetNow; 504 value = aj * ai * multiplier; 505 } 506 work[iRow] = value * diagonalValue; 507 } 508 } else { 509 /* drop column*/ 510 rowsDropped[iColumn] = 2; 511 numberDropped++; 512 diagonal_[iColumn] = 0.0; 513 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { 514 work[iRow] = 0.0; 515 } 516 } 517 addOffset; 518 work += addOffset; 519 } 520 doubleParameters_[3] = largest; 521 doubleParameters_[4] = smallest; 522 integerParameters_[20] = numberDropped; 523 sparseFactor_ = xx; 524 diagonal_ = yy; 428 void ClpCholeskyDense::factorizePart3(int *rowsDropped) 429 { 430 int iColumn; 431 longDouble *xx = sparseFactor_; 432 longDouble *yy = diagonal_; 433 diagonal_ = sparseFactor_ + 40000; 434 sparseFactor_ = diagonal_ + numberRows_; 435 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ 436 CoinMemcpyN(xx, 40000, sparseFactor_); 437 CoinMemcpyN(yy, numberRows_, diagonal_); 438 int numberDropped = 0; 439 CoinWorkDouble largest = 0.0; 440 CoinWorkDouble smallest = COIN_DBL_MAX; 441 double dropValue = doubleParameters_[10]; 442 int firstPositive = integerParameters_[34]; 443 longDouble *work = sparseFactor_; 444 /* Allow for triangular*/ 445 int addOffset = numberRows_  1; 446 work; 447 for (iColumn = 0; iColumn < numberRows_; iColumn++) { 448 int iRow; 449 int addOffsetNow = numberRows_  1; 450 ; 451 longDouble *workNow = sparseFactor_  1 + iColumn; 452 CoinWorkDouble diagonalValue = diagonal_[iColumn]; 453 for (iRow = 0; iRow < iColumn; iRow++) { 454 double aj = *workNow; 455 addOffsetNow; 456 workNow += addOffsetNow; 457 diagonalValue = aj * aj * workDouble_[iRow]; 458 } 459 bool dropColumn = false; 460 if (iColumn < firstPositive) { 461 /* must be negative*/ 462 if (diagonalValue <= dropValue) { 463 smallest = CoinMin(smallest, diagonalValue); 464 largest = CoinMax(largest, diagonalValue); 465 workDouble_[iColumn] = diagonalValue; 466 diagonalValue = 1.0 / diagonalValue; 467 } else { 468 dropColumn = true; 469 workDouble_[iColumn] = 1.0e100; 470 diagonalValue = 0.0; 471 integerParameters_[20]++; 472 } 473 } else { 474 /* must be positive*/ 475 if (diagonalValue >= dropValue) { 476 smallest = CoinMin(smallest, diagonalValue); 477 largest = CoinMax(largest, diagonalValue); 478 workDouble_[iColumn] = diagonalValue; 479 diagonalValue = 1.0 / diagonalValue; 480 } else { 481 dropColumn = true; 482 workDouble_[iColumn] = 1.0e100; 483 diagonalValue = 0.0; 484 integerParameters_[20]++; 485 } 486 } 487 if (!dropColumn) { 488 diagonal_[iColumn] = diagonalValue; 489 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { 490 double value = work[iRow]; 491 workNow = sparseFactor_  1; 492 int addOffsetNow = numberRows_  1; 493 ; 494 for (int jColumn = 0; jColumn < iColumn; jColumn++) { 495 double aj = workNow[iColumn]; 496 double multiplier = workDouble_[jColumn]; 497 double ai = workNow[iRow]; 498 addOffsetNow; 499 workNow += addOffsetNow; 500 value = aj * ai * multiplier; 501 } 502 work[iRow] = value * diagonalValue; 503 } 504 } else { 505 /* drop column*/ 506 rowsDropped[iColumn] = 2; 507 numberDropped++; 508 diagonal_[iColumn] = 0.0; 509 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) { 510 work[iRow] = 0.0; 511 } 512 } 513 addOffset; 514 work += addOffset; 515 } 516 doubleParameters_[3] = largest; 517 doubleParameters_[4] = smallest; 518 integerParameters_[20] = numberDropped; 519 sparseFactor_ = xx; 520 diagonal_ = yy; 525 521 } 526 522 /*#define POS_DEBUG*/ 527 523 #ifdef POS_DEBUG 528 524 static int counter = 0; 529 int ClpCholeskyDense::bNumber(const longDouble * 530 { 531 532 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;533 534 assert((k % BLOCKSQ) == 0);535 536 537 538 539 540 541 542 543 544 545 546 if(counter > 100000)547 548 525 int ClpCholeskyDense::bNumber(const longDouble *array, int &iRow, int &iCol) 526 { 527 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 528 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks; 529 int k = array  a; 530 assert((k % BLOCKSQ) == 0); 531 iCol = 0; 532 int kk = k >> BLOCKSQSHIFT; 533 /*printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);*/ 534 k = kk; 535 while (k >= numberBlocks) { 536 iCol++; 537 k = numberBlocks; 538 numberBlocks; 539 } 540 iRow = iCol + k; 541 counter++; 542 if (counter > 100000) 543 exit(77); 544 return kk; 549 545 } 550 546 #endif 551 547 /* Factorize  filling in rowsDropped and returning number dropped */ 552 void 553 ClpCholeskyDense::factorizePart2( int * rowsDropped) 554 { 555 int iColumn; 556 /*longDouble * xx = sparseFactor_;*/ 557 /*longDouble * yy = diagonal_;*/ 558 /*diagonal_ = sparseFactor_ + 40000;*/ 559 /*sparseFactor_=diagonal_ + numberRows_;*/ 560 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ 561 /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/ 562 /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/ 563 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 564 /* later align on boundary*/ 565 longDouble * a = sparseFactor_ + BLOCKSQ * numberBlocks; 566 int n = numberRows_; 567 int nRound = numberRows_ & (~(BLOCK  1)); 568 /* adjust if exact*/ 569 if (nRound == n) 570 nRound = BLOCK; 571 int sizeLastBlock = n  nRound; 572 int get = n * (n  1) / 2; /* ? as no diagonal*/ 573 int block = numberBlocks * (numberBlocks + 1) / 2; 574 int ifOdd; 575 int rowLast; 576 if (sizeLastBlock != BLOCK) { 577 longDouble * aa = &a[(block1)*BLOCKSQ]; 578 rowLast = nRound  1; 579 ifOdd = 1; 580 int put = BLOCKSQ; 581 /* do last separately*/ 582 put = (BLOCK  sizeLastBlock) * (BLOCK + 1); 583 for (iColumn = numberRows_  1; iColumn >= nRound; iColumn) { 584 int put2 = put; 585 put = BLOCK; 586 for (int iRow = numberRows_  1; iRow > iColumn; iRow) { 587 aa[put2] = sparseFactor_[get]; 588 assert (aa + put2 >= sparseFactor_ + get); 589 } 590 /* save diagonal as well*/ 591 aa[put2] = diagonal_[iColumn]; 592 } 593 n = nRound; 594 block; 595 } else { 596 /* exact fit*/ 597 rowLast = numberRows_  1; 598 ifOdd = 0; 599 } 600 /* Now main loop*/ 601 int nBlock = 0; 602 for (; n > 0; n = BLOCK) { 603 longDouble * aa = &a[(block1)*BLOCKSQ]; 604 longDouble * aaLast = NULL; 605 int put = BLOCKSQ; 606 int putLast = 0; 607 /* see if we have small block*/ 608 if (ifOdd) { 609 aaLast = &a[(block1)*BLOCKSQ]; 610 aa = aaLast  BLOCKSQ; 611 putLast = BLOCKSQ  BLOCK + sizeLastBlock; 612 } 613 for (iColumn = n  1; iColumn >= n  BLOCK; iColumn) { 614 if (aaLast) { 615 /* last bit*/ 616 for (int iRow = numberRows_  1; iRow > rowLast; iRow) { 617 aaLast[putLast] = sparseFactor_[get]; 618 assert (aaLast + putLast >= sparseFactor_ + get); 619 } 620 putLast = BLOCK  sizeLastBlock; 621 } 622 longDouble * aPut = aa; 623 int j = rowLast; 624 for (int jBlock = 0; jBlock <= nBlock; jBlock++) { 625 int put2 = put; 626 int last = CoinMax(j  BLOCK, iColumn); 627 for (int iRow = j; iRow > last; iRow) { 628 aPut[put2] = sparseFactor_[get]; 629 assert (aPut + put2 >= sparseFactor_ + get); 630 } 631 if (j  BLOCK < iColumn) { 632 /* save diagonal as well*/ 633 aPut[put2] = diagonal_[iColumn]; 634 } 635 j = BLOCK; 636 aPut = BLOCKSQ; 637 } 638 put = BLOCK; 639 } 640 nBlock++; 641 block = nBlock + ifOdd; 642 } 643 ClpCholeskyDenseC info; 644 info.diagonal_ = diagonal_; 645 info.doubleParameters_[0] = doubleParameters_[10]; 646 info.integerParameters_[0] = integerParameters_[34]; 548 void ClpCholeskyDense::factorizePart2(int *rowsDropped) 549 { 550 int iColumn; 551 /*longDouble * xx = sparseFactor_;*/ 552 /*longDouble * yy = diagonal_;*/ 553 /*diagonal_ = sparseFactor_ + 40000;*/ 554 /*sparseFactor_=diagonal_ + numberRows_;*/ 555 /*memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));*/ 556 /*memcpy(sparseFactor_,xx,40000*sizeof(double));*/ 557 /*memcpy(diagonal_,yy,numberRows_*sizeof(double));*/ 558 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 559 /* later align on boundary*/ 560 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks; 561 int n = numberRows_; 562 int nRound = numberRows_ & (~(BLOCK  1)); 563 /* adjust if exact*/ 564 if (nRound == n) 565 nRound = BLOCK; 566 int sizeLastBlock = n  nRound; 567 int get = n * (n  1) / 2; /* ? as no diagonal*/ 568 int block = numberBlocks * (numberBlocks + 1) / 2; 569 int ifOdd; 570 int rowLast; 571 if (sizeLastBlock != BLOCK) { 572 longDouble *aa = &a[(block  1) * BLOCKSQ]; 573 rowLast = nRound  1; 574 ifOdd = 1; 575 int put = BLOCKSQ; 576 /* do last separately*/ 577 put = (BLOCK  sizeLastBlock) * (BLOCK + 1); 578 for (iColumn = numberRows_  1; iColumn >= nRound; iColumn) { 579 int put2 = put; 580 put = BLOCK; 581 for (int iRow = numberRows_  1; iRow > iColumn; iRow) { 582 aa[put2] = sparseFactor_[get]; 583 assert(aa + put2 >= sparseFactor_ + get); 584 } 585 /* save diagonal as well*/ 586 aa[put2] = diagonal_[iColumn]; 587 } 588 n = nRound; 589 block; 590 } else { 591 /* exact fit*/ 592 rowLast = numberRows_  1; 593 ifOdd = 0; 594 } 595 /* Now main loop*/ 596 int nBlock = 0; 597 for (; n > 0; n = BLOCK) { 598 longDouble *aa = &a[(block  1) * BLOCKSQ]; 599 longDouble *aaLast = NULL; 600 int put = BLOCKSQ; 601 int putLast = 0; 602 /* see if we have small block*/ 603 if (ifOdd) { 604 aaLast = &a[(block  1) * BLOCKSQ]; 605 aa = aaLast  BLOCKSQ; 606 putLast = BLOCKSQ  BLOCK + sizeLastBlock; 607 } 608 for (iColumn = n  1; iColumn >= n  BLOCK; iColumn) { 609 if (aaLast) { 610 /* last bit*/ 611 for (int iRow = numberRows_  1; iRow > rowLast; iRow) { 612 aaLast[putLast] = sparseFactor_[get]; 613 assert(aaLast + putLast >= sparseFactor_ + get); 614 } 615 putLast = BLOCK  sizeLastBlock; 616 } 617 longDouble *aPut = aa; 618 int j = rowLast; 619 for (int jBlock = 0; jBlock <= nBlock; jBlock++) { 620 int put2 = put; 621 int last = CoinMax(j  BLOCK, iColumn); 622 for (int iRow = j; iRow > last; iRow) { 623 aPut[put2] = sparseFactor_[get]; 624 assert(aPut + put2 >= sparseFactor_ + get); 625 } 626 if (j  BLOCK < iColumn) { 627 /* save diagonal as well*/ 628 aPut[put2] = diagonal_[iColumn]; 629 } 630 j = BLOCK; 631 aPut = BLOCKSQ; 632 } 633 put = BLOCK; 634 } 635 nBlock++; 636 block = nBlock + ifOdd; 637 } 638 ClpCholeskyDenseC info; 639 info.diagonal_ = diagonal_; 640 info.doubleParameters_[0] = doubleParameters_[10]; 641 info.integerParameters_[0] = integerParameters_[34]; 647 642 #ifndef CLP_CILK 648 649 643 ClpCholeskyCfactor(&info, a, numberRows_, numberBlocks, 644 diagonal_, workDouble_, rowsDropped); 650 645 #else 651 652 653 654 655 656 657 658 #endif 659 660 661 662 663 664 665 666 667 668 669 670 671 672 646 info.a = a; 647 info.n = numberRows_; 648 info.numberBlocks = numberBlocks; 649 info.work = workDouble_; 650 info.rowsDropped = rowsDropped; 651 info.integerParameters_[1] = model_>numberThreads(); 652 ClpCholeskySpawn(&info); 653 #endif 654 double largest = 0.0; 655 double smallest = COIN_DBL_MAX; 656 int numberDropped = 0; 657 for (int i = 0; i < numberRows_; i++) { 658 if (diagonal_[i]) { 659 largest = CoinMax(largest, CoinAbs(diagonal_[i])); 660 smallest = CoinMin(smallest, CoinAbs(diagonal_[i])); 661 } else { 662 numberDropped++; 663 } 664 } 665 doubleParameters_[3] = CoinMax(doubleParameters_[3], 1.0 / smallest); 666 doubleParameters_[4] = CoinMin(doubleParameters_[4], 1.0 / largest); 667 integerParameters_[20] += numberDropped; 673 668 } 674 669 /* Non leaf recursive factor*/ 675 void 676 ClpCholeskyCfactor(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, int numberBlocks, 677 longDouble * diagonal, longDouble * work, int * rowsDropped) 678 { 679 if (n <= BLOCK) { 680 ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped); 681 } else { 682 int nb = number_blocks((n + 1) >> 1); 683 int nThis = number_rows(nb); 684 longDouble * aother; 685 int nLeft = n  nThis; 686 int nintri = (nb * (nb + 1)) >> 1; 687 int nbelow = (numberBlocks  nb) * nb; 688 ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped); 689 ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks); 690 aother = a + number_entries(nintri + nbelow); 691 ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks); 692 ClpCholeskyCfactor(thisStruct, aother, nLeft, 693 numberBlocks  nb, diagonal + nThis, work + nThis, rowsDropped); 694 } 670 void ClpCholeskyCfactor(ClpCholeskyDenseC *thisStruct, longDouble *a, int n, int numberBlocks, 671 longDouble *diagonal, longDouble *work, int *rowsDropped) 672 { 673 if (n <= BLOCK) { 674 ClpCholeskyCfactorLeaf(thisStruct, a, n, diagonal, work, rowsDropped); 675 } else { 676 int nb = number_blocks((n + 1) >> 1); 677 int nThis = number_rows(nb); 678 longDouble *aother; 679 int nLeft = n  nThis; 680 int nintri = (nb * (nb + 1)) >> 1; 681 int nbelow = (numberBlocks  nb) * nb; 682 ClpCholeskyCfactor(thisStruct, a, nThis, numberBlocks, diagonal, work, rowsDropped); 683 ClpCholeskyCtriRec(thisStruct, a, nThis, a + number_entries(nb), diagonal, work, nLeft, nb, 0, numberBlocks); 684 aother = a + number_entries(nintri + nbelow); 685 ClpCholeskyCrecTri(thisStruct, a + number_entries(nb), nLeft, nThis, nb, 0, aother, diagonal, work, numberBlocks); 686 ClpCholeskyCfactor(thisStruct, aother, nLeft, 687 numberBlocks  nb, diagonal + nThis, work + nThis, rowsDropped); 688 } 695 689 } 696 690 /* Non leaf recursive triangle rectangle update*/ 697 void 698 ClpCholeskyCtriRec(ClpCholeskyDenseC * thisStruct, longDouble * aTri, int nThis, longDouble * aUnder, 699 longDouble * diagonal, longDouble * work, 700 int nLeft, int iBlock, int jBlock, 701 int numberBlocks) 702 { 703 if (nThis <= BLOCK && nLeft <= BLOCK) { 704 ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft); 705 } else if (nThis < nLeft) { 706 int nb = number_blocks((nLeft + 1) >> 1); 707 int nLeft2 = number_rows(nb); 708 cilk_spawn ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks); 709 ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft  nLeft2, 710 iBlock + nb, jBlock, numberBlocks); 711 cilk_sync; 712 } else { 713 int nb = number_blocks((nThis + 1) >> 1); 714 int nThis2 = number_rows(nb); 715 longDouble * aother; 716 int kBlock = jBlock + nb; 717 int i; 718 int nintri = (nb * (nb + 1)) >> 1; 719 int nbelow = (numberBlocks  nb) * nb; 720 ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks); 721 /* and rectangular update */ 722 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  723 (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 724 aother = aUnder + number_entries(i); 725 ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis  nThis2, nLeft, nThis2, aUnder, aother, 726 work, kBlock, jBlock, numberBlocks); 727 ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis  nThis2, aother, diagonal + nThis2, 728 work + nThis2, nLeft, 729 iBlock  nb, kBlock  nb, numberBlocks  nb); 730 } 691 void ClpCholeskyCtriRec(ClpCholeskyDenseC *thisStruct, longDouble *aTri, int nThis, longDouble *aUnder, 692 longDouble *diagonal, longDouble *work, 693 int nLeft, int iBlock, int jBlock, 694 int numberBlocks) 695 { 696 if (nThis <= BLOCK && nLeft <= BLOCK) { 697 ClpCholeskyCtriRecLeaf(/*thisStruct,*/ aTri, aUnder, diagonal, work, nLeft); 698 } else if (nThis < nLeft) { 699 int nb = number_blocks((nLeft + 1) >> 1); 700 int nLeft2 = number_rows(nb); 701 cilk_spawn ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder, diagonal, work, nLeft2, iBlock, jBlock, numberBlocks); 702 ClpCholeskyCtriRec(thisStruct, aTri, nThis, aUnder + number_entries(nb), diagonal, work, nLeft  nLeft2, 703 iBlock + nb, jBlock, numberBlocks); 704 cilk_sync; 705 } else { 706 int nb = number_blocks((nThis + 1) >> 1); 707 int nThis2 = number_rows(nb); 708 longDouble *aother; 709 int kBlock = jBlock + nb; 710 int i; 711 int nintri = (nb * (nb + 1)) >> 1; 712 int nbelow = (numberBlocks  nb) * nb; 713 ClpCholeskyCtriRec(thisStruct, aTri, nThis2, aUnder, diagonal, work, nLeft, iBlock, jBlock, numberBlocks); 714 /* and rectangular update */ 715 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 716 aother = aUnder + number_entries(i); 717 ClpCholeskyCrecRec(thisStruct, aTri + number_entries(nb), nThis  nThis2, nLeft, nThis2, aUnder, aother, 718 work, kBlock, jBlock, numberBlocks); 719 ClpCholeskyCtriRec(thisStruct, aTri + number_entries(nintri + nbelow), nThis  nThis2, aother, diagonal + nThis2, 720 work + nThis2, nLeft, 721 iBlock  nb, kBlock  nb, numberBlocks  nb); 722 } 731 723 } 732 724 /* Non leaf recursive rectangle triangle update*/ 733 void 734 ClpCholeskyCrecTri(ClpCholeskyDenseC * thisStruct, longDouble * aUnder, int nTri, int nDo, 735 int iBlock, int jBlock, longDouble * aTri, 736 longDouble * diagonal, longDouble * work, 737 int numberBlocks) 738 { 739 if (nTri <= BLOCK && nDo <= BLOCK) { 740 ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri,/*diagonal,*/work, nTri); 741 } else if (nTri < nDo) { 742 int nb = number_blocks((nDo + 1) >> 1); 743 int nDo2 = number_rows(nb); 744 longDouble * aother; 745 int i; 746 ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks); 747 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  748 (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 749 aother = aUnder + number_entries(i); 750 ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo  nDo2, iBlock  nb, jBlock, aTri, diagonal + nDo2, 751 work + nDo2, numberBlocks  nb); 752 } else { 753 int nb = number_blocks((nTri + 1) >> 1); 754 int nTri2 = number_rows(nb); 755 longDouble * aother; 756 int i; 757 cilk_spawn ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks); 758 /* and rectangular update */ 759 i = ((numberBlocks  iBlock) * (numberBlocks  iBlock + 1)  760 (numberBlocks  iBlock  nb) * (numberBlocks  iBlock  nb + 1)) >> 1; 761 aother = aTri + number_entries(nb); 762 ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri  nTri2, nDo, aUnder + number_entries(nb), aother, 763 work, iBlock, jBlock, numberBlocks); 764 ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri  nTri2, nDo, iBlock + nb, jBlock, 765 aTri + number_entries(i), diagonal, work, numberBlocks); 766 cilk_sync; 767 } 725 void ClpCholeskyCrecTri(ClpCholeskyDenseC *thisStruct, longDouble *aUnder, int nTri, int nDo, 726 int iBlock, int jBlock, longDouble *aTri, 727 longDouble *diagonal, longDouble *work, 728 int numberBlocks) 729 { 730 if (nTri <= BLOCK && nDo <= BLOCK) { 731 ClpCholeskyCrecTriLeaf(/*thisStruct,*/ aUnder, aTri, /*diagonal,*/ work, nTri); 732 } else if (nTri < nDo) { 733 int nb = number_blocks((nDo + 1) >> 1); 734 int nDo2 = number_rows(nb); 735 longDouble *aother; 736 int i; 737 ClpCholeskyCrecTri(thisStruct, aUnder, nTri, nDo2, iBlock, jBlock, aTri, diagonal, work, numberBlocks); 738 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 739 aother = aUnder + number_entries(i); 740 ClpCholeskyCrecTri(thisStruct, aother, nTri, nDo  nDo2, iBlock  nb, jBlock, aTri, diagonal + nDo2, 741 work + nDo2, numberBlocks  nb); 742 } else { 743 int nb = number_blocks((nTri + 1) >> 1); 744 int nTri2 = number_rows(nb); 745 longDouble *aother; 746 int i; 747 cilk_spawn ClpCholeskyCrecTri(thisStruct, aUnder, nTri2, nDo, iBlock, jBlock, aTri, diagonal, work, numberBlocks); 748 /* and rectangular update */ 749 i = ((numberBlocks  iBlock) * (numberBlocks  iBlock + 1)  (numberBlocks  iBlock  nb) * (numberBlocks  iBlock  nb + 1)) >> 1; 750 aother = aTri + number_entries(nb); 751 ClpCholeskyCrecRec(thisStruct, aUnder, nTri2, nTri  nTri2, nDo, aUnder + number_entries(nb), aother, 752 work, iBlock, jBlock, numberBlocks); 753 ClpCholeskyCrecTri(thisStruct, aUnder + number_entries(nb), nTri  nTri2, nDo, iBlock + nb, jBlock, 754 aTri + number_entries(i), diagonal, work, numberBlocks); 755 cilk_sync; 756 } 768 757 } 769 758 /* Non leaf recursive rectangle rectangle update, … … 771 760 nUnderK is number of rows in kBlock 772 761 */ 773 void 774 ClpCholeskyCrecRec(ClpCholeskyDenseC * thisStruct, longDouble * above, int nUnder, int nUnderK, 775 int nDo, longDouble * aUnder, longDouble *aOther, 776 longDouble * work, 777 int iBlock, int jBlock, 778 int numberBlocks) 779 { 780 if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) { 781 assert (nDo == BLOCK && nUnder == BLOCK); 782 ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above , aUnder , aOther, work, nUnderK); 783 } else if (nDo <= nUnderK && nUnder <= nUnderK) { 784 int nb = number_blocks((nUnderK + 1) >> 1); 785 int nUnder2 = number_rows(nb); 786 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work, 787 iBlock, jBlock, numberBlocks); 788 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK  nUnder2, nDo, aUnder + number_entries(nb), 789 aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks); 790 cilk_sync; 791 } else if (nUnderK <= nDo && nUnder <= nDo) { 792 int nb = number_blocks((nDo + 1) >> 1); 793 int nDo2 = number_rows(nb); 794 int i; 795 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work, 796 iBlock, jBlock, numberBlocks); 797 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  798 (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 799 ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo  nDo2, 800 aUnder + number_entries(i), 801 aOther, work + nDo2, 802 iBlock  nb, jBlock, numberBlocks  nb); 803 } else { 804 int nb = number_blocks((nUnder + 1) >> 1); 805 int nUnder2 = number_rows(nb); 806 int i; 807 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work, 808 iBlock, jBlock, numberBlocks); 809 i = ((numberBlocks  iBlock) * (numberBlocks  iBlock  1)  810 (numberBlocks  iBlock  nb) * (numberBlocks  iBlock  nb  1)) >> 1; 811 ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder  nUnder2, nUnderK, nDo, aUnder, 812 aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks); 813 cilk_sync; 814 } 762 void ClpCholeskyCrecRec(ClpCholeskyDenseC *thisStruct, longDouble *above, int nUnder, int nUnderK, 763 int nDo, longDouble *aUnder, longDouble *aOther, 764 longDouble *work, 765 int iBlock, int jBlock, 766 int numberBlocks) 767 { 768 if (nDo <= BLOCK && nUnder <= BLOCK && nUnderK <= BLOCK) { 769 assert(nDo == BLOCK && nUnder == BLOCK); 770 ClpCholeskyCrecRecLeaf(/*thisStruct,*/ above, aUnder, aOther, work, nUnderK); 771 } else if (nDo <= nUnderK && nUnder <= nUnderK) { 772 int nb = number_blocks((nUnderK + 1) >> 1); 773 int nUnder2 = number_rows(nb); 774 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnder2, nDo, aUnder, aOther, work, 775 iBlock, jBlock, numberBlocks); 776 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK  nUnder2, nDo, aUnder + number_entries(nb), 777 aOther + number_entries(nb), work, iBlock, jBlock, numberBlocks); 778 cilk_sync; 779 } else if (nUnderK <= nDo && nUnder <= nDo) { 780 int nb = number_blocks((nDo + 1) >> 1); 781 int nDo2 = number_rows(nb); 782 int i; 783 ClpCholeskyCrecRec(thisStruct, above, nUnder, nUnderK, nDo2, aUnder, aOther, work, 784 iBlock, jBlock, numberBlocks); 785 i = ((numberBlocks  jBlock) * (numberBlocks  jBlock  1)  (numberBlocks  jBlock  nb) * (numberBlocks  jBlock  nb  1)) >> 1; 786 ClpCholeskyCrecRec(thisStruct, above + number_entries(i), nUnder, nUnderK, nDo  nDo2, 787 aUnder + number_entries(i), 788 aOther, work + nDo2, 789 iBlock  nb, jBlock, numberBlocks  nb); 790 } else { 791 int nb = number_blocks((nUnder + 1) >> 1); 792 int nUnder2 = number_rows(nb); 793 int i; 794 cilk_spawn ClpCholeskyCrecRec(thisStruct, above, nUnder2, nUnderK, nDo, aUnder, aOther, work, 795 iBlock, jBlock, numberBlocks); 796 i = ((numberBlocks  iBlock) * (numberBlocks  iBlock  1)  (numberBlocks  iBlock  nb) * (numberBlocks  iBlock  nb  1)) >> 1; 797 ClpCholeskyCrecRec(thisStruct, above + number_entries(nb), nUnder  nUnder2, nUnderK, nDo, aUnder, 798 aOther + number_entries(i), work, iBlock + nb, jBlock, numberBlocks); 799 cilk_sync; 800 } 815 801 } 816 802 /* Leaf recursive factor*/ 817 void 818 ClpCholeskyCfactorLeaf(ClpCholeskyDenseC * thisStruct, longDouble * a, int n, 819 longDouble * diagonal, longDouble * work, int * rowsDropped) 820 { 821 double dropValue = thisStruct>doubleParameters_[0]; 822 int firstPositive = thisStruct>integerParameters_[0]; 823 int rowOffset = static_cast<int>(diagonal  thisStruct>diagonal_); 824 int i, j, k; 825 CoinWorkDouble t00, temp1; 826 longDouble * aa; 827 aa = a  BLOCK; 828 for (j = 0; j < n; j ++) { 829 bool dropColumn; 830 CoinWorkDouble useT00; 831 aa += BLOCK; 832 t00 = aa[j]; 833 for (k = 0; k < j; ++k) { 834 CoinWorkDouble multiplier = work[k]; 835 t00 = a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier; 836 } 837 dropColumn = false; 838 useT00 = t00; 839 if (j + rowOffset < firstPositive) { 840 /* must be negative*/ 841 if (t00 <= dropValue) { 842 /*aa[j]=t00;*/ 843 t00 = 1.0 / t00; 844 } else { 845 dropColumn = true; 846 /*aa[j]=1.0e100;*/ 847 useT00 = 1.0e100; 848 t00 = 0.0; 849 } 850 } else { 851 /* must be positive*/ 852 if (t00 >= dropValue) { 853 /*aa[j]=t00;*/ 854 t00 = 1.0 / t00; 855 } else { 856 dropColumn = true; 857 /*aa[j]=1.0e100;*/ 858 useT00 = 1.0e100; 859 t00 = 0.0; 860 } 861 } 862 if (!dropColumn) { 863 diagonal[j] = t00; 864 work[j] = useT00; 865 temp1 = t00; 866 for (i = j + 1; i < n; i++) { 867 t00 = aa[i]; 868 for (k = 0; k < j; ++k) { 869 CoinWorkDouble multiplier = work[k]; 870 t00 = a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier; 871 } 872 aa[i] = t00 * temp1; 873 } 874 } else { 875 /* drop column*/ 876 rowsDropped[j+rowOffset] = 2; 877 diagonal[j] = 0.0; 878 /*aa[j]=1.0e100;*/ 879 work[j] = 1.0e100; 880 for (i = j + 1; i < n; i++) { 881 aa[i] = 0.0; 882 } 883 } 884 } 803 void ClpCholeskyCfactorLeaf(ClpCholeskyDenseC *thisStruct, longDouble *a, int n, 804 longDouble *diagonal, longDouble *work, int *rowsDropped) 805 { 806 double dropValue = thisStruct>doubleParameters_[0]; 807 int firstPositive = thisStruct>integerParameters_[0]; 808 int rowOffset = static_cast< int >(diagonal  thisStruct>diagonal_); 809 int i, j, k; 810 CoinWorkDouble t00, temp1; 811 longDouble *aa; 812 aa = a  BLOCK; 813 for (j = 0; j < n; j++) { 814 bool dropColumn; 815 CoinWorkDouble useT00; 816 aa += BLOCK; 817 t00 = aa[j]; 818 for (k = 0; k < j; ++k) { 819 CoinWorkDouble multiplier = work[k]; 820 t00 = a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier; 821 } 822 dropColumn = false; 823 useT00 = t00; 824 if (j + rowOffset < firstPositive) { 825 /* must be negative*/ 826 if (t00 <= dropValue) { 827 /*aa[j]=t00;*/ 828 t00 = 1.0 / t00; 829 } else { 830 dropColumn = true; 831 /*aa[j]=1.0e100;*/ 832 useT00 = 1.0e100; 833 t00 = 0.0; 834 } 835 } else { 836 /* must be positive*/ 837 if (t00 >= dropValue) { 838 /*aa[j]=t00;*/ 839 t00 = 1.0 / t00; 840 } else { 841 dropColumn = true; 842 /*aa[j]=1.0e100;*/ 843 useT00 = 1.0e100; 844 t00 = 0.0; 845 } 846 } 847 if (!dropColumn) { 848 diagonal[j] = t00; 849 work[j] = useT00; 850 temp1 = t00; 851 for (i = j + 1; i < n; i++) { 852 t00 = aa[i]; 853 for (k = 0; k < j; ++k) { 854 CoinWorkDouble multiplier = work[k]; 855 t00 = a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier; 856 } 857 aa[i] = t00 * temp1; 858 } 859 } else { 860 /* drop column*/ 861 rowsDropped[j + rowOffset] = 2; 862 diagonal[j] = 0.0; 863 /*aa[j]=1.0e100;*/ 864 work[j] = 1.0e100; 865 for (i = j + 1; i < n; i++) { 866 aa[i] = 0.0; 867 } 868 } 869 } 885 870 } 886 871 /* Leaf recursive triangle rectangle update*/ 887 void 888 ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work, 889 int nUnder) 872 void ClpCholeskyCtriRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aTri, longDouble *aUnder, longDouble *diagonal, longDouble *work, 873 int nUnder) 890 874 { 891 875 #ifdef POS_DEBUG 892 893 894 895 896 897 898 899 assert(diagonal == thisStruct>diagonal_ + ict * BLOCK);900 #endif 901 902 longDouble *aa;876 int iru, icu; 877 int iu = bNumber(aUnder, iru, icu); 878 int irt, ict; 879 int it = bNumber(aTri, irt, ict); 880 /*printf("%d %d\n",iu,it);*/ 881 printf("trirecleaf under (%d,%d), tri (%d,%d)\n", 882 iru, icu, irt, ict); 883 assert(diagonal == thisStruct>diagonal_ + ict * BLOCK); 884 #endif 885 int j; 886 longDouble *aa; 903 887 #ifdef BLOCKUNROLL 904 905 906 907 908 909 CoinWorkDouble temp1 = diagonal[j+1];910 911 for (i = 0; i < BLOCK; i += 2) {912 913 CoinWorkDouble t00 = aUnder[i+j*BLOCK];914 CoinWorkDouble t10 = aUnder[i+ BLOCK + j*BLOCK];915 CoinWorkDouble t01 = aUnder[i+1+j*BLOCK];916 CoinWorkDouble t11 = aUnder[i+1+ BLOCK + j*BLOCK];917 918 919 920 921 922 923 924 925 926 927 928 929 930 at1 = aTri[j + 1 + j*BLOCK] * work[j];931 932 933 934 aUnder[i+j*BLOCK] = t00;935 aUnder[i+1+j*BLOCK] = t01;936 aUnder[i+ BLOCK + j*BLOCK] = t10 * temp1;937 aUnder[i+1+ BLOCK + j*BLOCK] = t11 * temp1;938 939 940 941 #endif 942 943 for (j = 0; j < BLOCK; j++) {944 945 946 947 948 949 CoinWorkDouble t00 = aUnder[i+j*BLOCK];950 for (k = 0; k < j; ++k) {951 952 953 954 aUnder[i+j*BLOCK] = t00 * temp1;955 956 888 if (nUnder == BLOCK) { 889 aa = aTri  2 * BLOCK; 890 for (j = 0; j < BLOCK; j += 2) { 891 int i; 892 CoinWorkDouble temp0 = diagonal[j]; 893 CoinWorkDouble temp1 = diagonal[j + 1]; 894 aa += 2 * BLOCK; 895 for (i = 0; i < BLOCK; i += 2) { 896 CoinWorkDouble at1; 897 CoinWorkDouble t00 = aUnder[i + j * BLOCK]; 898 CoinWorkDouble t10 = aUnder[i + BLOCK + j * BLOCK]; 899 CoinWorkDouble t01 = aUnder[i + 1 + j * BLOCK]; 900 CoinWorkDouble t11 = aUnder[i + 1 + BLOCK + j * BLOCK]; 901 int k; 902 for (k = 0; k < j; ++k) { 903 CoinWorkDouble multiplier = work[k]; 904 CoinWorkDouble au0 = aUnder[i + k * BLOCK] * multiplier; 905 CoinWorkDouble au1 = aUnder[i + 1 + k * BLOCK] * multiplier; 906 CoinWorkDouble at0 = aTri[j + k * BLOCK]; 907 CoinWorkDouble at1 = aTri[j + 1 + k * BLOCK]; 908 t00 = au0 * at0; 909 t10 = au0 * at1; 910 t01 = au1 * at0; 911 t11 = au1 * at1; 912 } 913 t00 *= temp0; 914 at1 = aTri[j + 1 + j * BLOCK] * work[j]; 915 t10 = t00 * at1; 916 t01 *= temp0; 917 t11 = t01 * at1; 918 aUnder[i + j * BLOCK] = t00; 919 aUnder[i + 1 + j * BLOCK] = t01; 920 aUnder[i + BLOCK + j * BLOCK] = t10 * temp1; 921 aUnder[i + 1 + BLOCK + j * BLOCK] = t11 * temp1; 922 } 923 } 924 } else { 925 #endif 926 aa = aTri  BLOCK; 927 for (j = 0; j < BLOCK; j++) { 928 int i; 929 CoinWorkDouble temp1 = diagonal[j]; 930 aa += BLOCK; 931 for (i = 0; i < nUnder; i++) { 932 int k; 933 CoinWorkDouble t00 = aUnder[i + j * BLOCK]; 934 for (k = 0; k < j; ++k) { 935 CoinWorkDouble multiplier = work[k]; 936 t00 = aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier; 937 } 938 aUnder[i + j * BLOCK] = t00 * temp1; 939 } 940 } 957 941 #ifdef BLOCKUNROLL 958 942 } 959 943 #endif 960 944 } 961 945 /* Leaf recursive rectangle triangle update*/ 962 void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble * aUnder, longDouble *aTri,963 /*longDouble * diagonal,*/ longDouble *work, int nUnder)946 void ClpCholeskyCrecTriLeaf(/*ClpCholeskyDenseC * thisStruct,*/ longDouble *aUnder, longDouble *aTri, 947 /*longDouble * diagonal,*/ longDouble *work, int nUnder) 964 948 { 965 949 #ifdef POS_DEBUG 966 967 968 969 970 971 972 973 assert(diagonal == thisStruct>diagonal_ + icu * BLOCK);974 #endif 975 976 977 longDouble *aa;950 int iru, icu; 951 int iu = bNumber(aUnder, iru, icu); 952 int irt, ict; 953 int it = bNumber(aTri, irt, ict); 954 /*printf("%d %d\n",iu,it);*/ 955 printf("rectrileaf under (%d,%d), tri (%d,%d)\n", 956 iru, icu, irt, ict); 957 assert(diagonal == thisStruct>diagonal_ + icu * BLOCK); 958 #endif 959 int i, j, k; 960 CoinWorkDouble t00; 961 longDouble *aa; 978 962 #ifdef BLOCKUNROLL 979 980 longDouble *aUnder2 = aUnder  2;981 982 983 984 985 986 987 t01 = aa[j+1];988 t10 = aa[j+1+BLOCK];989 990 991 992 993 994 995 996 997 998 999 1000 aa[j+1] = t01;1001 aa[j+1+BLOCK] = t10;1002 1003 1004 t01 = aa[i+BLOCK];1005 t10 = aa[i+1];1006 t11 = aa[i+1+BLOCK];1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 aa[i+BLOCK] = t01;1018 aa[i+1] = t10;1019 aa[i+1+BLOCK] = t11;1020 1021 1022 1023 #endif 1024 1025 for (j = 0; j < nUnder; j++) {1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 963 if (nUnder == BLOCK) { 964 longDouble *aUnder2 = aUnder  2; 965 aa = aTri  2 * BLOCK; 966 for (j = 0; j < BLOCK; j += 2) { 967 CoinWorkDouble t00, t01, t10, t11; 968 aa += 2 * BLOCK; 969 aUnder2 += 2; 970 t00 = aa[j]; 971 t01 = aa[j + 1]; 972 t10 = aa[j + 1 + BLOCK]; 973 for (k = 0; k < BLOCK; ++k) { 974 CoinWorkDouble multiplier = work[k]; 975 CoinWorkDouble a0 = aUnder2[k * BLOCK]; 976 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK]; 977 CoinWorkDouble x0 = a0 * multiplier; 978 CoinWorkDouble x1 = a1 * multiplier; 979 t00 = a0 * x0; 980 t01 = a1 * x0; 981 t10 = a1 * x1; 982 } 983 aa[j] = t00; 984 aa[j + 1] = t01; 985 aa[j + 1 + BLOCK] = t10; 986 for (i = j + 2; i < BLOCK; i += 2) { 987 t00 = aa[i]; 988 t01 = aa[i + BLOCK]; 989 t10 = aa[i + 1]; 990 t11 = aa[i + 1 + BLOCK]; 991 for (k = 0; k < BLOCK; ++k) { 992 CoinWorkDouble multiplier = work[k]; 993 CoinWorkDouble a0 = aUnder2[k * BLOCK] * multiplier; 994 CoinWorkDouble a1 = aUnder2[1 + k * BLOCK] * multiplier; 995 t00 = aUnder[i + k * BLOCK] * a0; 996 t01 = aUnder[i + k * BLOCK] * a1; 997 t10 = aUnder[i + 1 + k * BLOCK] * a0; 998 t11 = aUnder[i + 1 + k * BLOCK] * a1; 999 } 1000 aa[i] = t00; 1001 aa[i + BLOCK] = t01; 1002 aa[i + 1] = t10; 1003 aa[i + 1 + BLOCK] = t11; 1004 } 1005 } 1006 } else { 1007 #endif 1008 aa = aTri  BLOCK; 1009 for (j = 0; j < nUnder; j++) { 1010 aa += BLOCK; 1011 for (i = j; i < nUnder; i++) { 1012 t00 = aa[i]; 1013 for (k = 0; k < BLOCK; ++k) { 1014 CoinWorkDouble multiplier = work[k]; 1015 t00 = aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK] * multiplier; 1016 } 1017 aa[i] = t00; 1018 } 1019 } 1036 1020 #ifdef BLOCKUNROLL 1037 1021 } 1038 1022 #endif 1039 1023 } … … 1042 1026 nUnderK is number of rows in kBlock 1043 1027 */ 1044 void 1045 ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ 1046 const longDouble * COIN_RESTRICT above, 1047 const longDouble * COIN_RESTRICT aUnder, 1048 longDouble * COIN_RESTRICT aOther, 1049 const longDouble * COIN_RESTRICT work, 1050 int nUnder) 1028 void ClpCholeskyCrecRecLeaf(/*ClpCholeskyDenseC * thisStruct,*/ 1029 const longDouble *COIN_RESTRICT above, 1030 const longDouble *COIN_RESTRICT aUnder, 1031 longDouble *COIN_RESTRICT aOther, 1032 const longDouble *COIN_RESTRICT work, 1033 int nUnder) 1051 1034 { 1052 1035 #ifdef POS_DEBUG 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 #endif 1063 1064 longDouble *aa;1036 int ira, ica; 1037 int ia = bNumber(above, ira, ica); 1038 int iru, icu; 1039 int iu = bNumber(aUnder, iru, icu); 1040 int iro, ico; 1041 int io = bNumber(aOther, iro, ico); 1042 /*printf("%d %d %d\n",ia,iu,io);*/ 1043 printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n", 1044 ira, ica, iru, icu, iro, ico); 1045 #endif 1046 int i, j, k; 1047 longDouble *aa; 1065 1048 #ifdef BLOCKUNROLL 1066 1067 1068 1049 aa = aOther  4 * BLOCK; 1050 if (nUnder == BLOCK) { 1051 /*#define INTEL*/ 1069 1052 #ifdef INTEL 1070 1071 1072 1073 1074 CoinWorkDouble t00 = aa[i+0*BLOCK];1075 CoinWorkDouble t10 = aa[i+1*BLOCK];1076 CoinWorkDouble t01 = aa[i+1+0*BLOCK];1077 CoinWorkDouble t11 = aa[i+1+1*BLOCK];1078 1079 1080 CoinWorkDouble a00 = aUnder[i+k*BLOCK] * multiplier;1081 CoinWorkDouble a01 = aUnder[i+1+k*BLOCK] * multiplier;1082 1083 1084 1085 1086 1087 aa[i+0*BLOCK] = t00;1088 aa[i+1*BLOCK] = t10;1089 aa[i+1+0*BLOCK] = t01;1090 aa[i+1+1*BLOCK] = t11;1091 1092 1053 aa += 2 * BLOCK; 1054 for (j = 0; j < BLOCK; j += 2) { 1055 aa += 2 * BLOCK; 1056 for (i = 0; i < BLOCK; i += 2) { 1057 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; 1058 CoinWorkDouble t10 = aa[i + 1 * BLOCK]; 1059 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; 1060 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; 1061 for (k = 0; k < BLOCK; k++) { 1062 CoinWorkDouble multiplier = work[k]; 1063 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * multiplier; 1064 CoinWorkDouble a01 = aUnder[i + 1 + k * BLOCK] * multiplier; 1065 t00 = a00 * above[j + 0 + k * BLOCK]; 1066 t10 = a00 * above[j + 1 + k * BLOCK]; 1067 t01 = a01 * above[j + 0 + k * BLOCK]; 1068 t11 = a01 * above[j + 1 + k * BLOCK]; 1069 } 1070 aa[i + 0 * BLOCK] = t00; 1071 aa[i + 1 * BLOCK] = t10; 1072 aa[i + 1 + 0 * BLOCK] = t01; 1073 aa[i + 1 + 1 * BLOCK] = t11; 1074 } 1075 } 1093 1076 #else 1094 1095 1096 1097 CoinWorkDouble t00 = aa[i+0+0*BLOCK];1098 CoinWorkDouble t10 = aa[i+0+1*BLOCK];1099 CoinWorkDouble t20 = aa[i+0+2*BLOCK];1100 CoinWorkDouble t30 = aa[i+0+3*BLOCK];1101 CoinWorkDouble t01 = aa[i+1+0*BLOCK];1102 CoinWorkDouble t11 = aa[i+1+1*BLOCK];1103 CoinWorkDouble t21 = aa[i+1+2*BLOCK];1104 CoinWorkDouble t31 = aa[i+1+3*BLOCK];1105 CoinWorkDouble t02 = aa[i+2+0*BLOCK];1106 CoinWorkDouble t12 = aa[i+2+1*BLOCK];1107 CoinWorkDouble t22 = aa[i+2+2*BLOCK];1108 CoinWorkDouble t32 = aa[i+2+3*BLOCK];1109 CoinWorkDouble t03 = aa[i+3+0*BLOCK];1110 CoinWorkDouble t13 = aa[i+3+1*BLOCK];1111 CoinWorkDouble t23 = aa[i+3+2*BLOCK];1112 CoinWorkDouble t33 = aa[i+3+3*BLOCK];1113 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;1114 const longDouble *COIN_RESTRICT aboveNow = above + j;1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 aa[i+0+0*BLOCK] = t00;1141 aa[i+0+1*BLOCK] = t10;1142 aa[i+0+2*BLOCK] = t20;1143 aa[i+0+3*BLOCK] = t30;1144 aa[i+1+0*BLOCK] = t01;1145 aa[i+1+1*BLOCK] = t11;1146 aa[i+1+2*BLOCK] = t21;1147 aa[i+1+3*BLOCK] = t31;1148 aa[i+2+0*BLOCK] = t02;1149 aa[i+2+1*BLOCK] = t12;1150 aa[i+2+2*BLOCK] = t22;1151 aa[i+2+3*BLOCK] = t32;1152 aa[i+3+0*BLOCK] = t03;1153 aa[i+3+1*BLOCK] = t13;1154 aa[i+3+2*BLOCK] = t23;1155 aa[i+3+3*BLOCK] = t33;1156 1157 1158 #endif 1159 1160 1161 1162 1163 1164 1165 CoinWorkDouble t00 = aa[i+0*BLOCK];1166 CoinWorkDouble t10 = aa[i+1*BLOCK];1167 CoinWorkDouble t20 = aa[i+2*BLOCK];1168 CoinWorkDouble t30 = aa[i+3*BLOCK];1169 CoinWorkDouble t01 = aa[i+1+0*BLOCK];1170 CoinWorkDouble t11 = aa[i+1+1*BLOCK];1171 CoinWorkDouble t21 = aa[i+1+2*BLOCK];1172 CoinWorkDouble t31 = aa[i+1+3*BLOCK];1173 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i;1174 const longDouble *COIN_RESTRICT aboveNow = above + j;1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 aa[i+0*BLOCK] = t00;1191 aa[i+1*BLOCK] = t10;1192 aa[i+2*BLOCK] = t20;1193 aa[i+3*BLOCK] = t30;1194 aa[i+1+0*BLOCK] = t01;1195 aa[i+1+1*BLOCK] = t11;1196 aa[i+1+2*BLOCK] = t21;1197 aa[i+1+3*BLOCK] = t31;1198 1199 1200 CoinWorkDouble t0 = aa[n+0*BLOCK];1201 CoinWorkDouble t1 = aa[n+1*BLOCK];1202 CoinWorkDouble t2 = aa[n+2*BLOCK];1203 CoinWorkDouble t3 = aa[n+3*BLOCK];1204 1205 1206 a0 = aUnder[n+k*BLOCK] * work[k];1207 1208 1209 1210 1211 1212 aa[n+0*BLOCK] = t0;1213 aa[n+1*BLOCK] = t1;1214 aa[n+2*BLOCK] = t2;1215 aa[n+3*BLOCK] = t3;1216 1217 1218 1077 for (j = 0; j < BLOCK; j += 4) { 1078 aa += 4 * BLOCK; 1079 for (i = 0; i < BLOCK; i += 4) { 1080 CoinWorkDouble t00 = aa[i + 0 + 0 * BLOCK]; 1081 CoinWorkDouble t10 = aa[i + 0 + 1 * BLOCK]; 1082 CoinWorkDouble t20 = aa[i + 0 + 2 * BLOCK]; 1083 CoinWorkDouble t30 = aa[i + 0 + 3 * BLOCK]; 1084 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; 1085 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; 1086 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK]; 1087 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK]; 1088 CoinWorkDouble t02 = aa[i + 2 + 0 * BLOCK]; 1089 CoinWorkDouble t12 = aa[i + 2 + 1 * BLOCK]; 1090 CoinWorkDouble t22 = aa[i + 2 + 2 * BLOCK]; 1091 CoinWorkDouble t32 = aa[i + 2 + 3 * BLOCK]; 1092 CoinWorkDouble t03 = aa[i + 3 + 0 * BLOCK]; 1093 CoinWorkDouble t13 = aa[i + 3 + 1 * BLOCK]; 1094 CoinWorkDouble t23 = aa[i + 3 + 2 * BLOCK]; 1095 CoinWorkDouble t33 = aa[i + 3 + 3 * BLOCK]; 1096 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i; 1097 const longDouble *COIN_RESTRICT aboveNow = above + j; 1098 for (k = 0; k < BLOCK; k++) { 1099 CoinWorkDouble multiplier = work[k]; 1100 CoinWorkDouble a00 = aUnderNow[0] * multiplier; 1101 CoinWorkDouble a01 = aUnderNow[1] * multiplier; 1102 CoinWorkDouble a02 = aUnderNow[2] * multiplier; 1103 CoinWorkDouble a03 = aUnderNow[3] * multiplier; 1104 t00 = a00 * aboveNow[0]; 1105 t10 = a00 * aboveNow[1]; 1106 t20 = a00 * aboveNow[2]; 1107 t30 = a00 * aboveNow[3]; 1108 t01 = a01 * aboveNow[0]; 1109 t11 = a01 * aboveNow[1]; 1110 t21 = a01 * aboveNow[2]; 1111 t31 = a01 * aboveNow[3]; 1112 t02 = a02 * aboveNow[0]; 1113 t12 = a02 * aboveNow[1]; 1114 t22 = a02 * aboveNow[2]; 1115 t32 = a02 * aboveNow[3]; 1116 t03 = a03 * aboveNow[0]; 1117 t13 = a03 * aboveNow[1]; 1118 t23 = a03 * aboveNow[2]; 1119 t33 = a03 * aboveNow[3]; 1120 aUnderNow += BLOCK; 1121 aboveNow += BLOCK; 1122 } 1123 aa[i + 0 + 0 * BLOCK] = t00; 1124 aa[i + 0 + 1 * BLOCK] = t10; 1125 aa[i + 0 + 2 * BLOCK] = t20; 1126 aa[i + 0 + 3 * BLOCK] = t30; 1127 aa[i + 1 + 0 * BLOCK] = t01; 1128 aa[i + 1 + 1 * BLOCK] = t11; 1129 aa[i + 1 + 2 * BLOCK] = t21; 1130 aa[i + 1 + 3 * BLOCK] = t31; 1131 aa[i + 2 + 0 * BLOCK] = t02; 1132 aa[i + 2 + 1 * BLOCK] = t12; 1133 aa[i + 2 + 2 * BLOCK] = t22; 1134 aa[i + 2 + 3 * BLOCK] = t32; 1135 aa[i + 3 + 0 * BLOCK] = t03; 1136 aa[i + 3 + 1 * BLOCK] = t13; 1137 aa[i + 3 + 2 * BLOCK] = t23; 1138 aa[i + 3 + 3 * BLOCK] = t33; 1139 } 1140 } 1141 #endif 1142 } else { 1143 int odd = nUnder & 1; 1144 int n = nUnder  odd; 1145 for (j = 0; j < BLOCK; j += 4) { 1146 aa += 4 * BLOCK; 1147 for (i = 0; i < n; i += 2) { 1148 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; 1149 CoinWorkDouble t10 = aa[i + 1 * BLOCK]; 1150 CoinWorkDouble t20 = aa[i + 2 * BLOCK]; 1151 CoinWorkDouble t30 = aa[i + 3 * BLOCK]; 1152 CoinWorkDouble t01 = aa[i + 1 + 0 * BLOCK]; 1153 CoinWorkDouble t11 = aa[i + 1 + 1 * BLOCK]; 1154 CoinWorkDouble t21 = aa[i + 1 + 2 * BLOCK]; 1155 CoinWorkDouble t31 = aa[i + 1 + 3 * BLOCK]; 1156 const longDouble *COIN_RESTRICT aUnderNow = aUnder + i; 1157 const longDouble *COIN_RESTRICT aboveNow = above + j; 1158 for (k = 0; k < BLOCK; k++) { 1159 CoinWorkDouble multiplier = work[k]; 1160 CoinWorkDouble a00 = aUnderNow[0] * multiplier; 1161 CoinWorkDouble a01 = aUnderNow[1] * multiplier; 1162 t00 = a00 * aboveNow[0]; 1163 t10 = a00 * aboveNow[1]; 1164 t20 = a00 * aboveNow[2]; 1165 t30 = a00 * aboveNow[3]; 1166 t01 = a01 * aboveNow[0]; 1167 t11 = a01 * aboveNow[1]; 1168 t21 = a01 * aboveNow[2]; 1169 t31 = a01 * aboveNow[3]; 1170 aUnderNow += BLOCK; 1171 aboveNow += BLOCK; 1172 } 1173 aa[i + 0 * BLOCK] = t00; 1174 aa[i + 1 * BLOCK] = t10; 1175 aa[i + 2 * BLOCK] = t20; 1176 aa[i + 3 * BLOCK] = t30; 1177 aa[i + 1 + 0 * BLOCK] = t01; 1178 aa[i + 1 + 1 * BLOCK] = t11; 1179 aa[i + 1 + 2 * BLOCK] = t21; 1180 aa[i + 1 + 3 * BLOCK] = t31; 1181 } 1182 if (odd) { 1183 CoinWorkDouble t0 = aa[n + 0 * BLOCK]; 1184 CoinWorkDouble t1 = aa[n + 1 * BLOCK]; 1185 CoinWorkDouble t2 = aa[n + 2 * BLOCK]; 1186 CoinWorkDouble t3 = aa[n + 3 * BLOCK]; 1187 CoinWorkDouble a0; 1188 for (k = 0; k < BLOCK; k++) { 1189 a0 = aUnder[n + k * BLOCK] * work[k]; 1190 t0 = a0 * above[j + 0 + k * BLOCK]; 1191 t1 = a0 * above[j + 1 + k * BLOCK]; 1192 t2 = a0 * above[j + 2 + k * BLOCK]; 1193 t3 = a0 * above[j + 3 + k * BLOCK]; 1194 } 1195 aa[n + 0 * BLOCK] = t0; 1196 aa[n + 1 * BLOCK] = t1; 1197 aa[n + 2 * BLOCK] = t2; 1198 aa[n + 3 * BLOCK] = t3; 1199 } 1200 } 1201 } 1219 1202 #else 1220 1221 for (j = 0; j < BLOCK; j++) {1222 1223 1224 CoinWorkDouble t00 = aa[i+0*BLOCK];1225 1226 CoinWorkDouble a00 = aUnder[i+k*BLOCK] * work[k];1227 1228 1229 1230 1231 1203 aa = aOther  BLOCK; 1204 for (j = 0; j < BLOCK; j++) { 1205 aa += BLOCK; 1206 for (i = 0; i < nUnder; i++) { 1207 CoinWorkDouble t00 = aa[i + 0 * BLOCK]; 1208 for (k = 0; k < BLOCK; k++) { 1209 CoinWorkDouble a00 = aUnder[i + k * BLOCK] * work[k]; 1210 t00 = a00 * above[j + k * BLOCK]; 1211 } 1212 aa[i] = t00; 1213 } 1214 } 1232 1215 #endif 1233 1216 } 1234 1217 /* Uses factorization to solve. */ 1235 void 1236 ClpCholeskyDense::solve (CoinWorkDouble * region) 1218 void ClpCholeskyDense::solve(CoinWorkDouble *region) 1237 1219 { 1238 1220 #ifdef CHOL_COMPARE 1239 double *region2 = NULL;1240 1241 longDouble *xx = sparseFactor_;1242 longDouble *yy = diagonal_;1243 1244 1245 1246 1247 1248 longDouble *work = sparseFactor_  1;1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 #endif 1268 1269 1270 1271 1272 1273 1274 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks;1275 1276 longDouble *aa = a;1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1221 double *region2 = NULL; 1222 if (numberRows_ < 200) { 1223 longDouble *xx = sparseFactor_; 1224 longDouble *yy = diagonal_; 1225 diagonal_ = sparseFactor_ + 40000; 1226 sparseFactor_ = diagonal_ + numberRows_; 1227 region2 = ClpCopyOfArray(region, numberRows_); 1228 int iRow, iColumn; 1229 int addOffset = numberRows_  1; 1230 longDouble *work = sparseFactor_  1; 1231 for (iColumn = 0; iColumn < numberRows_; iColumn++) { 1232 double value = region2[iColumn]; 1233 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) 1234 region2[iRow] = value * work[iRow]; 1235 addOffset; 1236 work += addOffset; 1237 } 1238 for (iColumn = numberRows_  1; iColumn >= 0; iColumn) { 1239 double value = region2[iColumn] * diagonal_[iColumn]; 1240 work = addOffset; 1241 addOffset++; 1242 for (iRow = iColumn + 1; iRow < numberRows_; iRow++) 1243 value = region2[iRow] * work[iRow]; 1244 region2[iColumn] = value; 1245 } 1246 sparseFactor_ = xx; 1247 diagonal_ = yy; 1248 } 1249 #endif 1250 /*longDouble * xx = sparseFactor_;*/ 1251 /*longDouble * yy = diagonal_;*/ 1252 /*diagonal_ = sparseFactor_ + 40000;*/ 1253 /*sparseFactor_=diagonal_ + numberRows_;*/ 1254 int numberBlocks = (numberRows_ + BLOCK  1) >> BLOCKSHIFT; 1255 /* later align on boundary*/ 1256 longDouble *a = sparseFactor_ + BLOCKSQ * numberBlocks; 1257 int iBlock; 1258 longDouble *aa = a; 1259 int iColumn; 1260 for (iBlock = 0; iBlock < numberBlocks; iBlock++) { 1261 int nChunk; 1262 int jBlock; 1263 int iDo = iBlock * BLOCK; 1264 int base = iDo; 1265 if (iDo + BLOCK > numberRows_) { 1266 nChunk = numberRows_  iDo; 1267 } else { 1268 nChunk = BLOCK; 1269 } 1270 solveF1(aa, nChunk, region + iDo); 1271 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) { 1272 base += BLOCK; 1273 aa += BLOCKSQ; 1274 if (base + BLOCK > numberRows_) { 1275 nChunk = numberRows_  base; 1276 } else { 1277 nChunk = BLOCK; 1278 } 1279 solveF2(aa, nChunk, region + iDo, region + base); 1280 } 1281 aa += BLOCKSQ; 1282 } 1283 /* do diagonal outside*/ 1284 for (iColumn = 0; iColumn < numberRows_; iColumn++) 1285 region[iColumn] *= diagonal_[iColumn]; 1286 int offset = ((numberBlocks * (numberBlocks + 1)) >> 1); 1287 aa = a + number_entries(offset  1); 1288 int lBase = (numberBlocks  1) * BLOCK; 1289 for (iBlock = numberBlocks  1; iBlock >= 0; iBlock) { 1290 int nChunk; 1291 int jBlock; 1292 int triBase = iBlock * BLOCK; 1293 int iBase = lBase; 1294 for (jBlock = iBlock + 1; jBlock < numberBlocks; jBlock++) { 1295 if (iBase + BLOCK > numberRows_) { 1296 nChunk = numberRows_  iBase; 1297 } else { 1298 nChunk = BLOCK; 1299 } 1300 solveB2(aa, nChunk, region + triBase, region + iBase); 1301 iBase = BLOCK; 1302 aa = BLOCKSQ; 1303 } 1304 if (triBase + BLOCK > numberRows_) { 1305 nChunk = numberRows_  triBase; 1306 } else { 1307 nChunk = BLOCK; 1308 } 1309 solveB1(aa, nChunk, region + triBase); 1310 aa = BLOCKSQ; 1311 } 1330 1312 #ifdef CHOL_COMPARE 1331 1332 1333 1334 1335 delete[] region2;1336 1313 if (numberRows_ < 200) { 1314 for (int i = 0; i < numberRows_; i++) { 1315 assert(CoinAbs(region[i]  region2[i]) < 1.0e3); 1316 } 1317 delete[] region2; 1318 } 1337 1319 #endif 1338 1320 } 1339 1321 /* Forward part of solve 1*/ 1340 void 1341 ClpCholeskyDense::solveF1(longDouble * a, int n, CoinWorkDouble * region) 1342 { 1343 int j, k; 1344 CoinWorkDouble t00; 1345 for (j = 0; j < n; j ++) { 1346 t00 = region[j]; 1347 for (k = 0; k < j; ++k) { 1348 t00 = region[k] * a[j + k * BLOCK]; 1349 } 1350 /*t00*=a[j + j * BLOCK];*/ 1351 region[j] = t00; 1352 } 1322 void ClpCholeskyDense::solveF1(longDouble *a, int n, CoinWorkDouble *region) 1323 { 1324 int j, k; 1325 CoinWorkDouble t00; 1326 for (j = 0; j < n; j++) { 1327 t00 = region[j]; 1328 for (k = 0; k < j; ++k) { 1329 t00 = region[k] * a[j + k * BLOCK]; 1330 } 1331 /*t00*=a[j + j * BLOCK];*/ 1332 region[j] = t00; 1333 } 1353 1334 } 1354 1335 /* Forward part of solve 2*/ 1355 void 1356 ClpCholeskyDense::solveF2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2) 1357 { 1358 int j, k; 1336 void ClpCholeskyDense::solveF2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2) 1337 { 1338 int j, k; 1359 1339 #ifdef BLOCKUNROLL 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 #if BLOCK >81406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 #endif 1446 1447 1448 1449 1450 1451 1452 1453 1454 #endif 1455 1456 1457 for (j = 0; j < BLOCK; j++) {1458 1459 1460 1461 1340 if (n == BLOCK) { 1341 for (k = 0; k < BLOCK; k += 4) { 1342 CoinWorkDouble t0 = region2[0]; 1343 CoinWorkDouble t1 = region2[1]; 1344 CoinWorkDouble t2 = region2[2]; 1345 CoinWorkDouble t3 = region2[3]; 1346 t0 = region[0] * a[0 + 0 * BLOCK]; 1347 t1 = region[0] * a[1 + 0 * BLOCK]; 1348 t2 = region[0] * a[2 + 0 * BLOCK]; 1349 t3 = region[0] * a[3 + 0 * BLOCK]; 1350 1351 t0 = region[1] * a[0 + 1 * BLOCK]; 1352 t1 = region[1] * a[1 + 1 * BLOCK]; 1353 t2 = region[1] * a[2 + 1 * BLOCK]; 1354 t3 = region[1] * a[3 + 1 * BLOCK]; 1355 1356 t0 = region[2] * a[0 + 2 * BLOCK]; 1357 t1 = region[2] * a[1 + 2 * BLOCK]; 1358 t2 = region[2] * a[2 + 2 * BLOCK]; 1359 t3 = region[2] * a[3 + 2 * BLOCK]; 1360 1361 t0 = region[3] * a[0 + 3 * BLOCK]; 1362 t1 = region[3] * a[1 + 3 * BLOCK]; 1363 t2 = region[3] * a[2 + 3 * BLOCK]; 1364 t3 = region[3] * a[3 + 3 * BLOCK]; 1365 1366 t0 = region[4] * a[0 + 4 * BLOCK]; 1367 t1 = region[4] * a[1 + 4 * BLOCK]; 1368 t2 = region[4] * a[2 + 4 * BLOCK]; 1369 t3 = region[4] * a[3 + 4 * BLOCK]; 1370 1371 t0 = region[5] * a[0 + 5 * BLOCK]; 1372 t1 = region[5] * a[1 + 5 * BLOCK]; 1373 t2 = region[5] * a[2 + 5 * BLOCK]; 1374 t3 = region[5] * a[3 + 5 * BLOCK]; 1375 1376 t0 = region[6] * a[0 + 6 * BLOCK]; 1377 t1 = region[6] * a[1 + 6 * BLOCK]; 1378 t2 = region[6] * a[2 + 6 * BLOCK]; 1379 t3 = region[6] * a[3 + 6 * BLOCK]; 1380 1381 t0 = region[7] * a[0 + 7 * BLOCK]; 1382 t1 = region[7] * a[1 + 7 * BLOCK]; 1383 t2 = region[7] * a[2 + 7 * BLOCK]; 1384 t3 = region[7] * a[3 + 7 * BLOCK]; 1385 #if BLOCK > 8 1386 t0 = region[8] * a[0 + 8 * BLOCK]; 1387 t1 = region[8] * a[1 + 8 * BLOCK]; 1388 t2 = region[8] * a[2 + 8 * BLOCK]; 1389 t3 = region[8] * a[3 + 8 * BLOCK]; 1390 1391 t0 = region[9] * a[0 + 9 * BLOCK]; 1392 t1 = region[9] * a[1 + 9 * BLOCK]; 1393 t2 = region[9] * a[2 + 9 * BLOCK]; 1394 t3 = region[9] * a[3 + 9 * BLOCK]; 1395 1396 t0 = region[10] * a[0 + 10 * BLOCK]; 1397 t1 = region[10] * a[1 + 10 * BLOCK]; 1398 t2 = region[10] * a[2 + 10 * BLOCK]; 1399 t3 = region[10] * a[3 + 10 * BLOCK]; 1400 1401 t0 = region[11] * a[0 + 11 * BLOCK]; 1402 t1 = region[11] * a[1 + 11 * BLOCK]; 1403 t2 = region[11] * a[2 + 11 * BLOCK]; 1404 t3 = region[11] * a[3 + 11 * BLOCK]; 1405 1406 t0 = region[12] * a[0 + 12 * BLOCK]; 1407 t1 = region[12] * a[1 + 12 * BLOCK]; 1408 t2 = region[12] * a[2 + 12 * BLOCK]; 1409 t3 = region[12] * a[3 + 12 * BLOCK]; 1410 1411 t0 = region[13] * a[0 + 13 * BLOCK]; 1412 t1 = region[13] * a[1 + 13 * BLOCK]; 1413 t2 = region[13] * a[2 + 13 * BLOCK]; 1414 t3 = region[13] * a[3 + 13 * BLOCK]; 1415 1416 t0 = region[14] * a[0 + 14 * BLOCK]; 1417 t1 = region[14] * a[1 + 14 * BLOCK]; 1418 t2 = region[14] * a[2 + 14 * BLOCK]; 1419 t3 = region[14] * a[3 + 14 * BLOCK]; 1420 1421 t0 = region[15] * a[0 + 15 * BLOCK]; 1422 t1 = region[15] * a[1 + 15 * BLOCK]; 1423 t2 = region[15] * a[2 + 15 * BLOCK]; 1424 t3 = region[15] * a[3 + 15 * BLOCK]; 1425 #endif 1426 region2[0] = t0; 1427 region2[1] = t1; 1428 region2[2] = t2; 1429 region2[3] = t3; 1430 region2 += 4; 1431 a += 4; 1432 } 1433 } else { 1434 #endif 1435 for (k = 0; k < n; ++k) { 1436 CoinWorkDouble t00 = region2[k]; 1437 for (j = 0; j < BLOCK; j++) { 1438 t00 = region[j] * a[k + j * BLOCK]; 1439 } 1440 region2[k] = t00; 1441 } 1462 1442 #ifdef BLOCKUNROLL 1463 1443 } 1464 1444 #endif 1465 1445 } 1466 1446 /* Backward part of solve 1*/ 1467 void 1468 ClpCholeskyDense::solveB1(longDouble * a, int n, CoinWorkDouble * region) 1469 { 1470 int j, k; 1471 CoinWorkDouble t00; 1472 for (j = n  1; j >= 0; j ) { 1473 t00 = region[j]; 1474 for (k = j + 1; k < n; ++k) { 1475 t00 = region[k] * a[k + j * BLOCK]; 1476 } 1477 /*t00*=a[j + j * BLOCK];*/ 1478 region[j] = t00; 1479 } 1447 void ClpCholeskyDense::solveB1(longDouble *a, int n, CoinWorkDouble *region) 1448 { 1449 int j, k; 1450 CoinWorkDouble t00; 1451 for (j = n  1; j >= 0; j) { 1452 t00 = region[j]; 1453 for (k = j + 1; k < n; ++k) { 1454 t00 = region[k] * a[k + j * BLOCK]; 1455 } 1456 /*t00*=a[j + j * BLOCK];*/ 1457 region[j] = t00; 1458 } 1480 1459 } 1481 1460 /* Backward part of solve 2*/ 1482 void 1483 ClpCholeskyDense::solveB2(longDouble * a, int n, CoinWorkDouble * region, CoinWorkDouble * region2) 1484 { 1485 int j, k; 1461 void ClpCholeskyDense::solveB2(longDouble *a, int n, CoinWorkDouble *region, CoinWorkDouble *region2) 1462 { 1463 int j, k; 1486 1464 #ifdef BLOCKUNROLL 1487 1488 1489 1490 1491 1492 1493 t0 = region2[0] * a[0 + 0*BLOCK];1494 t1 = region2[0] * a[0 + 1*BLOCK];1495 t2 = region2[0] * a[0 + 2*BLOCK];1496 t3 = region2[0] * a[0 + 3*BLOCK];1497 1498 t0 = region2[1] * a[1 + 0*BLOCK];1499 t1 = region2[1] * a[1 + 1*BLOCK];1500 t2 = region2[1] * a[1 + 2*BLOCK];1501 t3 = region2[1] * a[1 + 3*BLOCK];1502 1503 t0 = region2[2] * a[2 + 0*BLOCK];1504 t1 = region2[2] * a[2 + 1*BLOCK];1505 t2 = region2[2] * a[2 + 2*BLOCK];1506 t3 = region2[2] * a[2 + 3*BLOCK];1507 1508 t0 = region2[3] * a[3 + 0*BLOCK];1509 t1 = region2[3] * a[3 + 1*BLOCK];1510 t2 = region2[3] * a[3 + 2*BLOCK];1511 t3 = region2[3] * a[3 + 3*BLOCK];1512 1513 t0 = region2[4] * a[4 + 0*BLOCK];1514 t1 = region2[4] * a[4 + 1*BLOCK];1515 t2 = region2[4] * a[4 + 2*BLOCK];1516 t3 = region2[4] * a[4 + 3*BLOCK];1517 1518 t0 = region2[5] * a[5 + 0*BLOCK];1519 t1 = region2[5] * a[5 + 1*BLOCK];1520 t2 = region2[5] * a[5 + 2*BLOCK];1521 t3 = region2[5] * a[5 + 3*BLOCK];1522 1523 t0 = region2[6] * a[6 + 0*BLOCK];1524 t1 = region2[6] * a[6 + 1*BLOCK];1525 t2 = region2[6] * a[6 + 2*BLOCK];1526 t3 = region2[6] * a[6 + 3*BLOCK];1527 1528 t0 = region2[7] * a[7 + 0*BLOCK];1529 t1 = region2[7] * a[7 + 1*BLOCK];1530 t2 = region2[7] * a[7 + 2*BLOCK];1531 t3 = region2[7] * a[7 + 3*BLOCK];1532 #if BLOCK >81533 1534 t0 = region2[8] * a[8 + 0*BLOCK];1535 t1 = region2[8] * a[8 + 1*BLOCK];1536 t2 = region2[8] * a[8 + 2*BLOCK];1537 t3 = region2[8] * a[8 + 3*BLOCK];1538 1539 t0 = region2[9] * a[9 + 0*BLOCK];1540 t1 = region2[9] * a[9 + 1*BLOCK];1541 t2 = region2[9] * a[9 + 2*BLOCK];1542 t3 = region2[9] * a[9 + 3*BLOCK];1543 1544 t0 = region2[10] * a[10 + 0*BLOCK];1545 t1 = region2[10] * a[10 + 1*BLOCK];1546 t2 = region2[10] * a[10 + 2*BLOCK];1547 t3 = region2[10] * a[10 + 3*BLOCK];1548 1549 t0 = region2[11] * a[11 + 0*BLOCK];1550 t1 = region2[11] * a[11 + 1*BLOCK];1551 t2 = region2[11] * a[11 + 2*BLOCK];1552 t3 = region2[11] * a[11 + 3*BLOCK];1553 1554 t0 = region2[12] * a[12 + 0*BLOCK];1555 t1 = region2[12] * a[12 + 1*BLOCK];1556 t2 = region2[12] * a[12 + 2*BLOCK];1557 t3 = region2[12] * a[12 + 3*BLOCK];1558 1559 t0 = region2[13] * a[13 + 0*BLOCK];1560 t1 = region2[13] * a[13 + 1*BLOCK];1561 t2 = region2[13] * a[13 + 2*BLOCK];1562 t3 = region2[13] * a[13 + 3*BLOCK];1563 1564 t0 = region2[14] * a[14 + 0*BLOCK];1565 t1 = region2[14] * a[14 + 1*BLOCK];1566 t2 = region2[14] * a[14 + 2*BLOCK];1567 t3 = region2[14] * a[14 + 3*BLOCK];1568 1569 t0 = region2[15] * a[15 + 0*BLOCK];1570 t1 = region2[15] * a[15 + 1*BLOCK];1571 t2 = region2[15] * a[15 + 2*BLOCK];1572 t3 = region2[15] * a[15 + 3*BLOCK];1573 #endif 1574 1575 1576 1577 1578 1579 1580 1581 1582 #endif 1583 for (j = 0; j < BLOCK; j++) {1584 1585 1586 1587 1588 1589 1465 if (n == BLOCK) { 1466 for (j = 0; j < BLOCK; j += 4) { 1467 CoinWorkDouble t0 = region[0]; 1468 CoinWorkDouble t1 = region[1]; 1469 CoinWorkDouble t2 = region[2]; 1470 CoinWorkDouble t3 = region[3]; 1471 t0 = region2[0] * a[0 + 0 * BLOCK]; 1472 t1 = region2[0] * a[0 + 1 * BLOCK]; 1473 t2 = region2[0] * a[0 + 2 * BLOCK]; 1474 t3 = region2[0] * a[0 + 3 * BLOCK]; 1475 1476 t0 = region2[1] * a[1 + 0 * BLOCK]; 1477 t1 = region2[1] * a[1 + 1 * BLOCK]; 1478 t2 = region2[1] * a[1 + 2 * BLOCK]; 1479 t3 = region2[1] * a[1 + 3 * BLOCK]; 1480 1481 t0 = region2[2] * a[2 + 0 * BLOCK]; 1482 t1 = region2[2] * a[2 + 1 * BLOCK]; 1483 t2 = region2[2] * a[2 + 2 * BLOCK]; 1484 t3 = region2[2] * a[2 + 3 * BLOCK]; 1485 1486 t0 = region2[3] * a[3 + 0 * BLOCK]; 1487 t1 = region2[3] * a[3 + 1 * BLOCK]; 1488 t2 = region2[3] * a[3 + 2 * BLOCK]; 1489 t3 = region2[3] * a[3 + 3 * BLOCK]; 1490 1491 t0 = region2[4] * a[4 + 0 * BLOCK]; 1492 t1 = region2[4] * a[4 + 1 * BLOCK]; 1493 t2 = region2[4] * a[4 + 2 * BLOCK]; 1494 t3 = region2[4] * a[4 + 3 * BLOCK]; 1495 1496 t0 = region2[5] * a[5 + 0 * BLOCK]; 1497 t1 = region2[5] * a[5 + 1 * BLOCK]; 1498 t2 = region2[5] * a[5 + 2 * BLOCK]; 1499 t3 = region2[5] * a[5 + 3 * BLOCK]; 1500 1501 t0 = region2[6] * a[6 + 0 * BLOCK]; 1502 t1 = region2[6] * a[6 + 1 * BLOCK]; 1503 t2 = region2[6] * a[6 + 2 * BLOCK]; 1504 t3 = region2[6] * a[6 + 3 * BLOCK]; 1505 1506 t0 = region2[7] * a[7 + 0 * BLOCK]; 1507 t1 = region2[7] * a[7 + 1 * BLOCK]; 1508 t2 = region2[7] * a[7 + 2 * BLOCK]; 1509 t3 = region2[7] * a[7 + 3 * BLOCK]; 1510 #if BLOCK > 8 1511 1512 t0 = region2[8] * a[8 + 0 * BLOCK]; 1513 t1 = region2[8] * a[8 + 1 * BLOCK]; 1514 t2 = region2[8] * a[8 + 2 * BLOCK]; 1515 t3 = region2[8] * a[8 + 3 * BLOCK]; 1516 1517 t0 = region2[9] * a[9 + 0 * BLOCK]; 1518 t1 = region2[9] * a[9 + 1 * BLOCK]; 1519 t2 = region2[9] * a[9 + 2 * BLOCK]; 1520 t3 = region2[9] * a[9 + 3 * BLOCK]; 1521 1522 t0 = region2[10] * a[10 + 0 * BLOCK]; 1523 t1 = region2[10] * a[10 + 1 * BLOCK]; 1524 t2 = region2[10] * a[10 + 2 * BLOCK]; 1525 t3 = region2[10] * a[10 + 3 * BLOCK]; 1526 1527 t0 = region2[11] * a[11 + 0 * BLOCK]; 1528 t1 = region2[11] * a[11 + 1 * BLOCK]; 1529 t2 = region2[11] * a[11 + 2 * BLOCK]; 1530 t3 = region2[11] * a[11 + 3 * BLOCK]; 1531 1532 t0 = region2[12] * a[12 + 0 * BLOCK]; 1533 t1 = region2[12] * a[12 + 1 * BLOCK]; 1534 t2 = region2[12] * a[12 + 2 * BLOCK]; 1535 t3 = region2[12] * a[12 + 3 * BLOCK]; 1536 1537 t0 = region2[13] * a[13 + 0 * BLOCK]; 1538 t1 = region2[13] * a[13 + 1 * BLOCK]; 1539 t2 = region2[13] * a[13 + 2 * BLOCK]; 1540 t3 = region2[13] * a[13 + 3 * BLOCK]; 1541 1542 t0 = region2[14] * a[14 + 0 * BLOCK]; 1543 t1 = region2[14] * a[14 + 1 * BLOCK]; 1544 t2 = region2[14] * a[14 + 2 * BLOCK]; 1545 t3 = region2[14] * a[14 + 3 * BLOCK]; 1546 1547 t0 = region2[15] * a[15 + 0 * BLOCK]; 1548 t1 = region2[15] * a[15 + 1 * BLOCK]; 1549 t2 = region2[15] * a[15 + 2 * BLOCK]; 1550 t3 = region2[15] * a[15 + 3 * BLOCK]; 1551 #endif 1552 region[0] = t0; 1553 region[1] = t1; 1554 region[2] = t2; 1555 region[3] = t3; 1556 a += 4 * BLOCK; 1557 region += 4; 1558 } 1559 } else { 1560 #endif 1561 for (j = 0; j < BLOCK; j++) { 1562 CoinWorkDouble t00 = region[j]; 1563 for (k = 0; k < n; ++k) { 1564 t00 = region2[k] * a[k + j * BLOCK]; 1565 } 1566 region[j] = t00; 1567 } 1590 1568 #ifdef BLOCKUNROLL 1591 } 1592 #endif 1593 } 1569 } 1570 #endif 1571 } 1572 1573 /* vi: softtabstop=2 shiftwidth=2 expandtab tabstop=2 1574 */
Note: See TracChangeset
for help on using the changeset viewer.