Changeset 2260 for trunk


Ignore:
Timestamp:
Mar 28, 2017 10:35:56 AM (3 years ago)
Author:
forrest
Message:

need loadu

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Clp/src/ClpPackedMatrix.cpp

    r2259 r2260  
    7649 7649      __m256d arrayX = _mm256_setzero_pd();
    7650 7650      for (int j=0;j<nel;j++) {
    7651             __m128i rows = _mm_load_si128((const __m128i *)row); // was loadu
         7651        __m128i rows = _mm_loadu_si128((const __m128i *)row);
    7652 7652        __m256d elements = _mm256_load_pd(element);
    7653 7653        __m256d pis = _mm256_i32gather_pd(pi,rows,8);
     
    7855 7855        }
    7856 7856      }
    7857         }
         7857    } 
    7858 7858  }
    7859 7859  int numberOld=numberNonZero;
     
    7878 7878      __m256d arrayX = _mm256_setzero_pd();
    7879 7879      for (int j=0;j<nel;j++) {
    7880             __m128i rows = _mm_load_si128((const __m128i *)row); // was loadu
         7880        __m128i rows = _mm_loadu_si128((const __m128i *)row);
    7881 7881        __m256d elements = _mm256_load_pd(element);
    7882 7882        __m256d pis = _mm256_i32gather_pd(pi,rows,8);
     
    7891 7891      __m512d arrayX = _mm512_setzero_pd();
    7892 7892      for (int j=0;j<nel;j++) {
    7893             __m256i rows = _mm256_load_si256((const __m256i *)row); // was loadu
         7893        __m256i rows = _mm256_loadu_si256((const __m256i *)row);
    7894 7894        __m512d elements = _mm512_load_pd(element);
    7895 7895        __m512d pis = _mm512_i32gather_pd(rows,pi,8);
     
    8194 8194        __m256d tempX = _mm256_setzero_pd();
    8195 8195        for (int j=0;j<nel;j++) {
    8196               __m128i rows = _mm_load_si128((const __m128i *)row); // was loadu
         8196          __m128i rows = _mm_loadu_si128((const __m128i *)row);
    8197 8197          __m256d elements = _mm256_load_pd(element);
    8198 8198          __m256d pis = _mm256_i32gather_pd(pi,rows,8);
     
    8208 8208        _mm256_store_pd(work2+i,arrayX);
    8209 8209      }
    8210     #else
         8210#else 
    8211 8211    assert(COIN_AVX2==8);
    8212 8212    __m512d zero = _mm512_setzero_pd();
     
    8217 8217        __m512d tempX = _mm512_setzero_pd();
    8218 8218        for (int j=0;j<nel;j++) {
    8219               __m256i rows = _mm256_load_si256((const __m256i *)row); // was loadu
         8219          __m256i rows = _mm256_loadu_si256((const __m256i *)row);
    8220 8220          __m512d elements = _mm512_load_pd(element);
    8221 8221          __m512d pis = _mm512_i32gather_pd(rows,pi,8);
Note: See TracChangeset for help on using the changeset viewer.