- Timestamp:
- Oct 13, 2011 8:05:42 AM (7 years ago)
- Location:
- branches/MPI/ADOL-C/src
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/MPI/ADOL-C/src/adolc_mpi.cpp
r273 r274 108 108 double *trade; 109 109 110 trade = (double*)myalloc1(h);110 trade = myalloc1(h); 111 111 for (i=0; i< count;i++ ) 112 112 trade[i] = buf[i].getValue(); … … 236 236 for(i=0; i < count*process_count;i++) 237 237 ADOLC_PUT_LOCINT(tmp_adoubles[i].loc()); 238 ADOLC_PUT_LOCINT(count*process_count); 239 } 238 } 239 ADOLC_PUT_LOCINT(count*process_count); 240 ADOLC_PUT_LOCINT(root); 241 ADOLC_PUT_LOCINT(id); 240 242 ADOLC_PUT_LOCINT(op); 241 243 … … 326 328 ADOLC_PUT_LOCINT(recvbuf[i].loc()); 327 329 ADOLC_PUT_LOCINT(count*process_count); 330 ADOLC_PUT_LOCINT(root); 331 ADOLC_PUT_LOCINT(id); 328 332 329 333 return ierr; … … 367 371 for(i=0; i< sendcount*process_count ;i++) 368 372 ADOLC_PUT_LOCINT(sendbuf[i].loc()); 369 ADOLC_PUT_LOCINT(sendcount*process_count);370 }373 } 374 ADOLC_PUT_LOCINT(sendcount*process_count); 371 375 ADOLC_PUT_LOCINT(root); 372 376 ADOLC_PUT_LOCINT(id); -
branches/MPI/ADOL-C/src/fo_rev.c
r273 r274 413 413 #if defined(_MPI_) 414 414 MPI_Status status_MPI; 415 int mpi_i,mpi_ii, myid, root, count, count2, *loc_recv, *loc_send; 415 int mpi_i,mpi_ii, myid, root, count, count2; 416 locint *loc_recv, *loc_send; 416 417 double *trade, *rec_buf; 417 int *trade_ulong, *rec_ulong, target, tag,mpi_op,use_reduce=0; 418 unsigned long int *trade_ulong, *rec_ulong; 419 int target, tag ,use_reduce=0; 420 ADOLC_MPI_Op mpi_op; 418 421 #endif 419 422 … … 1837 1840 /*--------------------------------------------------------------------------*/ 1838 1841 #if defined(_MPI_) 1839 case receive_data: // MPI-Send1840 tag = get_locint_r(); // tag1841 target = get_locint_r(); // dest1842 count = get_locint_r(); // count1843 loc_recv = (int*) malloc(count*sizeof(int));1844 for(mpi_i=0;mpi_i<count;mpi_i++)1845 loc_recv[mpi_i] = get_locint_r();1846 count2 = get_locint_r(); // count1847 #if !defined(_NTIGHT_)1848 trade = myalloc1(count);1849 for(mpi_i=0; mpi_i<count; mpi_i++){1850 trade[mpi_i] = rp_T[loc_recv[mpi_i]];1851 ADOLC_GET_TAYLOR(loc_recv[mpi_i]);1852 }1853 MPI_Send( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD);1854 myfree1(trade);1855 #endif1856 #if defined(_INT_REV_)1857 trade_ulong = (int*) malloc(count*p* sizeof(int));1858 for(mpi_i=0; mpi_i<count; mpi_i++){1859 for(l=0; l<p;l++){1860 trade_ulong[mpi_i*p+l] = upp_A[loc_recv[mpi_i]][l];1861 upp_A[loc_recv[mpi_i]][l]=0;1862 }1863 }1864 MPI_Send( trade_ulong , count*p, MPI_INT , target, tag , MPI_COMM_WORLD);1865 free(trade_ulong);1866 #endif1867 #if defined(_FOS_)1868 trade = myalloc1(count);1869 1870 for (mpi_i=0; mpi_i< count; mpi_i++) {1871 trade[mpi_i]=rp_A[loc_recv[mpi_i]];1872 rp_A[loc_recv[mpi_i]]=0;1873 }1874 MPI_Send( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD);1875 myfree1(trade);1876 #endif /* ALL_TOGETHER_AGAIN */1877 #if defined(_FOV_)1878 trade = myalloc1(p*count);1879 1880 n = 0;1881 for (mpi_i=0; mpi_i < count; mpi_i++){1882 FOR_0_LE_l_LT_p1883 {1884 trade[n]=rpp_A[loc_recv[mpi_i]][l];1885 rpp_A[loc_recv[mpi_i]][l]=0;1886 n++;1887 }1888 // ADOLC_GET_TAYLOR(loc_recv[mpi_i]);1889 }1890 MPI_Send( trade , count*p, MPI_DOUBLE , target, tag , MPI_COMM_WORLD);1891 myfree1(trade);1892 #endif /* ALL_TOGETHER_AGAIN */1893 free(loc_recv);1894 break;1895 /*--------------------------------------------------------------------------*/1896 1842 case send_data: // MPI-Send-Befehl 1897 1843 tag = get_locint_r(); // tag 1898 1844 target = get_locint_r(); // source 1899 1845 count = get_locint_r(); // count 1900 loc_recv = ( int*) malloc(count*sizeof(int));1846 loc_recv = (locint*) malloc(count*sizeof(locint)); 1901 1847 for(mpi_i=0;mpi_i<count;mpi_i++) 1902 1848 loc_recv[mpi_i] = get_locint_r(); 1903 1849 count2 = get_locint_r(); 1904 #if !defined(_NTIGHT_) 1905 trade = myalloc1(count); 1906 MPI_Recv( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 1850 1851 #if defined(_INT_REV_) 1852 #if defined(_TIGHT_) 1853 trade = myalloc1(count*(p+1)); 1854 MPI_Recv( trade , count*(p+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 1855 mpi_ii=0; 1907 1856 for(mpi_i=0; mpi_i<count; mpi_i++){ 1908 rp_T[loc_recv[mpi_i]] = trade[mpi_i]; 1909 } 1910 myfree1(trade); 1911 #endif 1912 #if defined(_INT_REV_) 1913 trade_ulong = (int*) malloc(count*p*sizeof(int)); 1914 MPI_Recv( trade_ulong , count*p, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 1857 rp_T[loc_recv[mpi_i]] = trade[mpi_ii]; 1858 mpi_ii++; 1859 for(l=0; l<p;l++,mpi_ii++) 1860 upp_A[loc_recv[mpi_i]][l] += (unsigned long int) trade[mpi_ii]; 1861 } 1862 free(trade); 1863 #else 1864 trade_ulong = (unsigned long int*) malloc(count*p*sizeof(unsigned long int)); 1865 MPI_Recv( trade_ulong , count*p, MPI_UNSIGNED_LONG , target, tag , MPI_COMM_WORLD, &status_MPI); 1866 mpi_ii=0; 1915 1867 for(mpi_i=0; mpi_i<count; mpi_i++){ 1916 for(l=0; l<p;l++){ 1917 upp_A[loc_recv[mpi_i]][l]+=trade_ulong[mpi_i*p+l]; 1918 } 1868 for(l=0; l<p;l++,mpi_ii++) 1869 upp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 1919 1870 } 1920 1871 free(trade_ulong); 1921 1872 #endif 1873 #endif 1922 1874 #if defined(_FOS_) 1923 trade = myalloc1(count );1924 MPI_Recv( trade , count , MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);1875 trade = myalloc1(count*2); 1876 MPI_Recv( trade , count*2, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 1925 1877 1926 1878 for (mpi_i=0; mpi_i < count; mpi_i++) { 1927 rp_A[loc_recv[mpi_i]] += trade[mpi_i ]; 1928 } 1929 myfree1(trade); 1879 rp_T[loc_recv[mpi_i]] = trade[2*mpi_i ]; 1880 rp_A[loc_recv[mpi_i]]+= trade[2*mpi_i+1]; 1881 } 1882 free(trade); 1930 1883 #endif 1931 1884 #if defined(_FOV_) 1932 trade = myalloc1(count*p); 1933 MPI_Recv( trade ,p*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 1934 1935 n = 0; 1936 for ( mpi_i=0; mpi_i < count ; mpi_i++ ) 1937 FOR_0_LE_l_LT_p 1938 { 1939 rpp_A[loc_recv[mpi_i]][l] += trade[n]; 1940 n++; 1941 } 1942 1943 myfree1(trade); 1885 trade = myalloc1(count*(p+1)); 1886 MPI_Recv( trade ,(p+1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 1887 1888 mpi_ii = 0; 1889 for( mpi_i=0; mpi_i < count ; mpi_i++ ){ 1890 rp_T[loc_recv[mpi_i]] = trade[mpi_ii]; 1891 mpi_ii++; 1892 for(l=0;l<p;l++,mpi_ii++) 1893 rpp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 1894 } 1895 free(trade); 1944 1896 #endif 1945 1897 free(loc_recv); 1946 1898 break; 1899 /*--------------------------------------------------------------------------*/ 1900 case receive_data: // MPI-Send 1901 tag = get_locint_r(); // tag 1902 target = get_locint_r(); // dest 1903 count = get_locint_r(); // count 1904 loc_recv = (locint*) malloc(count*sizeof(locint)); 1905 for(mpi_i=0;mpi_i<count;mpi_i++) 1906 loc_recv[mpi_i] = get_locint_r(); 1907 count2 = get_locint_r(); // count 1908 1909 #if defined(_INT_REV_) 1910 #if defined(_TIGHT_) 1911 trade = myalloc1(count*(p+1)); 1912 mpi_ii=0; 1913 for(mpi_i=0; mpi_i<count; mpi_i++){ 1914 trade[mpi_ii] = rp_T[loc_recv[mpi_i]]; 1915 mpi_ii++; 1916 for(l=0; l<p;l++,mpi_ii++) 1917 trade[mpi_ii] = (double) upp_A[loc_recv[mpi_i]][l]; 1918 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 1919 for(l=0; l<p;l++,mpi_ii++) 1920 upp_A[loc_recv[mpi_i]][l] = 0; 1921 } 1922 MPI_Send(trade,(p+1)*count,MPI_DOUBLE,target,tag,MPI_COMM_WORLD); 1923 free(trade); 1924 #else 1925 trade_ulong = (unsigned long int*) malloc(count*p*sizeof(unsigned long int)); 1926 mpi_ii=0; 1927 for(mpi_i=0; mpi_i<count; mpi_i++){ 1928 for(l=0; l<p;l++,mpi_ii++){ 1929 trade_ulong[mpi_ii] = upp_A[loc_recv[mpi_i]][l]; 1930 upp_A[loc_recv[mpi_i]][l] = 0; 1931 } 1932 } 1933 MPI_Send(trade_ulong,p*count,MPI_UNSIGNED_LONG,target,tag,MPI_COMM_WORLD); 1934 free(trade_ulong); 1935 #endif 1936 #endif 1937 #if defined(_FOS_) 1938 trade = myalloc1(count*2); 1939 for(mpi_i=0; mpi_i<count; mpi_i++){ 1940 trade[2*mpi_ii] = rp_T[loc_recv[mpi_i]]; 1941 trade[2*mpi_ii+1] = rp_A[loc_recv[mpi_i]]; 1942 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 1943 rp_A[loc_recv[mpi_i]] = 0.; 1944 } 1945 MPI_Send(trade,2*count,MPI_DOUBLE,target,tag,MPI_COMM_WORLD); 1946 free(trade); 1947 #endif 1948 #if defined(_FOV_) 1949 trade = myalloc1(count*(p+1)); 1950 mpi_ii=0; 1951 for(mpi_i=0; mpi_i<count; mpi_i++){ 1952 trade[mpi_ii] = rp_T[loc_recv[mpi_i]]; 1953 mpi_ii++; 1954 for(l=0; l<p;l++,mpi_ii++) 1955 trade[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 1956 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 1957 for(l=0; l<p;l++,mpi_ii++) 1958 rpp_A[loc_recv[mpi_i]][l] = 0.; 1959 } 1960 MPI_Send(trade,(p+1)*count,MPI_DOUBLE,target,tag,MPI_COMM_WORLD); 1961 free(trade); 1962 #endif 1963 free(loc_recv); 1964 break; 1947 1965 /*--------------------------------------------------------------------------*/ 1948 1966 case barrier_op: … … 1954 1972 root = get_locint_r(); // root 1955 1973 count = get_locint_r(); // count 1956 loc_recv = ( int*) malloc(count*sizeof(int));1974 loc_recv = (locint*) malloc(count*sizeof(locint)); 1957 1975 for(mpi_i=0;mpi_i<count;mpi_i++) 1958 1976 loc_recv[mpi_i] = get_locint_r(); // Recv Buffer 1959 1977 count2 = get_locint_r(); // count 1960 #if !defined(_NTIGHT_) 1961 trade = myalloc1(count); 1978 count2 *= process_count; 1979 1980 #if defined(_INT_REV_) 1981 #if defined(_TIGHT_) 1982 trade = myalloc1(count*(p+1)); 1983 rec_buf = NULL; 1962 1984 if (myid == root) 1963 rec_buf = myalloc1(count*process_count); 1985 rec_buf = myalloc1(count2*(p+1)); 1986 1987 mpi_ii=0; 1988 for(mpi_i=0; mpi_i < count; mpi_i++) { 1989 trade[mpi_ii] = rp_T[loc_recv[mpi_i]]; 1990 mpi_ii++; 1991 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 1992 for(l=0; l<p;l++,mpi_ii++){ 1993 trade[p*mpi_i+l] = (double) upp_A[loc_recv[mpi_i]][l]; 1994 upp_A[loc_recv[mpi_i]][l] = 0; 1995 } 1996 } 1997 MPI_Gather( trade, count*(p+1), MPI_DOUBLE, rec_buf ,count*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 1998 free(trade); 1999 if (myid == root){ 2000 mpi_ii=0; 2001 for(arg=0; arg< process_count; arg++) 2002 for (mpi_i=0; mpi_i < count; mpi_i++) { 2003 rp_T[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 2004 mpi_ii++; 2005 for(l=0; l<p;l++,mpi_ii++) 2006 upp_A[loc_recv[mpi_i]][l] += (unsigned long int) trade[mpi_ii]; 2007 } 2008 free(rec_buf); 2009 } 2010 #else 2011 trade_ulong = (unsigned long int*) malloc(count*p*sizeof(unsigned long int)); 2012 if (myid == root) 2013 rec_ulong = (unsigned long int*) malloc(count2*p*sizeof(unsigned long int)); 2014 else 2015 rec_ulong = NULL; 2016 mpi_ii=0; 2017 for(mpi_i=0; mpi_i < count; mpi_i++) { 2018 for(l=0;l<p;l++,mpi_ii++){ 2019 trade_ulong[mpi_ii] = upp_A[loc_recv[mpi_i]][l]; 2020 upp_A[loc_recv[mpi_i]][l]=0; 2021 } 2022 } 2023 MPI_Reduce( trade_ulong , rec_ulong ,p*count, MPI_DOUBLE , MPI_SUM , root, MPI_COMM_WORLD); 2024 if (myid == root){ 2025 mpi_ii=0; 2026 for(mpi_i=0; mpi_i < count; mpi_i++) { 2027 for(l=0;l<p;l++,mpi_ii++) 2028 upp_A[loc_recv[mpi_i]][l] += rec_ulong[mpi_ii]; 2029 } 2030 free(rec_ulong); 2031 } 2032 free(trade_ulong); 2033 #endif 2034 #endif 2035 #if defined(_FOS_) 2036 trade = myalloc1(count*2); 2037 if (myid == root) 2038 rec_buf = myalloc1(count2*2); 1964 2039 else 1965 2040 rec_buf = NULL; 1966 2041 for (mpi_i=0; mpi_i < count; mpi_i++) { 1967 trade[mpi_i] = rp_T[loc_recv[mpi_i]] ; 1968 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 1969 } 1970 MPI_Gather( trade, count, MPI_DOUBLE, rec_buf ,count, MPI_DOUBLE, root, MPI_COMM_WORLD); 2042 trade[2*mpi_i] = rp_T[loc_recv[mpi_i]]; 2043 trade[2*mpi_i+1] = rp_A[loc_recv[mpi_i]]; 2044 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2045 rp_A[loc_recv[mpi_i]]=0.; 2046 } 2047 MPI_Gather( trade, count*2, MPI_DOUBLE, rec_buf ,count*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 2048 free(trade); 1971 2049 if (myid == root){ 1972 for (mpi_i=0; mpi_i < count; mpi_i++) { 1973 rp_T[loc_recv[mpi_i]] = rec_buf[(process_count-1)*count+mpi_i]; 1974 } 1975 myfree1(rec_buf); 1976 } 1977 myfree1(trade); 1978 #endif 1979 #if defined(_INT_REV_) 1980 trade_ulong = (int*) malloc(count*p*sizeof(int)); 2050 mpi_ii=0; 2051 for(arg=0; arg< process_count; arg++) 2052 for(mpi_i=0; mpi_i < count; mpi_i++) { 2053 rp_T[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 2054 rp_A[loc_recv[mpi_i]]+= rec_buf[mpi_ii+1]; 2055 mpi_ii +=2; 2056 } 2057 free(rec_buf); 2058 } 2059 #endif 2060 #if defined(_FOV_) 2061 trade = myalloc1(count*(p+1)); 1981 2062 if (myid == root) 1982 rec_ulong = (int*) malloc(count*p*process_count*sizeof(int)); 1983 else 1984 rec_ulong = NULL; 1985 for (mpi_i=0; mpi_i < count; mpi_i++) { 1986 FOR_0_LE_l_LT_p{ 1987 trade_ulong[p*mpi_i+l] = upp_A[loc_recv[mpi_i]][l]; 1988 upp_A[loc_recv[mpi_i]][l]=0; 1989 } 1990 } 1991 MPI_Reduce( trade_ulong , rec_ulong ,p*count, MPI_DOUBLE , MPI_SUM , root, MPI_COMM_WORLD); 1992 if (myid == root){ 1993 for (mpi_i=0; mpi_i < count; mpi_i++) { 1994 FOR_0_LE_l_LT_p 1995 upp_A[loc_recv[mpi_i]][l] += rec_ulong[p*mpi_i +l]; 1996 } 1997 free(rec_ulong); 1998 } 1999 free(trade_ulong); 2000 #endif 2001 #if defined(_FOS_) 2002 trade = myalloc1(count); 2003 if (myid == root) 2004 rec_buf = myalloc1(count); 2063 rec_buf = myalloc1(count2*(p+1)); 2005 2064 else 2006 2065 rec_buf = NULL; 2066 mpi_ii=0; 2007 2067 for (mpi_i=0; mpi_i < count; mpi_i++) { 2008 trade[mpi_i] = rp_A[loc_recv[mpi_i]]; 2009 rp_A[loc_recv[mpi_i]]=0; 2010 } 2011 MPI_Reduce( trade , rec_buf ,count, MPI_DOUBLE , MPI_SUM , root, MPI_COMM_WORLD); 2068 trade[mpi_ii] = rp_T[loc_recv[mpi_i]]; 2069 mpi_ii++; 2070 for(l=0;l<p;l++,mpi_ii++){ 2071 trade[mpi_i+1] = rpp_A[loc_recv[mpi_i]][l]; 2072 rpp_A[loc_recv[mpi_i]][l]=0.; 2073 } 2074 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2075 } 2076 MPI_Gather( trade, count*(p+1), MPI_DOUBLE, rec_buf ,count*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 2077 free(trade); 2012 2078 if (myid == root){ 2013 for (mpi_i=0; mpi_i < count; mpi_i++) { 2014 rp_A[loc_recv[mpi_i]] += rec_buf[mpi_i]; 2015 } 2016 myfree1(rec_buf); 2017 } 2018 myfree1(trade); 2019 #endif 2020 #if defined(_FOV_) 2021 trade = myalloc1(count*p); 2022 if (myid==root) 2023 rec_buf = myalloc1(p*count); 2024 else 2025 rec_buf = NULL; 2026 n=0; 2027 for (mpi_i=0; mpi_i < count; mpi_i++){ 2028 FOR_0_LE_l_LT_p{ 2029 trade[n] = rpp_A[loc_recv[mpi_i]][l]; 2030 rpp_A[loc_recv[mpi_i]][l] = 0; 2031 n++; 2032 } 2033 } 2034 MPI_Reduce( trade , rec_buf ,p*count, MPI_DOUBLE , MPI_SUM , root, MPI_COMM_WORLD); 2035 if (myid == root){ 2036 for(mpi_i=0;mpi_i<count;mpi_i++) 2037 rp_T[loc_recv[mpi_i]] = rec_buf[mpi_i]; 2038 n=0; 2039 for (mpi_i=0; mpi_i < count; mpi_i++){ 2040 FOR_0_LE_l_LT_p{ 2041 rpp_A[loc_recv[mpi_i]][l] += rec_buf[n]; 2042 n++; 2043 } 2044 } 2045 myfree1(rec_buf); 2046 } 2047 myfree1(trade); 2079 mpi_ii=0; 2080 for(arg=0; arg< process_count; arg++) 2081 for(mpi_i=0; mpi_i < count; mpi_i++) { 2082 rp_T[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 2083 mpi_ii++; 2084 for(l=0;l<p;l++,mpi_ii++) 2085 rpp_A[loc_recv[mpi_i]][l] +=rec_buf[mpi_ii]; 2086 } 2087 free(rec_buf); 2088 } 2048 2089 #endif 2049 2090 free(loc_recv); 2050 2091 break; 2051 2092 case reduce: 2052 use_reduce=1; 2093 use_reduce=0; 2094 mpi_op = get_locint_r(); 2095 if(mpi_op == ADOLC_MPI_SUM) use_reduce=1; 2053 2096 case gather: 2054 if(use_reduce==1) mpi_op = get_locint_r();2055 if(all_root == mpi_id){2056 count2 = get_locint_r(); // count*process_count2057 loc_recv = (int*) malloc (count2*sizeof(int));2058 /* Must use an additional value to send the right locints back */2059 if (use_reduce==1){2060 if (mpi_op == ADOLC_MPI_SUM){2061 for(mpi_i=0;mpi_i<count2;mpi_i++)2062 loc_recv[mpi_i] = get_locint_r(); // Receive Buffer2063 } else {2064 for(mpi_i=0;mpi_i<count2;mpi_i++)2065 loc_recv[count2-1-mpi_i] = get_locint_r(); // Receive Buffer2066 }2067 }2068 }2069 count2 = get_locint_r(); // count*process_count2070 2097 myid = get_locint_r(); // process id 2071 2098 root = get_locint_r(); // root 2099 count2 = get_locint_r(); // count*process_count 2100 if (root == myid){ 2101 loc_recv = (locint*) malloc (count2*sizeof(locint)); 2102 for(mpi_i=0;mpi_i<count2;mpi_i++) 2103 loc_recv[count2-1-mpi_i] = get_locint_r(); // Receive Buffer 2104 } 2105 arg = get_locint_r(); // count*process_count 2106 arg = get_locint_r(); // process id 2107 arg = get_locint_r(); // root 2072 2108 count = get_locint_r(); // count 2073 loc_send = (int*) calloc(count,sizeof(int)); 2074 if (use_reduce==1){ 2075 if (mpi_op == ADOLC_MPI_SUM){ 2076 for(mpi_i=0;mpi_i<count;mpi_i++) 2077 loc_send[mpi_i] = get_locint_r(); // Send Buffer 2078 } else { 2079 for(mpi_i=0;mpi_i<count;mpi_i++) 2080 loc_send[count-1-mpi_i] = get_locint_r(); // Send Buffer 2081 } 2082 } 2109 loc_send = (locint*) calloc(count,sizeof(locint)); 2083 2110 for(mpi_i=0;mpi_i<count;mpi_i++) 2084 loc_send[ mpi_i] = get_locint_r(); // Send Buffer2111 loc_send[count-1-mpi_i] = get_locint_r(); // Send Buffer 2085 2112 arg = get_locint_r(); // count 2086 #if !defined(_NTIGHT_) 2087 trade = myalloc1(count); 2113 2114 #if defined(_INT_REV_) 2115 #if defined(_TIGHT_) 2116 trade = myalloc1(count*(p+1)); 2088 2117 rec_buf = NULL; 2089 if(myid == root){ 2090 rec_buf = myalloc1(count2); 2091 for (mpi_i=0; mpi_i < count2; mpi_i++) 2092 rec_buf[mpi_i] = rp_T[loc_recv[mpi_i]]; 2093 for (mpi_i=0; mpi_i < count2; mpi_i++) 2094 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2118 if (myid == root){ 2119 rec_buf = myalloc1(count2*(p+1)); 2120 mpi_ii=0; 2121 for(mpi_i=0; mpi_i < count2; mpi_i++) { 2122 rec_buf[mpi_ii] = rp_T[loc_recv[mpi_i]]; 2123 mpi_ii++; 2124 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2125 for(l=0;l<p;l++,mpi_ii++){ 2126 rec_buf[mpi_ii] = (double) upp_A[loc_recv[mpi_i]][l]; 2127 upp_A[loc_recv[mpi_i]][l]=0; 2128 } 2129 } 2095 2130 } 2096 MPI_Scatter(rec_buf,count,MPI_DOUBLE,trade,count,MPI_DOUBLE, root,MPI_COMM_WORLD); 2097 2098 for (mpi_i=0; mpi_i < count; mpi_i++) { 2099 rp_T[loc_send[mpi_i]] = trade[mpi_i]; 2100 } 2101 if(myid == root) myfree1(rec_buf); 2102 myfree1(trade); 2103 #endif 2104 #if defined(_INT_REV_) 2105 trade_ulong = (int*) malloc(count*p*sizeof(int)); 2131 MPI_Scatter(rec_buf,count*(p+1),MPI_DOUBLE,trade,count*(p+1),MPI_DOUBLE, root,MPI_COMM_WORLD); 2132 if (myid == root) 2133 free(rec_buf); 2134 mpi_ii=0; 2135 for(mpi_i=0; mpi_i < count; mpi_i++) { 2136 rp_T[loc_send[mpi_i]] = trade[mpi_ii]; 2137 mpi_ii++; 2138 for(l=0;l<p;l++,mpi_ii++) 2139 upp_A[loc_send[mpi_i]][l] += (unsigned long int) trade[mpi_ii]; 2140 } 2141 free(trade); 2142 #else /* NTIGHT */ 2143 trade_ulong = (unsigned long int*) malloc(count*p*sizeof(unsigned long int)); 2106 2144 rec_ulong = NULL; 2107 2145 if(myid == root){ 2108 rec_ulong = (int*) malloc(count2*p*sizeof(int)); 2109 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2110 FOR_0_LE_l_LT_p{ 2111 rec_ulong[p*mpi_i+l] = upp_A[loc_recv[mpi_i]][l]; 2146 rec_ulong = (unsigned long int*) malloc(count2*p*sizeof(unsigned long int)); 2147 mpi_ii=0; 2148 for (mpi_i=0; mpi_i < count2; mpi_i++){ 2149 for(l=0;l<p;l++,mpi_ii++){ 2150 rec_ulong[mpi_ii] = upp_A[loc_recv[mpi_i]][l]; 2112 2151 upp_A[loc_recv[mpi_i]][l]=0; 2113 2152 } 2114 2153 } 2115 2154 } 2116 MPI_Scatter(rec_ulong,count*p,MPI_DOUBLE,trade_ulong,count*p,MPI_DOUBLE, root,MPI_COMM_WORLD); 2117 2118 for (mpi_i=0; mpi_i < count; mpi_i++) { 2119 FOR_0_LE_l_LT_p 2120 upp_A[loc_send[mpi_i]][l] += trade_ulong[p*mpi_i+l]; 2121 } 2122 if(myid == root) free(rec_ulong); 2155 MPI_Scatter(rec_ulong,count*p,MPI_UNSIGNED_LONG,trade_ulong,count*p,MPI_UNSIGNED_LONG, root,MPI_COMM_WORLD); 2156 if (myid == root) 2157 free(rec_ulong); 2158 mpi_ii=0; 2159 for(mpi_i=0; mpi_i < count; mpi_i++){ 2160 for(l=0;l<p;l++,mpi_ii++) 2161 upp_A[loc_send[mpi_i]][l] += trade_ulong[mpi_ii]; 2162 } 2123 2163 free(trade_ulong); 2124 2164 #endif 2165 #endif 2125 2166 #if defined(_FOS_) 2126 trade = myalloc1(count );2167 trade = myalloc1(count*2); 2127 2168 rec_buf = NULL; 2128 2169 if(myid == root){ 2129 rec_buf = myalloc1(count2 );2170 rec_buf = myalloc1(count2*2); 2130 2171 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2131 rec_buf[mpi_i] = rp_A[loc_recv[mpi_i]]; 2132 rp_A[loc_recv[mpi_i]]=0; 2172 rec_buf[2*mpi_i] = rp_T[loc_recv[mpi_i]]; 2173 rec_buf[2*mpi_i+1] = rp_A[loc_recv[mpi_i]]; 2174 ADOLC_GET_TAYLOR(loc_recv[mpi_i]) 2175 rp_A[loc_recv[mpi_i]]=0.; 2133 2176 } 2134 2177 } 2135 2178 MPI_Scatter(rec_buf,count,MPI_DOUBLE,trade,count,MPI_DOUBLE, root,MPI_COMM_WORLD); 2136 2179 if (myid == root) 2180 free(rec_buf); 2137 2181 for (mpi_i=0; mpi_i < count; mpi_i++) { 2138 rp_ A[loc_send[mpi_i]] += trade[mpi_i];2139 }2140 if(myid == root) myfree1(rec_buf);2141 myfree1(trade);2182 rp_T[loc_send[mpi_i]] = trade[2*mpi_i]; 2183 rp_A[loc_send[mpi_i]] += trade[2*mpi_i+1]; 2184 } 2185 free(trade); 2142 2186 #endif 2143 2187 #if defined(_FOV_) 2144 trade = myalloc1(count* p);2188 trade = myalloc1(count*(p+1)); 2145 2189 rec_buf = NULL; 2146 2190 if(myid == root){ 2147 rec_buf = myalloc1(count2*p); 2148 n= 0; 2149 for (mpi_i=0; mpi_i < count2; mpi_i++){ 2150 FOR_0_LE_l_LT_p { 2151 rec_buf[n] = rpp_A[loc_recv[mpi_i]][l]; 2152 rpp_A[loc_recv[mpi_i]][l]=0; 2153 n++; 2191 rec_buf = myalloc1(count2*(p+1)); 2192 mpi_ii=0; 2193 for(mpi_i=0; mpi_i < count2; mpi_i++){ 2194 rec_buf[mpi_ii] = rp_T[loc_recv[mpi_i]]; 2195 mpi_ii++; 2196 ADOLC_GET_TAYLOR(loc_recv[mpi_i]) 2197 for(l=0;l<p;l++,mpi_ii++){ 2198 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2199 rpp_A[loc_recv[mpi_i]][l]=0.; 2154 2200 } 2155 2201 } 2156 2202 } 2157 MPI_Scatter(rec_buf,count*p,MPI_DOUBLE,trade,count*p,MPI_DOUBLE, root,MPI_COMM_WORLD); 2158 2159 n=0; 2203 MPI_Scatter(rec_buf,count*(p+1),MPI_DOUBLE,trade,count*(p+1),MPI_DOUBLE, root,MPI_COMM_WORLD); 2204 if (myid == root) 2205 free(rec_buf); 2206 mpi_ii=0; 2160 2207 for (mpi_i=0; mpi_i < count; mpi_i++){ 2161 FOR_0_LE_l_LT_p{2162 rpp_A[loc_send[mpi_i]][l] += trade[n];2163 n++;2164 }2165 } 2166 if(myid == root) myfree1(rec_buf);2167 myfree1(trade); 2168 #endif 2169 if(myid == root )free(loc_recv);2208 rp_T[loc_send[mpi_i]] = trade[mpi_ii]; 2209 mpi_ii++; 2210 for(l=0;l<p;l++,mpi_ii++) 2211 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2212 } 2213 free(trade); 2214 #endif 2215 if (myid == root ) 2216 free(loc_recv); 2170 2217 free(loc_send); 2171 2218 use_reduce=0; … … 2174 2221 case scatter: 2175 2222 count2 = get_locint_r(); // recvcount (count) 2176 loc_recv = ( int*) malloc(count2*sizeof(int));2223 loc_recv = (locint*) malloc(count2*sizeof(locint)); 2177 2224 for(mpi_i=0;mpi_i<count2;mpi_i++) 2178 2225 loc_recv[count2-1-mpi_i] = get_locint_r(); // recv Buffer … … 2180 2227 myid = get_locint_r(); // process id 2181 2228 root = get_locint_r(); // root 2229 count = get_locint_r(); // sendcount (count*process_count) 2182 2230 if(myid==root){ 2183 count = get_locint_r(); // sendcount (count*process_count) 2184 loc_send = (int*) malloc(count*sizeof(int)); 2231 loc_send = (locint*) malloc(count*sizeof(locint)); 2185 2232 for(mpi_i=0;mpi_i<count;mpi_i++) 2186 2233 loc_send[count-1-mpi_i]= get_locint_r(); … … 2189 2236 res = get_locint_r(); // root 2190 2237 res = get_locint_r(); // sendcount (count*process_count) 2191 #if !defined(_NTIGHT_) 2192 rec_buf = myalloc1(count2); 2193 trade = NULL; 2194 if(myid == root) 2195 trade = myalloc1(count); 2196 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2197 rec_buf[mpi_i] = rp_T[loc_recv[mpi_i]]; 2238 2239 #if defined(_INT_REV_) 2240 #if defined(_TIGHT_) 2241 rec_buf = myalloc1(count2*(p+1)); 2242 trade = NULL; 2243 if (myid == root) 2244 trade = myalloc1(count*(p+1)); 2245 mpi_ii=0; 2246 for(mpi_i=0; mpi_i< count2; mpi_i++){ 2247 rec_buf[mpi_ii] = rp_T[loc_recv[mpi_i]]; 2248 mpi_ii++; 2198 2249 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2199 } 2200 2201 MPI_Gather(rec_buf , count2, MPI_DOUBLE,trade,count2, MPI_DOUBLE, root, MPI_COMM_WORLD); 2202 2203 if(myid == root){ 2204 for (mpi_i=0; mpi_i < count; mpi_i++) { 2205 rp_T[loc_send[mpi_i]] = trade[mpi_i]; 2206 } 2207 myfree1(trade); 2208 } 2209 myfree1(rec_buf); 2210 #endif 2211 #if defined(_INT_REV_) 2212 rec_ulong = (int*) malloc(count2*p*sizeof(int)); 2213 trade_ulong = NULL; 2214 if(myid == root) 2215 trade_ulong = (int*) malloc(count*p*sizeof(int)); 2216 2217 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2218 FOR_0_LE_l_LT_p{ 2219 rec_ulong[p*mpi_i+l] = upp_A[loc_recv[mpi_i]][l]; 2220 upp_A[loc_recv[mpi_i]][l]=0; 2221 } 2222 } 2223 2250 for(l=0;l<p;l++,mpi_ii++){ 2251 rec_buf[mpi_ii] = (double) upp_A[loc_recv[mpi_i]][l]; 2252 upp_A[loc_recv[mpi_i]] = 0; 2253 } 2254 } 2255 MPI_Gather(rec_buf , count2*(p+1), MPI_DOUBLE,trade,count2*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 2256 free(rec_buf); 2257 if (myid == root){ 2258 mpi_ii=0; 2259 for( mpi_i=0; mpi_i< count; mpi_i++){ 2260 rp_T[loc_send[mpi_i]] = trade[mpi_ii]; 2261 mpi_ii++; 2262 for(l=0;l<p;l++,mpi_ii++) 2263 upp_A[loc_send[mpi_i]] += (unsigned long int) trade[mpi_ii]; 2264 } 2265 free(trade); 2266 } 2267 #else 2268 rec_ulong = (unsigned long int*) malloc(count2*p*sizeof(unsigned long int)); 2269 trade_ulong = NULL; 2270 if(myid == root) 2271 trade_ulong = (unsigned long int*) malloc(count*p*sizeof(unsigned long int)); 2272 mpi_ii=0; 2273 for(mpi_i=0; mpi_i < count2; mpi_i++){ 2274 for(l=0;l<p;l++,mpi_ii++){ 2275 rec_ulong[mpi_ii] = upp_A[loc_recv[mpi_i]][l]; 2276 upp_A[loc_recv[mpi_i]][l]=0; 2277 } 2278 } 2224 2279 MPI_Gather(rec_ulong , count2*p, MPI_DOUBLE,trade_ulong,count2*p, MPI_DOUBLE, root, MPI_COMM_WORLD); 2225 2226 if(myid == root){ 2227 for (mpi_i=0; mpi_i < count; mpi_i++) { 2228 FOR_0_LE_l_LT_p 2229 upp_A[loc_send[mpi_i]][l] += trade_ulong[p*mpi_i+l]; 2280 free(rec_ulong); 2281 mpi_ii=0; 2282 if (myid == root ){ 2283 for(mpi_i=0; mpi_i < count; mpi_i++){ 2284 for(l=0;l<p;l++,mpi_ii++) 2285 upp_A[loc_send[mpi_i]][l] += trade_ulong[mpi_ii]; 2230 2286 } 2231 2287 free(trade_ulong); 2232 2288 } 2233 free(rec_ulong); 2289 #endif 2234 2290 #endif 2235 2291 #if defined(_FOS_) 2236 rec_buf = myalloc1(count2); 2292 rec_buf = myalloc1(count2*2); 2293 trade = NULL; 2294 if (myid == root) 2295 trade = myalloc1(count*2); 2296 2297 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2298 rec_buf[2*mpi_i] = rp_T[loc_recv[mpi_i]]; 2299 rec_buf[2*mpi_i+1] = rp_A[loc_recv[mpi_i]]; 2300 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2301 rp_A[loc_recv[mpi_i]]=0; 2302 } 2303 MPI_Gather( rec_buf ,count2*2, MPI_DOUBLE,trade,count2*2,MPI_DOUBLE, root, MPI_COMM_WORLD); 2304 free(rec_buf); 2305 if(myid == root){ 2306 for(mpi_i=0; mpi_i < count; mpi_i++) { 2307 rp_T[loc_send[mpi_i]] = trade[2*mpi_i]; 2308 rp_A[loc_send[mpi_i]] += trade[2*mpi_i+1]; 2309 } 2310 free(trade); 2311 } 2312 #endif 2313 #if defined(_FOV_) 2314 rec_buf = myalloc1(count2*(p+1)); 2237 2315 trade = NULL; 2238 2316 if(myid == root) 2239 trade = myalloc1(count); 2240 2241 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2242 rec_buf[mpi_i] = rp_A[loc_recv[ mpi_i]]; 2243 rp_A[loc_recv[mpi_i]]=0; 2244 } 2245 2246 MPI_Gather( rec_buf ,count2, MPI_DOUBLE,trade,count2,MPI_DOUBLE, root, MPI_COMM_WORLD); 2247 if(myid == root){ 2248 for (mpi_i=0; mpi_i < count; mpi_i++) { 2249 rp_A[loc_send[mpi_i]] += trade[mpi_i]; 2317 trade = myalloc1(count*(p+1)); 2318 2319 mpi_ii=0; 2320 for(mpi_i=0; mpi_i< count2; mpi_i++){ 2321 rec_buf[mpi_ii] = rp_T[loc_recv[mpi_i]]; 2322 mpi_ii++; 2323 ADOLC_GET_TAYLOR(loc_recv[mpi_i]); 2324 for(l=0;l<p;l++,mpi_ii++){ 2325 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2326 rpp_A[loc_recv[mpi_i]][l] = 0.; 2250 2327 } 2251 myfree1(trade); 2252 } 2253 myfree1(rec_buf); 2254 #endif 2255 #if defined(_FOV_) 2256 rec_buf = myalloc1(count2*p); 2257 trade = NULL; 2258 if(myid == root) 2259 trade = myalloc1(count*p); 2260 2261 n=0; 2262 for (mpi_i=0; mpi_i < count2; mpi_i++) { 2263 FOR_0_LE_l_LT_p{ 2264 rec_buf[n] = rpp_A[loc_recv[mpi_i]][l]; 2265 rpp_A[loc_recv[mpi_i]][l]=0; 2266 n++; 2328 } 2329 MPI_Gather(rec_buf , count2*(p+1), MPI_DOUBLE,trade,count2*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 2330 free(rec_buf); 2331 if (myid == root){ 2332 mpi_ii=0; 2333 for( mpi_i=0; mpi_i< count; mpi_i++){ 2334 rp_T[loc_send[mpi_i]] = trade[mpi_ii]; 2335 mpi_ii++; 2336 for(l=0;l<p;l++,mpi_ii++) 2337 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2267 2338 } 2268 } 2269 2270 MPI_Gather( rec_buf ,p*count2, MPI_DOUBLE,trade,p*count2,MPI_DOUBLE, root, MPI_COMM_WORLD); 2271 if(myid == root){ 2272 n=0; 2273 for (mpi_i=0; mpi_i < count; mpi_i++) 2274 FOR_0_LE_l_LT_p{ 2275 rpp_A[loc_send[mpi_i]][l] += trade[n]; 2276 n++; 2277 } 2278 myfree1(trade); 2279 } 2280 myfree1(rec_buf); 2281 #endif 2282 if(myid == root) free(loc_send); 2339 free(trade); 2340 } 2341 #endif 2342 if (myid == root) 2343 free(loc_send); 2283 2344 free(loc_recv); 2284 2345 break; -
branches/MPI/ADOL-C/src/ho_rev.c
r273 r274 648 648 MPI_Status status_MPI; 649 649 double *trade, *rec_buf; 650 int mpi_i, myid, root, count,count2, *loc_recv, *loc_send; 650 int mpi_i,mpi_ii, myid, root, count,count2; 651 locint *loc_recv, *loc_send; 651 652 int target, tag, use_reduce=0; 652 653 ADOLC_MPI_Op mpi_op; … … 2149 2150 2150 2151 #if defined(_MPI_) 2151 case receive_data: // MPI-Send 2152 case send_data: // MPI-Send-Befehl 2153 tag = get_locint_r(); // tag 2154 target = get_locint_r(); // source 2155 count = get_locint_r(); // count 2156 loc_recv = (locint*) malloc(count*sizeof(locint)); 2157 for(mpi_i=0;mpi_i<count;mpi_i++) 2158 loc_recv[mpi_i] = get_locint_r(); 2159 count2 = get_locint_r(); 2160 #if defined(_HOS_) 2161 trade = myalloc1((k+k1)*count); 2162 MPI_Recv( trade , (k+k1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 2163 mpi_ii=0; 2164 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2165 for(l=0;l<k;l++,mpi_ii++) 2166 rpp_T[loc_recv[mpi_i]][l] = trade[mpi_ii]; 2167 for(l=0;l<k1;l++,mpi_ii++) 2168 rpp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 2169 } 2170 myfree1(trade); 2171 #endif 2172 #if defined(_HOV_) 2173 trade = myalloc1((k+pk1)*count); 2174 MPI_Recv( trade , (k+pk1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 2175 mpi_ii=0; 2176 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2177 for(l=0;l<k;l++,mpi_ii++) 2178 rpp_T[loc_recv[mpi_i]][l] = trade[mpi_ii]; 2179 for(l=0;l<pk1;l++,mpi_ii++) 2180 rpp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 2181 } 2182 myfree1(trade); 2183 #endif 2184 free(loc_recv); 2185 break; 2186 case receive_data: // MPI-Send 2152 2187 tag = get_locint_r(); // tag 2153 2188 target = get_locint_r(); // dest 2154 2189 count = get_locint_r(); // count 2155 loc_recv = ( int*) malloc(count*sizeof(int));2190 loc_recv = (locint*) malloc(count*sizeof(locint)); 2156 2191 for(mpi_i=0;mpi_i<count;mpi_i++) 2157 2192 loc_recv[mpi_i] = get_locint_r(); // first Buffer 2158 2193 count2 = get_locint_r(); // count 2159 2194 2160 trade = myalloc1(k*count); 2161 /* writing Taylor- and Adjointbuffer in one double array */ 2162 for (mpi_i=0; mpi_i< count ;mpi_i++){ 2163 FOR_0_LE_l_LT_pk 2164 trade[mpi_i*k + l] = rpp_T[loc_recv[mpi_i]][l]; 2195 #if defined(_HOS_) 2196 trade = myalloc1((k+k1)*count); 2197 mpi_ii=0; 2198 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2199 for(l=0;l<k;l++,mpi_ii++) 2200 trade[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2201 for(l=0;l<k1;l++,mpi_ii++){ 2202 trade[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2203 rpp_A[loc_recv[mpi_i]][l] = 0.; 2204 } 2165 2205 GET_TAYL(loc_recv[mpi_i],k,p); 2166 2206 } 2167 2168 MPI_Send(trade,k*count,MPI_DOUBLE,target,tag,MPI_COMM_WORLD); 2207 MPI_Send( trade , (k+k1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 2169 2208 myfree1(trade); 2170 2171 #if defined(_HOS_)2172 count2 = k1;2173 2209 #endif 2174 2210 #if defined(_HOV_) 2175 count2 = pk1;2176 #endif 2177 trade = myalloc1(count2*count);2178 for (mpi_i=0; mpi_i< count ;mpi_i++)2179 for (l=0; l< count2 ;l++){2180 trade[mpi_i*count2 + l] = rpp_A[loc_recv[mpi_i]][l];2181 rpp_A[mpi_i][l] = 0;2211 trade = myalloc1((k+pk1)*count); 2212 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2213 for(l=0;l<k;l++,mpi_ii++) 2214 trade[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2215 for(l=0;l<pk1;l++,mpi_ii++){ 2216 trade[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2217 rpp_A[loc_recv[mpi_i]][l] = 0.; 2182 2218 } 2183 /* loading saved Values of Adjoint- and Taylorbuffer */ 2184 MPI_Send(trade,count2*count,MPI_DOUBLE,target,tag,MPI_COMM_WORLD); 2219 GET_TAYL(loc_recv[mpi_i],k,p); 2220 } 2221 MPI_Send( trade , (k+pk1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 2185 2222 myfree1(trade); 2186 2223 #endif 2187 2224 free(loc_recv); 2188 2225 break; 2189 2190 /*--------------------------------------------------------------------------*/2191 case send_data: // MPI-Send-Befehl2192 tag = get_locint_r(); // tag2193 target = get_locint_r(); // source2194 count = get_locint_r(); // count2195 loc_recv = (int*) malloc(count*sizeof(int));2196 for(mpi_i=0;mpi_i<count;mpi_i++)2197 loc_recv[mpi_i] = get_locint_r();2198 count2 = get_locint_r();2199 2200 trade = myalloc1(k*count);2201 /* writing Taylor- and Adjointbuffer in one double array */2202 MPI_Recv( trade , k*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);2203 2204 for (mpi_i=0; mpi_i< count ;mpi_i++)2205 FOR_0_LE_l_LT_pk2206 rpp_T[loc_recv[mpi_i]][l] = trade[mpi_i*k + l];2207 2208 myfree1(trade);2209 #if defined(_HOS_)2210 count2 = k1;2211 #endif2212 #if defined(_HOV_)2213 count2 = pk1;2214 #endif2215 trade = myalloc1(count2*count);2216 2217 MPI_Recv( trade , count2*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);2218 l=0;2219 for (mpi_i=0; mpi_i<count; mpi_i++)2220 for (i=0; i < count2 ; i++){2221 rpp_A[loc_recv[mpi_i]][i] += trade[l];2222 l++;2223 }2224 myfree1(trade);2225 2226 free(loc_recv);2227 break;2228 2229 2226 /*--------------------------------------------------------------------------*/ 2230 2227 case barrier_op: … … 2235 2232 root = get_locint_r(); // root 2236 2233 count = get_locint_r(); // count 2237 loc_recv = ( int*) malloc(count*sizeof(int));2234 loc_recv = (locint*) malloc(count*sizeof(locint)); 2238 2235 for(mpi_i=0;mpi_i<count;mpi_i++) 2239 loc_recv[ count-1-mpi_i] = get_locint_r();2236 loc_recv[mpi_i] = get_locint_r(); 2240 2237 count2 = get_locint_r(); 2241 2242 trade = myalloc1(count*k); 2243 if (myid == root) { 2244 rec_buf = myalloc1(count*k*process_count); 2245 }else{ 2246 rec_buf = NULL; 2247 } 2248 for (mpi_i=0; mpi_i< count; mpi_i++){ 2249 for( l=0; l < k ; l++) 2250 trade[mpi_i*k +l] = rpp_T[loc_recv[mpi_i]][l]; 2251 } 2252 /* loading saved Values of Adjoint- and Taylorbuffer */ 2253 for(mpi_i=0; mpi_i<count; mpi_i++){ 2238 #if defined(_HOS_) 2239 trade = myalloc1((k+k1)*count); 2240 rec_buf = NULL; 2241 mpi_ii=0; 2242 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2243 for(l=0;l<k;l++,mpi_ii++) 2244 trade[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2245 for(l=0;l<k1;l++,mpi_ii++){ 2246 trade[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2247 rpp_A[loc_recv[mpi_i]][l] = 0.; 2248 } 2254 2249 GET_TAYL(loc_recv[mpi_i],k,p); 2255 2250 } 2256 MPI_Gather(trade,k*count, MPI_DOUBLE, rec_buf, k*count, MPI_DOUBLE, root, MPI_COMM_WORLD);2257 myfree1(trade);2258 if (myid == root){2259 i = 0;2260 for(count2=0; count2 < process_count; count2++ )2261 for(mpi_i=0 ; mpi_i< count ;mpi_i++){2262 for( l=0; l < k ; l++){2263 rpp_T[loc_recv[mpi_i]][l] = rec_buf[i];2264 i++;2265 }2266 }2267 myfree1(rec_buf);2268 }2269 #if defined(_HOS_)2270 count2 = k1;2271 #endif2272 #if defined(_HOV_)2273 count2 = pk1;2274 #endif2275 trade = myalloc1(count*count2);2276 2277 for (mpi_i=0 ; mpi_i< count ;mpi_i++)2278 for( l=0; l < count2 ; l++){2279 trade[mpi_i*count2 +l] = rpp_A[loc_recv[mpi_i]][l];2280 rpp_A[loc_recv[mpi_i]][l] = 0;2281 }2282 2251 if (myid == root) 2283 rec_buf = myalloc1(count*count2); 2284 else rec_buf = NULL; 2285 2286 MPI_Reduce( trade , rec_buf ,count2*count, MPI_DOUBLE , MPI_SUM , root, MPI_COMM_WORLD); 2287 myfree1(trade); 2288 if (myid == root){ 2289 for (mpi_i=0; mpi_i< count ;mpi_i++){ 2290 for( l=0; l < count2 ; l++) 2291 rpp_A[loc_recv[mpi_i]][l] += rec_buf[mpi_i*count2 + l]; 2292 } 2293 myfree1(rec_buf); 2294 } 2295 free(loc_recv); 2296 break; 2297 /*--------------------------------------------------------------------------*/ 2298 2299 case reduce: 2300 use_reduce=1; 2301 case gather: 2302 if(use_reduce==1) mpi_op=get_locint_r(); 2303 if(all_root == mpi_id){ 2304 count2 = get_locint_r(); // count*process_count 2305 loc_recv = (int*) malloc (count2*sizeof(int)); 2306 if (use_reduce==1){ 2307 if (mpi_op == ADOLC_MPI_SUM){ 2308 for(mpi_i=0;mpi_i<count2;mpi_i++) 2309 loc_recv[mpi_i] = get_locint_r(); // Send Buffer 2310 } else { 2311 for(mpi_i=0;mpi_i<count2;mpi_i++) 2312 loc_recv[count2-1-mpi_i] = get_locint_r(); // Send Buffer 2252 rec_buf = myalloc1(count*process_count*(k+k1)); 2253 MPI_Gather( trade , (k+k1)*count, MPI_DOUBLE, rec_buf,(k+k1)*count, MPI_DOUBLE, root, MPI_COMM_WORLD); 2254 free(trade); 2255 if(myid == root){ 2256 mpi_ii=0; 2257 for(arg=0; arg< process_count ;arg++){ 2258 for(mpi_i=0; mpi_i< count; mpi_i++){ 2259 for(l=0;l<k;l++,mpi_ii++) 2260 rpp_T[loc_recv[mpi_i]][l] = trade[mpi_ii]; 2261 for(l=0;l<k1;l++,mpi_ii++) 2262 rpp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 2313 2263 } 2314 2264 } 2315 } 2316 res = get_locint_r(); // count*process_count 2265 free(rec_buf); 2266 } 2267 #endif 2268 #if defined(_HOV_) 2269 trade = myalloc1((k+pk1)*count); 2270 rec_buf = NULL; 2271 mpi_ii=0; 2272 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2273 for(l=0;l<k;l++,mpi_ii++) 2274 trade[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2275 for(l=0;l<pk1;l++,mpi_ii++){ 2276 trade[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2277 rpp_A[loc_recv[mpi_i]][l] = 0.; 2278 } 2279 GET_TAYL(loc_recv[mpi_i],k,p); 2280 } 2281 if (myid == root) 2282 rec_buf = myalloc1(count*process_count*(k+pk1)); 2283 MPI_Gather( trade , (k+pk1)*count, MPI_DOUBLE, rec_buf,(k+pk1)*count, MPI_DOUBLE, root, MPI_COMM_WORLD); 2284 free(trade); 2285 if(myid == root){ 2286 mpi_ii=0; 2287 for(arg=0; arg< process_count ;arg++){ 2288 for(mpi_i=0; mpi_i< count; mpi_i++){ 2289 for(l=0;l<k;l++,mpi_ii++) 2290 rpp_T[loc_recv[mpi_i]][l] = trade[mpi_ii]; 2291 for(l=0;l<pk1;l++,mpi_ii++) 2292 rpp_A[loc_recv[mpi_i]][l] += trade[mpi_ii]; 2293 } 2294 } 2295 free(rec_buf); 2296 } 2297 #endif 2298 free(loc_recv); 2299 break; 2300 /*--------------------------------------------------------------------------*/ 2301 2302 case reduce: 2303 use_reduce=0; 2304 mpi_op=get_locint_r(); 2305 if(mpi_op == ADOLC_MPI_SUM) use_reduce=1; 2306 case gather: 2317 2307 myid = get_locint_r(); // process id 2318 2308 root = get_locint_r(); // root 2309 count2 = get_locint_r(); // count*process_count 2310 if (root == myid){ 2311 loc_recv = (locint*) malloc (count2*sizeof(locint)); 2312 for(mpi_i=0;mpi_i<count2;mpi_i++) 2313 loc_recv[count2-1-mpi_i] = get_locint_r(); // Receive Buffer 2314 } 2315 arg = get_locint_r(); // count*process_count 2316 arg = get_locint_r(); // process id 2317 arg = get_locint_r(); // root 2319 2318 count = get_locint_r(); // count 2320 loc_send = (int*) malloc(count*sizeof(int)); 2321 /* Must use an additional value to send the right locints back */ 2322 if (use_reduce==1){ 2323 if (mpi_op == ADOLC_MPI_SUM){ 2324 for(mpi_i=0;mpi_i<count;mpi_i++) 2325 loc_send[mpi_i] = get_locint_r(); // Send Buffer 2326 } else { 2327 for(mpi_i=0;mpi_i<count;mpi_i++) 2328 loc_send[count-1-mpi_i] = get_locint_r(); // Send Buffer 2329 } 2330 } 2319 loc_send = (locint*) calloc(count,sizeof(locint)); 2320 for(mpi_i=0;mpi_i<count;mpi_i++) 2321 loc_send[count-1-mpi_i] = get_locint_r(); // Send Buffer 2331 2322 arg = get_locint_r(); // count 2332 trade = myalloc1(count*k); 2323 #if defined(_HOS_) 2333 2324 rec_buf = NULL; 2334 2325 if(myid == root ){ 2335 rec_buf = myalloc1(count2* k);2336 i=0;2326 rec_buf = myalloc1(count2*(k+k1)); 2327 mpi_ii=0; 2337 2328 for (mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2338 for( l=0; l < k ; l++){ 2339 rec_buf[i] = rpp_T[loc_recv[mpi_i]][l]; 2340 i++; 2329 for( l=0; l < k ; l++,mpi_ii++) 2330 rec_buf[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2331 for( l=0; l < k1 ; l++,mpi_ii++){ 2332 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2333 rpp_A[loc_recv[mpi_i]][l] = 0.; 2341 2334 } 2342 2335 GET_TAYL(loc_recv[mpi_i],k,p); 2343 2336 } 2344 2337 } 2345 MPI_Scatter(rec_buf,k*count, MPI_DOUBLE , trade, count*k,MPI_DOUBLE, root, MPI_COMM_WORLD);2346 i=0;2347 for (mpi_i=0; mpi_i< count ;mpi_i++)2348 for( l=0; l < k ; l++,i++)2349 rpp_T[loc_send[mpi_i]][l] = trade[i];2350 if(myid==root) myfree1(rec_buf);2351 myfree1(trade);2352 #if defined(_HOS_) 2353 arg = k1;2338 trade = myalloc1((k+k1)*count); 2339 MPI_Scatter(rec_buf,(k+k1)*count, MPI_DOUBLE , trade, count*(k+k1),MPI_DOUBLE, root, MPI_COMM_WORLD); 2340 mpi_ii=0; 2341 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2342 for(l=0;l<k;l++,mpi_ii++) 2343 rpp_T[loc_send[mpi_i]][l] = trade[mpi_ii]; 2344 for(l=0;l<k1;l++,mpi_ii++) 2345 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2346 } 2354 2347 #endif 2355 2348 #if defined(_HOV_) 2356 arg = pk1;2357 #endif2358 trade = myalloc1(count*arg);2359 2349 rec_buf = NULL; 2360 2350 if(myid == root ){ 2361 rec_buf = myalloc1(count2* arg);2362 i=0;2351 rec_buf = myalloc1(count2*(k+pk1)); 2352 mpi_ii=0; 2363 2353 for (mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2364 for( l=0; l < arg ; l++){ 2365 rec_buf[i] = rpp_A[loc_recv[mpi_i]][l]; 2366 rpp_A[loc_recv[mpi_i]][l] = 0; 2367 i++; 2354 for( l=0; l < k ; l++,mpi_ii++) 2355 rec_buf[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2356 for( l=0; l < pk1 ; l++,mpi_ii++){ 2357 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2358 rpp_A[loc_recv[mpi_i]][l] = 0.; 2368 2359 } 2360 GET_TAYL(loc_recv[mpi_i],k,p); 2369 2361 } 2370 2362 } 2371 MPI_Scatter(rec_buf,arg*count, MPI_DOUBLE , trade, count*arg,MPI_DOUBLE, root, MPI_COMM_WORLD); 2372 i=0; 2373 for (mpi_i=0; mpi_i< count ;mpi_i++) 2374 for( l=0; l < arg ; l++,i++) 2375 rpp_A[loc_send[mpi_i]][l] += trade[i]; 2376 2377 if(myid==root) myfree1(rec_buf); 2378 myfree1(trade); 2379 2363 trade = myalloc1((k+pk1)*count); 2364 MPI_Scatter(rec_buf,(k+pk1)*count, MPI_DOUBLE , trade, count*(k+pk1),MPI_DOUBLE, root, MPI_COMM_WORLD); 2365 mpi_ii=0; 2366 for(mpi_i=0; mpi_i< count ;mpi_i++){ 2367 for(l=0;l<k;l++,mpi_ii++) 2368 rpp_T[loc_send[mpi_i]][l] = trade[mpi_ii]; 2369 for(l=0;l<pk1;l++,mpi_ii++) 2370 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2371 } 2372 #endif 2380 2373 use_reduce=0; 2381 2374 if(myid==root) free(loc_recv); … … 2384 2377 /*--------------------------------------------------------------------------*/ 2385 2378 case scatter: 2386 2387 loc_recv = (int*) malloc(count2*sizeof(int));2388 for(mpi_i=0; mpi_i < count2 ;mpi_i++)2389 2390 res= get_locint_r(); // recvcount (count)2391 2392 2393 if(myid==root){2394 count = get_locint_r(); // sendcount (count*process_count)2395 loc_send = (int*) malloc(count*sizeof(int));2396 2397 loc_send[count-1-mpi_i] 2398 2399 2400 2401 2402 2403 rec_buf = (double*) myalloc1(count2*k);2379 count2 = get_locint_r(); // recvcount (count) 2380 loc_recv = (locint*) malloc(count2*sizeof(locint)); 2381 for(mpi_i=0;mpi_i<count2;mpi_i++) 2382 loc_recv[count2-1-mpi_i] = get_locint_r(); // recv Buffer 2383 arg = get_locint_r(); // recvcount (count) 2384 myid = get_locint_r(); // process id 2385 root = get_locint_r(); // root 2386 count = get_locint_r(); // sendcount (count*process_count) 2387 if(myid==root){ 2388 loc_send = (locint*) malloc(count*sizeof(locint)); 2389 for(mpi_i=0;mpi_i<count;mpi_i++) 2390 loc_send[count-1-mpi_i]= get_locint_r(); 2391 } 2392 res = get_locint_r(); // id 2393 res = get_locint_r(); // root 2394 res = get_locint_r(); // sendcount (count*process_count) 2395 #if defined(_HOS_) 2396 rec_buf = myalloc1(count2*(k+k1)); 2404 2397 trade = NULL; 2405 if(myid == root) 2406 trade = myalloc1(count*k); 2407 2408 i=0; 2409 for (mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2410 for( l=0; l < k ; l++,i++) 2411 rec_buf[i] = rpp_T[loc_recv[mpi_i]][l]; 2398 rec_buf = myalloc1(count2*(k+k1)); 2399 mpi_ii=0; 2400 for(mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2401 for( l=0; l < k ; l++,mpi_ii++) 2402 rec_buf[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2403 for( l=0; l < k1 ; l++,mpi_ii++){ 2404 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2405 rpp_A[loc_recv[mpi_i]][l] = 0.; 2406 } 2412 2407 GET_TAYL(loc_recv[mpi_i],k,p); 2413 2408 } 2414 2415 MPI_Gather(rec_buf,k*count2, MPI_DOUBLE, trade, k*count2, MPI_DOUBLE, root, MPI_COMM_WORLD); 2416 2409 if (myid == root) 2410 trade = myalloc1(count*(k+k1)); 2411 MPI_Gather(rec_buf,count2*(k+k1), MPI_DOUBLE, trade,count2*(k+k1), MPI_DOUBLE, root, MPI_COMM_WORLD); 2412 free(rec_buf); 2417 2413 if (myid == root ){ 2418 i=0; 2419 for (mpi_i=0; mpi_i< count ;mpi_i++){ 2420 for( l=0; l < k ; l++,i++) 2421 rpp_T[loc_send[mpi_i]][l] = trade[i]; 2414 mpi_ii=0; 2415 for( mpi_i=0; mpi_i< count ;mpi_i++){ 2416 for( l=0;l < k ; l++,mpi_ii++) 2417 rpp_T[loc_send[mpi_i]][l] = trade[mpi_ii]; 2418 for( l=0;l < k1; l++,mpi_ii++) 2419 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2422 2420 } 2423 2421 myfree1(trade); 2424 2422 } 2425 myfree1(rec_buf);2426 #if defined(_HOS_)2427 arg = k1;2428 2423 #endif 2429 2424 #if defined(_HOV_) 2430 arg = pk1;2431 #endif2432 rec_buf = (double*) myalloc1(count2*arg);2433 2425 trade = NULL; 2434 if(myid == root ) 2435 trade = myalloc1(count*arg); 2436 2437 i=0; 2438 for (mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2439 for( l=0; l < arg ; l++,i++){ 2440 rec_buf[i] = rpp_A[loc_recv[mpi_i]][l]; 2441 rpp_A[loc_recv[mpi_i]][l] = 0; 2442 } 2443 } 2444 2445 MPI_Gather(rec_buf,arg*count2, MPI_DOUBLE, trade,arg*count2, MPI_DOUBLE, root, MPI_COMM_WORLD); 2446 2426 rec_buf = myalloc1(count2*(k+pk1)); 2427 mpi_ii=0; 2428 for(mpi_i=0 ; mpi_i< count2 ;mpi_i++ ){ 2429 for( l=0; l < k ; l++,mpi_ii++) 2430 rec_buf[mpi_ii] = rpp_T[loc_recv[mpi_i]][l]; 2431 for( l=0; l < pk1 ; l++,mpi_ii++){ 2432 rec_buf[mpi_ii] = rpp_A[loc_recv[mpi_i]][l]; 2433 rpp_A[loc_recv[mpi_i]][l] = 0.; 2434 } 2435 GET_TAYL(loc_recv[mpi_i],k,p); 2436 } 2437 if (myid == root) 2438 trade = myalloc1(count*(k+pk1)); 2439 MPI_Gather(rec_buf,count2*(k+pk1), MPI_DOUBLE, trade,count2*(k+pk1), MPI_DOUBLE, root, MPI_COMM_WORLD); 2440 free(rec_buf); 2447 2441 if (myid == root ){ 2448 i=0; 2449 for (mpi_i=0; mpi_i< count ;mpi_i++){ 2450 for( l=0; l < arg ; l++,i++) 2451 rpp_A[loc_send[mpi_i]][l] += trade[i]; 2452 2442 mpi_ii=0; 2443 for( mpi_i=0; mpi_i< count ;mpi_i++){ 2444 for( l=0;l < k ; l++,mpi_ii++) 2445 rpp_T[loc_send[mpi_i]][l] = trade[mpi_ii]; 2446 for( l=0;l < pk1; l++,mpi_ii++) 2447 rpp_A[loc_send[mpi_i]][l] += trade[mpi_ii]; 2453 2448 } 2454 myfree1(trade); 2455 } 2456 myfree1(rec_buf); 2457 2449 free(trade); 2450 } 2451 #endif 2458 2452 if( myid == root) free(loc_send); 2459 2453 free(loc_recv); -
branches/MPI/ADOL-C/src/uni5_for.c
r273 r274 1040 1040 double *trade, *rec_buf, *mpi_tmp; 1041 1041 MPI_Status status_MPI; 1042 int mpi_i,mpi_ii, *loc_send, *loc_recv, s_r_c=1,use_reduce=0; 1042 int mpi_i,mpi_ii, s_r_c=1,use_reduce=0; 1043 locint *loc_send, *loc_recv; 1043 1044 ADOLC_MPI_Op mpi_op; 1044 int myid,root, count, id=mpi_id,count2, target,tag;1045 int myid,root, count, count2, target,tag; 1045 1046 #if defined(_NONLIND_) 1046 1047 locint *tmp_element; 1047 1048 #endif 1048 #if (defined(_INDO_) | defined(_INT_FOR_))1049 #if defined(_INDO_) 1049 1050 int *trade_loc, *rec_buf_loc; 1050 1051 int *counts, *tmp_counts; 1051 1052 int anz; 1053 #endif 1054 #if defined(_INT_FOR_) 1055 locint *trade_loc, *rec_buf_loc; 1052 1056 #endif 1053 1057 #endif … … 3805 3809 case send_data: // MPI-Send-Befehl 3806 3810 count = get_locint_f(); // first Buffer 3807 loc_send = ( int*) malloc(count*sizeof(int));3811 loc_send = (locint*) malloc(count*sizeof(locint)); 3808 3812 for(mpi_i=0; mpi_i < count; mpi_i++) 3809 3813 loc_send[mpi_i] = get_locint_f(); 3810 count2= get_locint_f();3814 res = get_locint_f(); 3811 3815 target = get_locint_f(); // dest 3812 3816 tag = get_locint_f(); // tag 3813 #if !defined(_NTIGHT_) 3814 // sending dp_t0 3815 trade = (double*)myalloc1( count );3817 3818 #if defined(_ZOS_) 3819 trade = myalloc1( count ); 3816 3820 for (mpi_i=0; mpi_i< count; mpi_i++) 3817 3821 trade[mpi_i] = dp_T0[ loc_send[mpi_i]]; 3818 3822 MPI_Send( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3819 3823 free(trade); 3820 #endif /* END NOT _NTIGHT_ */ 3821 #if defined(_INT_FOR_) 3822 trade_loc = (int*) myalloc1(arg1*p); 3823 for (mpi_i=0; mpi_i< arg1; mpi_i++) { 3824 FOR_0_LE_l_LT_pk 3825 trade_loc[mpi_i*p+l]=up_T[loc_send[mpi_i]][l]; 3826 } 3827 MPI_Send( trade_loc , count*p, MPI_INT , target, tag , MPI_COMM_WORLD); 3828 free(trade_loc); 3829 #endif /* END INT_FOR */ 3824 #endif /* _ZOS_ */ 3830 3825 #if defined(_FOS_) 3831 trade = (double*) myalloc1(arg1); 3832 for (mpi_i=0; mpi_i< arg1; mpi_i++) { 3833 trade[mpi_i]=dp_T[loc_send[mpi_i]]; 3834 } 3835 MPI_Send( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3826 trade = myalloc1(count*2); 3827 for (mpi_i=0; mpi_i< count; mpi_i++) { 3828 trade[2*mpi_i ]= dp_T0[loc_send[mpi_i]]; 3829 trade[2*mpi_i+1]= dp_T[loc_send[mpi_i]]; 3830 } 3831 MPI_Send( trade , count*2, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3836 3832 free(trade); 3837 3833 #endif /* END FOS */ 3838 3834 #if defined(_FOV_) 3839 trade = (double*) myalloc1(p*count); 3835 trade = myalloc1((p+1)*count); 3836 mpi_ii=0; 3840 3837 for (mpi_i=0; mpi_i< count; mpi_i++) { 3841 for(i=0;i<p;i++) 3842 trade[p*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 3843 } 3844 MPI_Send( trade , arg1*p, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3838 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 3839 mpi_ii++; 3840 for(i=0;i<p;i++,mpi_ii++) 3841 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 3842 } 3843 MPI_Send( trade , count*(p+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3845 3844 free(trade); 3846 3845 #endif /* END FOV */ 3847 3846 #if defined(_HOS_) 3848 trade = (double*) myalloc1(count * k); 3849 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 3850 for (mpi_i=0; mpi_i< count; mpi_i++) 3851 for (i=0; i<k; i++) 3852 trade[k*mpi_i + i] = dpp_T[loc_send[mpi_i]][i]; 3853 MPI_Send( trade , count*k, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3847 trade = myalloc1(count*(k+1)); 3848 mpi_ii=0; 3849 for (mpi_i=0; mpi_i< count; mpi_i++){ 3850 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 3851 mpi_ii++; 3852 for (i=0; i<k; i++,mpi_ii++) 3853 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 3854 } 3855 MPI_Send( trade , count*(k+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3854 3856 free(trade); 3855 3857 #endif /* END HOS */ 3856 3858 #if defined(_HOV_) 3857 trade = (double*) myalloc1(count*p*k); 3858 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 3859 for (mpi_i=0; mpi_i< count; mpi_i++) 3860 for (i=0; i<p*k; i++) 3861 trade[p*k*mpi_i + i] = dpp_T[loc_send[mpi_i]][i]; 3862 MPI_Send( trade , count*p*k, MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3859 trade = myalloc1(count*(p*k+1)); 3860 mpi_ii=0; 3861 for (mpi_i=0; mpi_i< count; mpi_i++){ 3862 trade[mpi_ii] = dp_T0[loc_send[mpi_ii]]; 3863 mpi_ii++; 3864 for (i=0; i<p*k; i++,mpi_ii) 3865 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 3866 } 3867 MPI_Send( trade , count*(p*k+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3863 3868 free(trade); 3864 3869 #endif /* END HOV */ 3870 3871 #if defined(_TIGHT_) /* with dp_T0 */ 3872 #if defined(_INT_FOR_) 3873 trade = myalloc1(count*(p+1)); 3874 mpi_ii=0; 3875 for(mpi_i=0; mpi_i< count; mpi_i++) { 3876 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 3877 mpi_ii++; 3878 for(l=0;l<p;l++,mpi_ii++) 3879 trade[mpi_ii]= (double) up_T[loc_send[mpi_i]][l]; 3880 } 3881 MPI_Send( trade_loc , count*(p+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3882 free(trade_loc); 3883 #endif /* END INT_FOR */ 3865 3884 #if defined(_INDO_) 3866 // getting information about count of entries 3867 counts = (int*) malloc( count*sizeof(int) ); 3885 trade = myalloc1(count*2); 3868 3886 anz=0; 3869 3887 for (mpi_i=0; mpi_i< count; mpi_i++){ 3870 counts[mpi_i] = (int) ind_dom[loc_send[mpi_i]][0]; 3871 anz += counts[mpi_i]; 3872 } 3873 MPI_Send( counts , count , MPI_INT , target, tag , MPI_COMM_WORLD); 3888 trade[2*mpi_i ] = dp_T0[loc_send[mpi_i]]; 3889 trade[2*mpi_i+1]= (double) ind_dom[loc_send[mpi_i]][0]; 3890 anz += ind_dom[loc_send[mpi_i]][0]; 3891 } 3892 MPI_Send( trade , count*2 , MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3874 3893 3875 3894 // sending index domains … … 3886 3905 free(trade_loc); 3887 3906 } 3888 free(counts); 3907 free(trade); 3908 #endif 3909 #if defined(_NONLIND_) 3910 trade = myalloc1(s_r_indep*2); 3911 anz=0; 3912 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 3913 trade[2*mpi_i ] = dp_T0[loc_send[mpi_i]]; 3914 trade[2*mpi_i+1]= (double) nonl_dom[loc_send[mpi_i]][0]; 3915 anz += nonl_dom[loc_send[mpi_i]][0]; 3916 } 3917 MPI_Send( trade , s_r_indep*2 , MPI_DOUBLE , target, tag , MPI_COMM_WORLD); 3918 // sending index domains 3919 if (anz >0 ){ 3920 trade_loc = (int*) malloc(anz*sizeof(int)); 3921 l =0; 3922 for (mpi_i=0; mpi_i < s_r_indep ; mpi_i++ ) 3923 for (i=2; i < nonl_dom[mpi_i][0]+2 ; i++ ){ 3924 trade_loc[l] = nonl_dom[mpi_i][i]; 3925 l++; 3926 } 3927 MPI_Send( trade_loc , anz , MPI_INT , target, tag , MPI_COMM_WORLD); 3928 free( trade_loc); 3929 } 3930 free( trade); 3931 #endif // end _NONLIND_ 3932 #endif /* END OF TIGHT */ 3933 #if defined(_NTIGHT_) /* without dp_T0 */ 3934 #if defined(_INT_FOR_) 3935 trade_loc = (locint*) malloc(count*p*sizeof(locint)); 3936 mpi_ii=0; 3937 for(mpi_i=0; mpi_i< count; mpi_i++) { 3938 for(l=0;l<p;l++,mpi_ii++) 3939 trade[mpi_ii]= up_T[loc_send[mpi_i]][l]; 3940 } 3941 MPI_Send( trade_loc , count*p, MPI_UNSIGNED_LONG , target, tag , MPI_COMM_WORLD); 3942 free(trade_loc); 3943 #endif /* END INT_FOR */ 3944 #if defined(_INDO_) 3945 counts = (int*) malloc(count*sizeof(int)); 3946 anz=0; 3947 for (mpi_i=0; mpi_i< count; mpi_i++){ 3948 trade[mpi_i]= ind_dom[loc_send[mpi_i]][0]; 3949 anz += ind_dom[loc_send[mpi_i]][0]; 3950 } 3951 MPI_Send( counts , count , MPI_INT , target, tag , MPI_COMM_WORLD); 3952 3953 // sending index domains 3954 if (anz > 0 ){ 3955 trade_loc = (int*) malloc( anz*sizeof(int) ); 3956 l =0; 3957 for (mpi_i=0; mpi_i< count; mpi_i++){ 3958 for(i=2;i < ind_dom[loc_send[mpi_i]][0]+2 ;i++ ){ 3959 trade_loc[l] = ind_dom[loc_send[mpi_i]][i]; 3960 l++; 3961 } 3962 } 3963 MPI_Send( trade_loc , anz , MPI_INT , target, tag , MPI_COMM_WORLD); 3964 free(trade_loc); 3965 } 3966 free(trade); 3889 3967 #endif 3890 3968 #if defined(_NONLIND_) 3891 3969 counts = (int*) malloc(s_r_indep*sizeof(int)); 3892 3970 anz=0; 3893 // Send information about counts of ind_dom and nonl_dom3894 3971 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 3895 counts[mpi_i] = (int) nonl_dom[mpi_i][0]; 3896 anz += counts[mpi_i]; 3897 } 3898 3972 counts[mpi_i]= nonl_dom[loc_send[mpi_i]][0]; 3973 anz += nonl_dom[loc_send[mpi_i]][0]; 3974 } 3899 3975 MPI_Send( counts , s_r_indep , MPI_INT , target, tag , MPI_COMM_WORLD); 3900 3901 3976 // sending index domains 3902 3977 if (anz >0 ){ … … 3911 3986 free( trade_loc); 3912 3987 } 3913 free( 3988 free(counts); 3914 3989 #endif // end _NONLIND_ 3990 #endif /* END OF TIGHT */ 3915 3991 free(loc_send); 3916 3992 break; 3917 3993 /*--------------------------------------------------------------------------*/ 3918 3994 case receive_data: // MPI-Receive 3919 count =get_locint_f(); // Location3920 loc_recv = ( int*) malloc(count*sizeof(int));3995 count =get_locint_f(); // count 3996 loc_recv = (locint*) malloc(count*sizeof(locint)); 3921 3997 for(mpi_i=0; mpi_i<count;mpi_i++) 3922 3998 loc_recv[mpi_i] = get_locint_f(); 3923 count2 =get_locint_f(); // Location3999 res =get_locint_f(); // count 3924 4000 target = get_locint_f(); // source 3925 4001 tag = get_locint_f(); // tag 3926 #if !defined(_NTIGHT_)3927 // receiving values for dp_T03928 trade = (double*)myalloc1( count );3929 MPI_Recv( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);3930 for(mpi_i =0; mpi_i < count; mpi_i++){4002 #if defined(_ZOS_) 4003 // receiving values for dp_T0 4004 trade = myalloc1( count ); 4005 MPI_Recv( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4006 for(mpi_i =0; mpi_i < count; mpi_i++){ 3931 4007 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 3932 4008 dp_T0[loc_recv[mpi_i]] = trade[mpi_i]; 3933 } 3934 free(trade); 3935 #endif /* END NOT _NTIGHT_ */ 4009 } 4010 free(trade); 4011 #endif /* END _ZOS_ */ 4012 #if defined(_FOS_) 4013 trade = myalloc1(count*2); 4014 MPI_Recv( trade , count*2, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4015 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 4016 for(mpi_i=0; mpi_i< count; mpi_i++){ 4017 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4018 dp_T0[loc_recv[mpi_i]] = trade[2*mpi_i]; 4019 dp_T[loc_recv[mpi_i]] = trade[2*mpi_i]+1; 4020 } 4021 free(trade); 4022 #endif 4023 #if defined(_FOV_) 4024 trade = myalloc1(count*(p+1)); 4025 MPI_Recv( trade , (p+1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4026 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 4027 mpi_ii=0; 4028 for (mpi_i=0; mpi_i< count; mpi_i++) { 4029 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4030 dp_T0[loc_recv[mpi_i]] = trade[mpi_ii]; 4031 mpi_ii++; 4032 for(i=0;i<p;i++,mpi_ii++) 4033 dpp_T[loc_recv[mpi_i]][i] = trade[mpi_ii]; 4034 } 4035 free(trade); 4036 #endif 4037 #if defined(_HOS_) 4038 trade =myalloc1(count*(k+1)); 4039 MPI_Recv( trade , (k+1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4040 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 4041 mpi_ii=0; 4042 for (mpi_i=0; mpi_i< count; mpi_i++) { 4043 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4044 dp_T0[loc_recv[mpi_i]] = trade[mpi_i]; 4045 for(i=0; i < k ; i++ ) 4046 dpp_T[loc_recv[mpi_i]][i] = trade[k*mpi_i+i]; 4047 } 4048 myfree1(trade); 4049 #endif 4050 #if defined(_HOV_) 4051 trade =myalloc1(count*(p*k+1)); 4052 MPI_Recv( trade , (p*k+1)*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4053 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */ 4054 mpi_ii=0; 4055 for (mpi_i=0; mpi_i< count; mpi_i++) { 4056 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4057 dp_T0[loc_recv[mpi_i]] = trade[mpi_ii]; 4058 mpi_ii++; 4059 for(i=0; i < p*k ; i++,mpi_ii++) 4060 dpp_T[loc_recv[mpi_i]][i] = trade[mpi_ii]; 4061 } 4062 free(trade); 4063 #endif 4064 #if defined(_TIGHT_) 3936 4065 #if defined(_INT_FOR_) 3937 trade_loc = (int*) malloc(count*p*sizeof(int)); 3938 MPI_Recv( trade_loc , count*p, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4066 trade = myalloc1(count*(p+1)); 4067 MPI_Recv( trade , count*(p+1), MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4068 mpi_ii=0; 3939 4069 for (mpi_i=0; mpi_i< count; mpi_i++) { 3940 FOR_0_LE_l_LT_pk 3941 up_T[loc_recv[mpi_i]][l]=trade_loc[mpi_i*p+l]; 4070 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4071 dp_T0[loc_recv[mpi_i]] = trade[mpi_ii]; 4072 mpi_ii++; 4073 for(l=0; l < p;l++,mpi_ii++) 4074 up_T[loc_recv[mpi_i]][l]= (unsigned long int) trade[mpi_ii]; 3942 4075 } 3943 4076 free(trade_loc); 3944 4077 #endif /* END INT_FOR */ 3945 #if defined(_FOS_)3946 trade = (double*) myalloc1(count);3947 MPI_Recv( trade , count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);3948 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */3949 for (mpi_i=0; mpi_i< count; mpi_i++){3950 dp_T[loc_recv[mpi_i]] = trade[mpi_i];3951 }3952 free(trade);3953 #endif3954 #if defined(_FOV_)3955 trade = (double*) myalloc1(count*p);3956 MPI_Recv( trade , p*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);3957 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */3958 for (mpi_i=0; mpi_i< count; mpi_i++) {3959 for(i=0;i<p;i++)3960 dpp_T[loc_recv[mpi_i]][i] = trade[p*mpi_i+i];3961 }3962 free(trade);3963 #endif3964 #if defined(_HOS_)3965 trade = (double*) myalloc1(count * k);3966 MPI_Recv( trade , k*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);3967 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */3968 for (mpi_i=0; mpi_i< count; mpi_i++) {3969 for(i=0; i < k ; i++ )3970 dpp_T[loc_recv[mpi_i]][i] = trade[k*mpi_i+i];3971 }3972 myfree1(trade);3973 #endif3974 #if defined(_HOV_)3975 trade = (double*) myalloc1(count * p*k);3976 MPI_Recv( trade , p*k*count, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI);3977 /* Receiving double Values by MPI and try to save Taylorbuffer before overwriting */3978 for (mpi_i=0; mpi_i< count; mpi_i++) {3979 for(i=0; i < p*k ; i++ )3980 dpp_T[loc_recv[mpi_i]][i] = trade[p*k*mpi_i+i];3981 }3982 free(trade);3983 #endif3984 4078 #if defined(_INDO_) 3985 4079 // getting information about count of entries 3986 4080 counts = ( int*) malloc( count*sizeof(int) ); 3987 MPI_Recv( counts , count, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4081 trade = myalloc1(count*2); 4082 MPI_Recv( trade , count*2, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 3988 4083 3989 4084 anz =0; 3990 4085 for (mpi_i=0; mpi_i< count; mpi_i++) { 3991 anz += counts[mpi_i]; 3992 } 4086 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4087 dp_T0[loc_recv[mpi_i]] = trade[2*mpi_i]; 4088 counts[mpi_i] = (int) trade[2*mpi_i+1]; 4089 anz += counts[mpi_i]; 4090 } 4091 free(trade); 3993 4092 if ( anz > 0){ 3994 4093 trade_loc = (int*) malloc( anz*sizeof(int) ); … … 4007 4106 #if defined(_NONLIND_) 4008 4107 counts = ( int*) malloc( s_r_indep*sizeof(int) ); 4009 MPI_Recv( counts , s_r_indep, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4108 trade = myalloc1(s_r_indep*2); 4109 MPI_Recv( trade , s_r_indep*2, MPI_DOUBLE , target, tag , MPI_COMM_WORLD, &status_MPI); 4110 4010 4111 anz =0; 4011 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 4012 anz += (int) counts[mpi_i]; 4013 } 4112 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++) { 4113 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4114 dp_T0[loc_recv[mpi_i]] = trade[2*mpi_i]; 4115 counts[mpi_i] = (int) trade[2*mpi_i+1]; 4116 anz += counts[mpi_i]; 4117 } 4118 free(trade); 4014 4119 if (anz > 0) { 4015 4120 trade_loc = (int*) calloc( anz,sizeof(int) ); … … 4028 4133 free( counts); 4029 4134 #endif // end _NONLIND_ 4135 #endif /* END TIGHT */ 4136 #if defined(_NTIGHT_) 4137 #if defined(_INT_FOR_) 4138 trade_loc = (locint*) malloc(count*p*sizeof(locint)); 4139 MPI_Recv( trade_loc , count*p, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4140 for (mpi_i=0; mpi_i< count; mpi_i++) { 4141 FOR_0_LE_l_LT_pk 4142 up_T[loc_recv[mpi_i]][l]=trade_loc[mpi_i*p+l]; 4143 } 4144 free(trade_loc); 4145 #endif /* END INT_FOR */ 4146 #if defined(_INDO_) 4147 // getting information about count of entries 4148 counts = ( int*) malloc( count*sizeof(int) ); 4149 MPI_Recv( counts , count, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4150 4151 anz =0; 4152 for (mpi_i=0; mpi_i< count; mpi_i++) { 4153 anz += counts[mpi_i]; 4154 } 4155 if ( anz > 0){ 4156 trade_loc = (int*) malloc( anz*sizeof(int) ); 4157 MPI_Recv( trade_loc , anz , MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4158 4159 // combine each index domain ... 4160 l = 0; 4161 for(mpi_i=0; mpi_i < count; mpi_i++){ 4162 combine_index_domain_received_data(loc_recv[mpi_i], counts[mpi_i], ind_dom, &trade_loc[l] ); 4163 l += counts[mpi_i]; 4164 } 4165 free(trade_loc); 4166 } 4167 free( counts); 4168 #endif 4169 #if defined(_NONLIND_) 4170 counts = ( int*) malloc( s_r_indep*sizeof(int) ); 4171 MPI_Recv( counts , s_r_indep, MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4172 anz =0; 4173 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 4174 anz += (int) counts[mpi_i]; 4175 } 4176 if (anz > 0) { 4177 trade_loc = (int*) calloc( anz,sizeof(int) ); 4178 MPI_Recv( trade_loc , anz , MPI_INT , target, tag , MPI_COMM_WORLD, &status_MPI); 4179 4180 // combine each index domain ... 4181 l = 0; 4182 for (mpi_i=0; mpi_i < s_r_indep; mpi_i++){ 4183 // nonl_dom settings 4184 extend_nonlinearity_domain_combine_received_trade(mpi_i, counts[mpi_i], nonl_dom, &trade_loc[l] ); 4185 l += counts[mpi_i]; 4186 } 4187 4188 free( trade_loc); 4189 } 4190 free( counts); 4191 #endif // end _NONLIND_ 4192 #endif /* END OF NTIGHT */ 4030 4193 free(loc_recv); 4031 4194 break; … … 4035 4198 case broadcast: 4036 4199 count = get_locint_f(); // count 4037 loc_send = ( int*) malloc(count*sizeof(int));4200 loc_send = (locint*) malloc(count*sizeof(locint)); 4038 4201 for(mpi_i=0;mpi_i<count;mpi_i++) 4039 4202 loc_send[mpi_i] = get_locint_f(); // Send Location … … 4041 4204 root = get_locint_f(); // root 4042 4205 myid = get_locint_f(); // process id 4043 #if !defined(_NTIGHT_) 4206 4207 #if defined(_ZOS_) 4044 4208 // receiving values for dp_T0 4045 trade = (double*)myalloc1( count );4209 trade = myalloc1( count ); 4046 4210 if (myid == root){ 4047 4211 for(mpi_i =0; mpi_i < count ; mpi_i++) … … 4054 4218 } 4055 4219 free(trade); 4056 #endif /* END NOT _NTIGHT_ */ 4057 #if defined(_INT_FOR_) 4058 trade_loc = (int*) myalloc1(count*p); 4059 for (mpi_i=0; mpi_i< count; mpi_i++) { 4060 FOR_0_LE_l_LT_pk 4061 trade_loc[mpi_i*p+l]=up_T[loc_send[mpi_i]][l]; 4062 } 4063 MPI_Bcast(trade_loc,count*p, MPI_INT, root, MPI_COMM_WORLD); 4064 for( mpi_i =0; mpi_i < count; mpi_i++){ 4065 FOR_0_LE_l_LT_pk 4066 up_T[loc_send[mpi_i]][l] = trade_loc[mpi_i*p+l]; 4067 } 4068 free(trade_loc); 4069 #endif /* END INT_FOR */ 4220 #endif /* END ZOS */ 4070 4221 #if defined(_FOS_) 4071 trade = (double*) myalloc1( count);4222 trade = myalloc1( count+2 ); 4072 4223 if (myid ==root){ 4073 4224 for(mpi_i =0; mpi_i < count; mpi_i++){ 4074 trade[mpi_i] = dp_T[loc_send[mpi_i]]; 4225 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4226 trade[2*mpi_i+1] = dp_T[loc_send[mpi_i]]; 4075 4227 } 4076 4228 } 4077 MPI_Bcast(trade,count, MPI_DOUBLE, root, MPI_COMM_WORLD); 4078 if ( myid != root){ 4079 for( mpi_i =0; mpi_i < count; mpi_i++) 4080 dp_T[loc_send[mpi_i]] = trade[mpi_i]; 4229 MPI_Bcast(trade,count*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4230 4231 for( mpi_i =0; mpi_i < count; mpi_i++){ 4232 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4233 dp_T0[loc_send[mpi_i]] = trade[2*mpi_i]; 4234 dp_T[loc_send[mpi_i]] = trade[2*mpi_i+1]; 4081 4235 } 4082 4236 free(trade); 4083 4237 #endif 4084 4238 #if defined(_FOV_) 4085 trade = (double*) myalloc1( count*p);4239 trade = myalloc1( count*(p+1)); 4086 4240 if (myid ==root){ 4087 for(mpi_i =0; mpi_i < count; mpi_i++) 4088 for(i=0; i<p; i++) 4089 trade[p*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4090 } 4091 MPI_Bcast(trade,count*p, MPI_DOUBLE, root, MPI_COMM_WORLD); 4092 if ( myid != root){ 4093 for(mpi_i =0; mpi_i < count; mpi_i++) 4094 for(i=0; i<p; i++) 4095 dpp_T[loc_send[mpi_i]][i] = trade[p*mpi_i+i]; 4241 mpi_ii=0; 4242 for(mpi_i =0; mpi_i < count; mpi_i++){ 4243 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4244 mpi_ii++; 4245 for(i=0; i<p; i++,mpi_ii++) 4246 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4247 } 4248 } 4249 MPI_Bcast(trade,count*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 4250 mpi_ii=0; 4251 for(mpi_i =0; mpi_i < count; mpi_i++){ 4252 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4253 dp_T0[loc_send[mpi_i]] = trade[ mpi_ii]; 4254 mpi_ii++; 4255 for(i=0; i<p; i++,mpi_ii++) 4256 dpp_T[loc_send[mpi_i]][i] = trade[mpi_ii]; 4096 4257 } 4097 4258 free(trade); 4098 4259 #endif 4099 4260 #if defined(_HOS_) 4100 trade = (double*) myalloc1(count * k);4261 trade = myalloc1(count * (k+1)); 4101 4262 if (myid ==root){ 4102 for(mpi_i =0; mpi_i < count; mpi_i++) 4103 for(i=0; i<k; i++) 4104 trade[k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4105 } 4106 MPI_Bcast(trade,count*k, MPI_DOUBLE, root, MPI_COMM_WORLD); 4107 if ( myid != root){ 4108 for(mpi_i =0; mpi_i < count; mpi_i++) 4109 for(i=0; i<k; i++) 4110 dpp_T[loc_send[mpi_i]][i] = trade[k*mpi_i+i]; 4263 mpi_ii=0; 4264 for(mpi_i =0; mpi_i < count; mpi_i++){ 4265 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4266 mpi_ii++; 4267 for(i=0; i<k; i++,mpi_ii++) 4268 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4269 } 4270 } 4271 MPI_Bcast(trade,count*(k+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 4272 mpi_ii=0; 4273 for(mpi_i =0; mpi_i < count; mpi_i++){ 4274 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4275 dp_T0[loc_send[mpi_i]] = trade[ mpi_ii]; 4276 mpi_ii++; 4277 for(i=0; i<k; i++,mpi_ii++) 4278 dpp_T[loc_send[mpi_i]][i] = trade[mpi_ii]; 4111 4279 } 4112 4280 free(trade); 4113 4281 #endif 4114 4282 #if defined(_HOV_) 4115 trade = (double*) myalloc1(count * p*k);4283 trade = myalloc1(count * (p*k+1)); 4116 4284 if (myid ==root){ 4117 for(mpi_i =0; mpi_i < count; mpi_i++) 4118 for(i=0; i<p*k; i++) 4119 trade[p*k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4120 } 4121 MPI_Bcast(trade,count*p*k, MPI_DOUBLE, root, MPI_COMM_WORLD); 4122 if ( myid != root){ 4123 for(mpi_i =0; mpi_i < count; mpi_i++) 4124 for(i=0; i<p*k; i++) 4125 dpp_T[loc_send[mpi_i]][i] = trade[p*k*mpi_i+i]; 4285 mpi_ii=0; 4286 for(mpi_i =0; mpi_i < count; mpi_i++){ 4287 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4288 mpi_ii++; 4289 for(i=0; i<p*k; i++,mpi_ii++) 4290 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4291 } 4292 } 4293 MPI_Bcast(trade,count*(p*k+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 4294 mpi_ii=0; 4295 for(mpi_i =0; mpi_i < count; mpi_i++){ 4296 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4297 dp_T0[loc_send[mpi_i]] = trade[ mpi_ii]; 4298 mpi_ii++; 4299 for(i=0; i<p*k; i++,mpi_ii++) 4300 dpp_T[loc_send[mpi_i]][i] = trade[mpi_ii]; 4126 4301 } 4127 4302 free(trade); 4128 4303 #endif 4304 #if defined(_TIGHT_) 4305 #if defined(_INT_FOR_) 4306 trade = myalloc1(count*(p+1)); 4307 if (myid == root){ 4308 mpi_ii=0; 4309 for(mpi_i=0; mpi_i< count; mpi_i++) { 4310 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4311 mpi_ii++; 4312 for(l=0; l< p; l++,mpi_ii++) 4313 trade[mpi_ii]= (double) up_T[loc_send[mpi_i]][l]; 4314 } 4315 } 4316 MPI_Bcast(trade,count*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 4317 mpi_ii=0; 4318 for( mpi_i =0; mpi_i < count; mpi_i++){ 4319 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4320 dp_T0[loc_send[mpi_i]] = trade[mpi_ii]; 4321 mpi_ii++; 4322 for(l=0; l<p; l++,mpi_ii++) 4323 up_T[loc_send[mpi_i]][l] = (unsigned long int) trade[mpi_ii]; 4324 } 4325 free(trade); 4326 #endif /* END INT_FOR */ 4129 4327 #if defined(_INDO_) 4130 // getting information about count of entries 4131 counts = ( int*) malloc( count*sizeof(int) ); 4328 trade = myalloc1( count*2); 4132 4329 if (myid ==root){ 4133 for(mpi_i =0; mpi_i < count; mpi_i++) 4134 counts[mpi_i] = ind_dom[loc_send[mpi_i]][0]; 4135 } 4136 MPI_Bcast(counts,count, MPI_INT, root, MPI_COMM_WORLD); 4137 4330 for(mpi_i =0; mpi_i < count; mpi_i++){ 4331 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4332 trade[2*mpi_i+1] = (double) ind_dom[loc_send[mpi_i]][0]; 4333 } 4334 } 4335 MPI_Bcast(trade,count*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4336 4337 counts = (int*) malloc(count*sizeof(int)); 4138 4338 anz =0; 4139 for (mpi_i=0; mpi_i< count; mpi_i++) { 4140 anz += counts[mpi_i]; 4141 } 4339 for(mpi_i=0; mpi_i< count; mpi_i++) { 4340 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4341 dp_T0[loc_send[mpi_i]] = trade[2*mpi_i]; 4342 counts[mpi_i] = (int) trade[2*mpi_i+1]; 4343 anz += counts[mpi_i]; 4344 } 4345 free(trade); 4142 4346 if ( anz > 0){ 4143 4347 trade_loc = (int*) malloc( anz*sizeof(int) ); … … 4151 4355 } 4152 4356 MPI_Bcast(trade_loc,anz, MPI_INT, root, MPI_COMM_WORLD); 4153 if(myid != root){ 4154 // combine each index domain ... 4357 if (myid != root){ 4155 4358 l = 0; 4156 4359 for(mpi_i=0; mpi_i < count; mpi_i++){ … … 4164 4367 #endif 4165 4368 #if defined(_NONLIND_) 4166 counts = ( int*) malloc( s_r_indep*sizeof(int));4369 trade = myalloc1( s_r_indep*2); 4167 4370 if (myid ==root){ 4168 for(mpi_i =0; mpi_i < s_r_indep; mpi_i++) 4169 counts[mpi_i] = nonl_dom[mpi_i][0]; 4170 } 4171 MPI_Bcast(counts,s_r_indep, MPI_INT, root, MPI_COMM_WORLD); 4172 4371 for(mpi_i =0; mpi_i < s_r_indep; mpi_i++){ 4372 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4373 trade[2*mpi_i+1] = (double) nonl_dom[mpi_i][0]; 4374 } 4375 } 4376 MPI_Bcast(trade,s_r_indep*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4377 4378 counts = (int*) malloc(s_r_indep*sizeof(int)); 4173 4379 anz =0; 4174 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 4175 anz += (int) counts[mpi_i]; 4176 } 4380 for(mpi_i=0; mpi_i< s_r_indep; mpi_i++) { 4381 IF_KEEP_WRITE_TAYLOR(loc_send[mpi_i],keep,k,p) 4382 dp_T0[loc_send[mpi_i]] = trade[2*mpi_i]; 4383 counts[mpi_i] = (int) trade[2*mpi_i+1]; 4384 anz += counts[mpi_i]; 4385 } 4386 free(trade); 4177 4387 if (anz > 0) { 4178 4388 trade_loc = (int*) calloc( anz,sizeof(int) ); … … 4199 4409 free(counts); 4200 4410 #endif // end _NONLIND_ 4411 #endif /* TIGHT */ 4412 #if defined(_NTIGHT_) 4413 #if defined(_INT_FOR_) 4414 trade_loc = (locint*) malloc(count*p*sizeof(locint)); 4415 for (mpi_i=0; mpi_i< count; mpi_i++) { 4416 FOR_0_LE_l_LT_pk 4417 trade_loc[mpi_i*p+l]=up_T[loc_send[mpi_i]][l]; 4418 } 4419 MPI_Bcast(trade_loc,count*p, MPI_INT, root, MPI_COMM_WORLD); 4420 for( mpi_i =0; mpi_i < count; mpi_i++){ 4421 FOR_0_LE_l_LT_pk 4422 up_T[loc_send[mpi_i]][l] = trade_loc[mpi_i*p+l]; 4423 } 4424 free(trade_loc); 4425 #endif /* END INT_FOR */ 4426 #if defined(_INDO_) 4427 // getting information about count of entries 4428 counts = ( int*) malloc( count*sizeof(int) ); 4429 if (myid ==root){ 4430 for(mpi_i =0; mpi_i < count; mpi_i++) 4431 counts[mpi_i] = ind_dom[loc_send[mpi_i]][0]; 4432 } 4433 MPI_Bcast(counts,count, MPI_INT, root, MPI_COMM_WORLD); 4434 4435 anz =0; 4436 for (mpi_i=0; mpi_i< count; mpi_i++) { 4437 anz += counts[mpi_i]; 4438 } 4439 if ( anz > 0){ 4440 trade_loc = (int*) malloc( anz*sizeof(int) ); 4441 if (myid ==root ){ 4442 l=0; 4443 for(mpi_i =0; mpi_i < anz; mpi_i++) 4444 for(i=2; i < ind_dom[loc_send[mpi_i]][0]+2; i++){ 4445 trade_loc[l] = ind_dom[loc_send[mpi_i]][i]; 4446 l++; 4447 } 4448 } 4449 MPI_Bcast(trade_loc,anz, MPI_INT, root, MPI_COMM_WORLD); 4450 if(myid != root){ 4451 // combine each index domain ... 4452 l = 0; 4453 for(mpi_i=0; mpi_i < count; mpi_i++){ 4454 combine_index_domain_received_data(loc_send[mpi_i], counts[mpi_i], ind_dom, &trade_loc[l] ); 4455 l += counts[mpi_i]; 4456 } 4457 } 4458 free(trade_loc); 4459 } 4460 free(counts); 4461 #endif 4462 #if defined(_NONLIND_) 4463 counts = ( int*) malloc( s_r_indep*sizeof(int) ); 4464 if (myid ==root){ 4465 for(mpi_i =0; mpi_i < s_r_indep; mpi_i++) 4466 counts[mpi_i] = nonl_dom[mpi_i][0]; 4467 } 4468 MPI_Bcast(counts,s_r_indep, MPI_INT, root, MPI_COMM_WORLD); 4469 4470 anz =0; 4471 for (mpi_i=0; mpi_i< s_r_indep; mpi_i++){ 4472 anz += (int) counts[mpi_i]; 4473 } 4474 if (anz > 0) { 4475 trade_loc = (int*) calloc( anz,sizeof(int) ); 4476 if (myid == root){ 4477 l=0; 4478 for(mpi_i=0; mpi_i < s_r_indep ; mpi_i++) 4479 for(i=2; i < nonl_dom[mpi_i][0]+2 ; i++){ 4480 trade_loc[l] = nonl_dom[mpi_i][i]; 4481 l++; 4482 } 4483 } 4484 MPI_Bcast(trade_loc,anz, MPI_INT, root, MPI_COMM_WORLD); 4485 if( myid != root){ 4486 // combine each index domain ... 4487 l = 0; 4488 for (mpi_i=0; mpi_i < s_r_indep; mpi_i++){ 4489 // nonl_dom settings 4490 extend_nonlinearity_domain_combine_received_trade(mpi_i, counts[mpi_i], nonl_dom, &trade_loc[l]); 4491 l += counts[mpi_i]; 4492 } 4493 } 4494 free(trade_loc); 4495 } 4496 free(counts); 4497 #endif // end _NONLIND_ 4498 #endif 4201 4499 free(loc_send); 4202 4500 break; … … 4205 4503 case gather: 4206 4504 count = get_locint_f(); // count 4207 loc_send = ( int*) malloc(count*sizeof(int));4505 loc_send = (locint*) malloc(count*sizeof(locint)); 4208 4506 for(mpi_i=0; mpi_i < count ; mpi_i++) 4209 4507 loc_send[mpi_i] = get_locint_f(); // Send Location … … 4213 4511 count2 = get_locint_f(); // count*process_count 4214 4512 if(myid==root){ 4215 loc_recv = ( int*) malloc(count2*sizeof(int));4513 loc_recv = (locint*) malloc(count2*sizeof(locint)); 4216 4514 for(mpi_i=0; mpi_i < count2 ; mpi_i++) 4217 4515 loc_recv[mpi_i] = get_locint_f(); // Receive Location 4516 } 4218 4517 arg = get_locint_f(); // count*process_count 4219 } 4518 arg = get_locint_f(); // root 4519 arg = get_locint_f(); // myid 4220 4520 if (use_reduce == 1) 4221 4521 mpi_op = get_locint_f(); 4222 #if !defined(_NTIGHT_) 4522 4523 #if defined(_ZOS_) 4223 4524 // receiving values for dp_T0 4224 trade = (double*)myalloc1( count );4525 trade = myalloc1( count ); 4225 4526 if (myid == root) 4226 rec_buf = (double*)myalloc1(count2);4527 rec_buf = myalloc1(count2); 4227 4528 else 4228 4529 rec_buf =NULL; … … 4239 4540 } 4240 4541 free(trade); 4241 #endif /* END NOT _NTIGHT_ */ 4242 #if defined(_INT_FOR_) 4243 trade_loc = (int*) malloc(count*p*sizeof(int)); 4244 if (myid == root) 4245 rec_buf_loc = (int*) malloc(count2*p*sizeof(int)); 4246 else 4247 rec_buf =NULL; 4248 for (mpi_i=0; mpi_i< count; mpi_i++) { 4249 FOR_0_LE_l_LT_pk 4250 trade_loc[mpi_i*p+l]=up_T[loc_send[mpi_i]][l]; 4251 } 4252 MPI_Gather(trade_loc,count*p, MPI_INT,rec_buf_loc, count*p,MPI_INT, root, MPI_COMM_WORLD); 4253 if (myid == root){ 4254 for( mpi_i =0; mpi_i < count; mpi_i++){ 4255 FOR_0_LE_l_LT_pk 4256 up_T[loc_recv[mpi_i]][l] = rec_buf_loc[mpi_i*p+l]; 4257 } 4258 free(rec_buf_loc); 4259 } 4260 free(trade_loc); 4261 #endif /* END INT_FOR */ 4542 #endif /* END ZOS */ 4262 4543 #if defined(_FOS_) 4263 trade = (double*) myalloc1( count);4544 trade = myalloc1( count*2 ); 4264 4545 if (myid ==root) 4265 rec_buf = (double*) myalloc1(count2);4546 rec_buf = myalloc1(count2*2); 4266 4547 else 4267 4548 rec_buf = NULL; 4268 4549 for(mpi_i =0; mpi_i < count; mpi_i++) { 4269 trade[mpi_i] = dp_T[loc_send[mpi_i]]; 4270 } 4271 MPI_Gather(trade,count, MPI_DOUBLE,rec_buf, count,MPI_DOUBLE, root, MPI_COMM_WORLD); 4550 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4551 trade[2*mpi_i+1] = dp_T[loc_send[mpi_i]]; 4552 } 4553 MPI_Gather(trade,count*2, MPI_DOUBLE,rec_buf, count*2 ,MPI_DOUBLE, root, MPI_COMM_WORLD); 4272 4554 if ( myid == root){ 4273 4555 for( mpi_i =0; mpi_i < count2; mpi_i++){ 4274 dp_T[loc_recv[mpi_i]] = rec_buf[mpi_i]; 4556 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4557 dp_T0[loc_recv[mpi_i]] = rec_buf[2*mpi_i]; 4558 dp_T[loc_recv[mpi_i]] = rec_buf[2*mpi_i+1]; 4275 4559 } 4276 4560 free(rec_buf); … … 4279 4563 #endif 4280 4564 #if defined(_FOV_) 4281 trade = (double*) myalloc1( count*p);4565 trade = myalloc1( count*(p+1)); 4282 4566 if (myid == root) 4283 rec_buf = (double*) myalloc1(count2*p);4567 rec_buf = myalloc1(count2*(p+1)); 4284 4568 else 4285 4569 rec_buf=NULL; 4286 for(mpi_i =0; mpi_i < count; mpi_i++) 4287 for(i=0; i<p; i++) 4288 trade[p*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4289 4290 MPI_Gather(trade,count*p, MPI_DOUBLE,rec_buf, count*p,MPI_DOUBLE, root, MPI_COMM_WORLD); 4570 mpi_ii=0; 4571 for(mpi_i =0; mpi_i < count; mpi_i++){ 4572 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4573 mpi_ii++; 4574 for(i=0; i<p; i++,mpi_ii++) 4575 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4576 } 4577 MPI_Gather(trade,count*(p+1), MPI_DOUBLE,rec_buf, count*(p+1),MPI_DOUBLE, root, MPI_COMM_WORLD); 4291 4578 if ( myid == root){ 4292 for(mpi_i =0; mpi_i < count2; mpi_i++) 4293 for(i=0; i<p; i++) 4294 dpp_T[loc_recv[mpi_i]][i] = rec_buf[p*mpi_i+i]; 4579 mpi_ii=0; 4580 for(mpi_i =0; mpi_i < count2; mpi_i++){ 4581 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4582 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 4583 mpi_ii++; 4584 for(i=0; i<p; i++,mpi_ii++) 4585 dpp_T[loc_recv[mpi_i]][i] = rec_buf[mpi_ii]; 4586 } 4295 4587 free(rec_buf); 4296 4588 } … … 4298 4590 #endif 4299 4591 #if defined(_HOS_) 4300 trade = (double*) myalloc1(count * k);4301 if (myid == root)4302 rec_buf = (double*) myalloc1(count2*k);4592 trade = myalloc1( count*(k+1)); 4593 if (myid == root) 4594 rec_buf = myalloc1(count2*(k+1)); 4303 4595 else 4304 4596 rec_buf=NULL; 4305 for(mpi_i =0; mpi_i < count; mpi_i++) 4306 for(i=0; i<k; i++) 4307 trade[k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4308 4309 MPI_Gather(trade,count*k, MPI_DOUBLE,rec_buf, count*k,MPI_DOUBLE, root, MPI_COMM_WORLD); 4597 mpi_ii=0; 4598 for(mpi_i =0; mpi_i < count; mpi_i++){ 4599 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4600 mpi_ii++; 4601 for(i=0; i<k; i++,mpi_ii++) 4602 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4603 } 4604 MPI_Gather(trade,count*(k+1), MPI_DOUBLE,rec_buf, count*(k+1),MPI_DOUBLE, root, MPI_COMM_WORLD); 4310 4605 if ( myid == root){ 4311 for(mpi_i =0; mpi_i < count2; mpi_i++) 4312 for(i=0; i<k; i++) 4313 dpp_T[loc_recv[mpi_i]][i] = rec_buf[k*mpi_i+i]; 4606 mpi_ii=0; 4607 for(mpi_i =0; mpi_i < count2; mpi_i++){ 4608 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4609 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 4610 mpi_ii++; 4611 for(i=0; i<k; i++,mpi_ii++) 4612 dpp_T[loc_recv[mpi_i]][i] = rec_buf[mpi_ii]; 4613 } 4314 4614 free(rec_buf); 4315 4615 } … … 4317 4617 #endif 4318 4618 #if defined(_HOV_) 4319 trade = (double*) myalloc1(count * p*k);4619 trade = myalloc1( count*(p*k+1)); 4320 4620 if (myid == root) 4321 rec_buf = (double*) myalloc1(count2*p*k);4621 rec_buf = myalloc1(count2*(p*k+1)); 4322 4622 else 4323 4623 rec_buf=NULL; 4324 for(mpi_i =0; mpi_i < count; mpi_i++) 4325 for(i=0; i<p*k; i++) 4326 trade[p*k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4327 4328 MPI_Gather(trade,count*p*k, MPI_DOUBLE,rec_buf, count*p*k,MPI_DOUBLE, root, MPI_COMM_WORLD); 4624 mpi_ii=0; 4625 for(mpi_i =0; mpi_i < count; mpi_i++){ 4626 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4627 mpi_ii++; 4628 for(i=0; i<p*k; i++,mpi_ii++) 4629 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][i]; 4630 } 4631 MPI_Gather(trade,count*(p*k+1), MPI_DOUBLE,rec_buf, count*(p*k+1),MPI_DOUBLE, root, MPI_COMM_WORLD); 4329 4632 if ( myid == root){ 4330 for(mpi_i =0; mpi_i < count2; mpi_i++) 4331 for(i=0; i<p*k; i++) 4332 dpp_T[loc_recv[mpi_i]][i] = rec_buf[p*k*mpi_i+i]; 4633 mpi_ii=0; 4634 for(mpi_i =0; mpi_i < count2; mpi_i++){ 4635 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4636 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 4637 mpi_ii++; 4638 for(i=0; i<p*k; i++,mpi_ii++) 4639 dpp_T[loc_recv[mpi_i]][i] = rec_buf[mpi_ii]; 4640 } 4333 4641 free(rec_buf); 4334 4642 } 4335 4643 free(trade); 4336 4644 #endif 4645 #if defined(_TIGHT_) 4646 #if defined(_INT_FOR_) 4647 trade = myalloc1(count*(p+1)); 4648 if (myid == root) 4649 rec_buf = myalloc1(count2*(p+1)); 4650 else 4651 rec_buf =NULL; 4652 mpi_ii=0; 4653 for (mpi_i=0; mpi_i< count; mpi_i++) { 4654 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 4655 mpi_ii++; 4656 for(l=0;l<p;l++,mpi_ii++) 4657 trade[mpi_ii]= (double) up_T[loc_send[mpi_i]][l]; 4658 } 4659 MPI_Gather(trade,count*(p+1), MPI_DOUBLE,rec_buf, count*(p+1),MPI_DOUBLE, root, MPI_COMM_WORLD); 4660 if (myid == root){ 4661 mpi_ii=0; 4662 for( mpi_i =0; mpi_i < count; mpi_i++){ 4663 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4664 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 4665 mpi_ii++; 4666 for(l=0;l<p;l++,mpi_ii++) 4667 up_T[loc_recv[mpi_i]][l] = (unsigned long int) rec_buf[mpi_ii]; 4668 } 4669 free(rec_buf_loc); 4670 } 4671 free(trade_loc); 4672 #endif /* END INT_FOR */ 4337 4673 #if defined(_INDO_) 4338 // getting information about count of entries4339 4674 counts = ( int*) malloc( count*sizeof(int) ); 4340 if( myid == root) 4675 trade = myalloc1(count*2); 4676 if( myid == root){ 4341 4677 tmp_counts = ( int*) malloc( count2*sizeof(int) ); 4342 else 4678 rec_buf = myalloc1(count2*2); 4679 } 4680 else { 4343 4681 tmp_counts = NULL; 4344 4345 for(mpi_i =0; mpi_i < count; mpi_i++) 4682 rec_buf = NULL; 4683 } 4684 4685 for(mpi_i =0; mpi_i < count; mpi_i++){ 4686 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4346 4687 counts[mpi_i] = ind_dom[loc_send[mpi_i]][0]; 4347 4348 MPI_Gather(counts,count, MPI_INT, tmp_counts, count, MPI_INT, root, MPI_COMM_WORLD); 4688 trade[2*mpi_i+1] = (double) counts[mpi_i]; 4689 } 4690 4691 MPI_Gather(trade,count*2, MPI_DOUBLE, rec_buf, count*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4349 4692 if (myid == root){ 4693 for(mpi_i =0; mpi_i < count2; mpi_i++ ){ 4694 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4695 dp_T0[loc_recv[mpi_i]] = trade[2*mpi_i]; 4696 tmp_counts[mpi_i] = (int) trade[2*mpi_i+1]; 4697 } 4350 4698 for(mpi_i =0; mpi_i < count; mpi_i++ ){ 4351 4699 for(i=1; i < process_count; i++ ) … … 4353 4701 counts[mpi_i] = tmp_counts[count*i + mpi_i]; 4354 4702 } 4355 } 4703 free(rec_buf); 4704 } 4705 free(trade); 4356 4706 MPI_Bcast(counts,count,MPI_INT, root, MPI_COMM_WORLD); 4357 4707 anz=0; … … 4373 4723 } 4374 4724 } 4375 if (myid == root) rec_buf_loc = (int*) malloc(process_count * anz * sizeof(int) ); 4376 else rec_buf_loc = NULL ; 4725 if (myid == root) 4726 rec_buf_loc = (int*) malloc(process_count * anz * sizeof(int) ); 4727 else 4728 rec_buf_loc = NULL ; 4377 4729 4378 4730 MPI_Gather(trade_loc, anz, MPI_INT, rec_buf_loc, anz, MPI_INT, root, MPI_COMM_WORLD); … … 4394 4746 #endif 4395 4747 #if defined(_NONLIND_) 4396 // getting information about count of entries4397 4748 counts = ( int*) malloc( count*sizeof(int) ); 4398 if( myid == root) 4749 trade = myalloc1(count*2); 4750 if( myid == root){ 4399 4751 tmp_counts = ( int*) malloc( count2*sizeof(int) ); 4400 else 4752 rec_buf = myalloc1(count2*2); 4753 } 4754 else { 4401 4755 tmp_counts = NULL; 4402 4403 for(mpi_i =0; mpi_i < count; mpi_i++) 4756 rec_buf = NULL; 4757 } 4758 4759 for(mpi_i =0; mpi_i < count; mpi_i++){ 4760 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 4404 4761 counts[mpi_i] = nonl_dom[mpi_i][0]; 4405 4406 MPI_Gather(counts,count, MPI_INT, tmp_counts, count, MPI_INT, root, MPI_COMM_WORLD); 4407 if (id == root){ 4762 trade[2*mpi_i+1] = (double) counts[mpi_i]; 4763 } 4764 4765 MPI_Gather(trade,count*2, MPI_DOUBLE, rec_buf, count*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4766 if (myid == root){ 4767 for(mpi_i =0; mpi_i < count2; mpi_i++ ){ 4768 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 4769 dp_T0[loc_recv[mpi_i]] = trade[2*mpi_i]; 4770 tmp_counts[mpi_i] = (int) trade[2*mpi_i+1]; 4771 } 4408 4772 for(mpi_i =0; mpi_i < count; mpi_i++ ){ 4409 4773 for(i=1; i < process_count; i++ ) … … 4411 4775 counts[mpi_i] = tmp_counts[count*i + mpi_i]; 4412 4776 } 4413 } 4777 free(rec_buf); 4778 } 4779 free(trade); 4414 4780 MPI_Bcast(counts,count,MPI_INT, root, MPI_COMM_WORLD); 4415 4781 anz=0; … … 4458 4824 free(counts); 4459 4825 #endif // end _NONLIND_ 4460 if( myid == root) free(loc_recv); 4826 #endif 4827 #if defined(_NTIGHT_) 4828 #if defined(_INT_FOR_) 4829 trade_loc = (locint*) malloc(count*p*sizeof(locint)); 4830 if (myid == root) 4831 rec_buf_loc = (locint*) malloc(count2*p*sizeof(locint)); 4832 else 4833 rec_buf =NULL; 4834 mpi_ii=0; 4835 for(mpi_i=0; mpi_i< count; mpi_i++) { 4836 for(l=0;l<p;l++,mpi_ii++) 4837 trade[mpi_ii]=up_T[loc_send[mpi_i]][l]; 4838 } 4839 MPI_Gather(trade,count*p, MPI_UNSIGNED_LONG,rec_buf, count*p,MPI_UNSIGNED_LONG, root, MPI_COMM_WORLD); 4840 if (myid == root){ 4841 for( mpi_i =0; mpi_i < count; mpi_i++){ 4842 FOR_0_LE_l_LT_pk 4843 up_T[loc_recv[mpi_i]][l] = rec_buf_loc[mpi_i*p+l]; 4844 } 4845 free(rec_buf_loc); 4846 } 4847 free(trade_loc); 4848 #endif /* END INT_FOR */ 4849 #if defined(_INDO_) 4850 // getting information about count of entries 4851 counts = ( int*) malloc( count*sizeof(int) ); 4852 if( myid == root) 4853 tmp_counts = ( int*) malloc( count2*sizeof(int) ); 4854 else 4855 tmp_counts = NULL; 4856 4857 for(mpi_i =0; mpi_i < count; mpi_i++) 4858 counts[mpi_i] = ind_dom[loc_send[mpi_i]][0]; 4859 4860 MPI_Gather(counts,count, MPI_INT, tmp_counts, count, MPI_INT, root, MPI_COMM_WORLD); 4861 if (myid == root){ 4862 for(mpi_i =0; mpi_i < count; mpi_i++ ){ 4863 for(i=1; i < process_count; i++ ) 4864 if( counts[mpi_i] < tmp_counts[count*i + mpi_i]) 4865 counts[mpi_i] = tmp_counts[count*i + mpi_i]; 4866 } 4867 } 4868 MPI_Bcast(counts,count,MPI_INT, root, MPI_COMM_WORLD); 4869 anz=0; 4870 for(mpi_i =0; mpi_i < count; mpi_i++ ) 4871 anz += counts[mpi_i]; 4872 4873 // every process has same counts 4874 if ( anz > 0){ 4875 trade_loc = (int*) malloc( anz*sizeof(int) ); 4876 l=0; 4877 for(mpi_i =0; mpi_i < count; mpi_i++){ 4878 for (i=2; i < ind_dom[loc_send[mpi_i]][0]+2; i++){ 4879 trade_loc[l] = ind_dom[loc_send[mpi_i]][i]; 4880 l++; 4881 } 4882 for(i=ind_dom[loc_send[mpi_i]][0]; i < counts[mpi_i] ; i++ ){ 4883 trade_loc[l] = -10; 4884 l++; 4885 } 4886 } 4887 if (myid == root) rec_buf_loc = (int*) malloc(process_count * anz * sizeof(int) ); 4888 else rec_buf_loc = NULL ; 4889 4890 MPI_Gather(trade_loc, anz, MPI_INT, rec_buf_loc, anz, MPI_INT, root, MPI_COMM_WORLD); 4891 free( trade_loc ); 4892 4893 if(myid == root){ 4894 l = 0; 4895 for(i=0;i<process_count; i++) { 4896 for(mpi_i=0; mpi_i < count; mpi_i++){ 4897 combine_index_domain_received_data(loc_recv[mpi_i], tmp_counts[i*count+mpi_i], ind_dom, &rec_buf_loc[l] ); 4898 l += counts[mpi_i]; 4899 } 4900 } 4901 free(rec_buf_loc); 4902 free(tmp_counts); 4903 } 4904 } 4905 free(counts); 4906 #endif 4907 #if defined(_NONLIND_) 4908 // getting information about count of entries 4909 counts = ( int*) malloc( count*sizeof(int) ); 4910 if( myid == root) 4911 tmp_counts = ( int*) malloc( count2*sizeof(int) ); 4912 else 4913 tmp_counts = NULL; 4914 4915 for(mpi_i =0; mpi_i < count; mpi_i++) 4916 counts[mpi_i] = nonl_dom[mpi_i][0]; 4917 4918 MPI_Gather(counts,count, MPI_INT, tmp_counts, count, MPI_INT, root, MPI_COMM_WORLD); 4919 if (myid == root){ 4920 for(mpi_i =0; mpi_i < count; mpi_i++ ){ 4921 for(i=1; i < process_count; i++ ) 4922 if( counts[mpi_i] < tmp_counts[count*i + mpi_i]) 4923 counts[mpi_i] = tmp_counts[count*i + mpi_i]; 4924 } 4925 } 4926 MPI_Bcast(counts,count,MPI_INT, root, MPI_COMM_WORLD); 4927 anz=0; 4928 for(mpi_i =0; mpi_i < count; mpi_i++ ) 4929 anz += counts[mpi_i]; 4930 4931 // every process has same counts 4932 if ( anz > 0){ 4933 trade_loc = (int*) malloc( anz*sizeof(int) ); 4934 if (myid == root){ 4935 rec_buf_loc = (int*) malloc(process_count * anz * sizeof(int)); 4936 } else { rec_buf_loc =NULL; } 4937 4938 l=0; 4939 for(mpi_i =0; mpi_i < count; mpi_i++){ 4940 for (i=2; i < nonl_dom[mpi_i][0]+2; i++){ 4941 trade_loc[l] = nonl_dom[mpi_i][i]; 4942 l++; 4943 } 4944 for(i=nonl_dom[mpi_i][0]; i < counts[0] ; i++ ){ 4945 trade_loc[l] = -10; 4946 l++; 4947 } 4948 } 4949 MPI_Gather(trade_loc,anz, MPI_INT, rec_buf_loc, anz, MPI_INT, root, MPI_COMM_WORLD); 4950 free( trade_loc ); 4951 if(myid == root){ 4952 // rewrite each index domain rec_buf_loc[pc*process_count*anz + count] 4953 l = 0; 4954 for(mpi_i=0; mpi_i < count2; mpi_i++){ 4955 i = 0; 4956 free(nonl_dom[mpi_i]); 4957 nonl_dom[mpi_i] = (locint*) calloc(2*tmp_counts[mpi_i]+2,sizeof(locint)); 4958 nonl_dom[mpi_i][1] = 2*tmp_counts[mpi_i]; 4959 while ((rec_buf_loc[l+i] > -1 ) && ( i < tmp_counts[mpi_i]) ) { 4960 nonl_dom[mpi_i][i+2]= rec_buf_loc[l+i]; 4961 i++; 4962 } 4963 nonl_dom[mpi_i][0]= i; 4964 l += counts[mpi_i]; 4965 } 4966 free(rec_buf_loc); 4967 } 4968 } 4969 if( myid == root) free(tmp_counts); 4970 free(counts); 4971 #endif // end _NONLIND_ 4972 #endif /* END _NTIGHT_ */ 4973 if ( myid == root) 4974 free(loc_recv); 4461 4975 free(loc_send); 4462 4976 use_reduce=0; … … 4464 4978 case scatter: 4465 4979 count = get_locint_f(); // count*procsize 4980 root = get_locint_f(); 4981 myid = get_locint_f(); 4466 4982 if(myid == root){ 4467 loc_send = ( int*) malloc(count*sizeof(int));4983 loc_send = (locint*) malloc(count*sizeof(locint)); 4468 4984 for(mpi_i=0; mpi_i < count ; mpi_i++) 4469 4985 loc_send[mpi_i] = get_locint_f(); // Send Location 4470 count = get_locint_f(); // count*procsize4471 }4472 r oot= get_locint_f(); // root4473 myid= get_locint_f(); // process id4474 count2 = get_locint_f(); // count 4475 loc_recv = ( int*) malloc(count2*sizeof(int));4986 } 4987 res = get_locint_f(); // count*procsize 4988 res = get_locint_f(); // root 4989 res = get_locint_f(); // process id 4990 count2 = get_locint_f(); // count or recv_count 4991 loc_recv = (locint*) malloc(count2*sizeof(locint)); 4476 4992 for(mpi_i=0;mpi_i< count2; mpi_i++) 4477 4993 loc_recv[mpi_i] = get_locint_f(); // Receive Location 4478 count2 = get_locint_f(); // count 4479 4480 #if !defined(_NTIGHT_) 4994 arg = get_locint_f(); // count 4995 #if defined(_ZOS_) 4481 4996 // receiving values for dp_T0 4482 4997 if (myid == root) 4483 trade = (double*)myalloc1( count );4998 trade = myalloc1( count ); 4484 4999 else 4485 5000 trade = NULL; 4486 5001 4487 rec_buf = (double*)myalloc1(count2);5002 rec_buf = myalloc1(count2); 4488 5003 4489 5004 if(myid == root) … … 4498 5013 } 4499 5014 free(rec_buf); 4500 if(myid==root) free(trade); 4501 #endif /* END NOT _NTIGHT_ */ 4502 #if defined(_INT_FOR_) 4503 if (myid == root) 4504 trade_loc = (int*) malloc(count*p*sizeof(int)); 4505 else 4506 trade_loc =NULL; 4507 rec_buf_loc = (int*) malloc(count2*p*sizeof(int)); 4508 for (mpi_i=0; mpi_i< count; mpi_i++) { 4509 FOR_0_LE_l_LT_pk 4510 trade_loc[mpi_i*p+l]=up_T[loc_send[mpi_i]][l]; 4511 } 4512 MPI_Scatter(trade_loc,count2*p, MPI_INT,rec_buf_loc, count2*p ,MPI_INT, root, MPI_COMM_WORLD); 4513 4514 for( mpi_i =0; mpi_i < count; mpi_i++){ 4515 FOR_0_LE_l_LT_pk 4516 up_T[loc_recv[mpi_i]][l] = rec_buf_loc[mpi_i*p+l]; 4517 } 4518 free(rec_buf_loc); 4519 if(myid==root) free(trade_loc); 4520 #endif /* END INT_FOR */ 5015 if (myid==root) 5016 free(trade); 5017 #endif /* END _ZOS_ */ 4521 5018 #if defined(_FOS_) 4522 rec_buf = (double*) myalloc1( count2 );5019 rec_buf = myalloc1( count2*2 ); 4523 5020 if (myid ==root) 4524 trade = (double*) myalloc1(count);5021 trade = myalloc1(count*2); 4525 5022 else 4526 5023 trade = NULL; 4527 5024 4528 5025 if ( myid == root){ 4529 for(mpi_i =0; mpi_i < count; mpi_i++) 4530 trade[mpi_i] = dp_T[loc_send[mpi_i]]; 4531 } 4532 MPI_Scatter(trade,count2, MPI_DOUBLE,rec_buf, count2, MPI_DOUBLE, root, MPI_COMM_WORLD); 5026 for(mpi_i =0; mpi_i < count; mpi_i++){ 5027 trade[2*mpi_i] = dp_T0[loc_send[mpi_i]]; 5028 trade[2*mpi_i+1] = dp_T[loc_send[mpi_i]]; 5029 } 5030 } 5031 MPI_Scatter(trade,count2*2, MPI_DOUBLE,rec_buf, count2*2, MPI_DOUBLE, root, MPI_COMM_WORLD); 4533 5032 4534 5033 for( mpi_i =0; mpi_i < count2; mpi_i++){ 4535 dp_T[loc_recv[mpi_i]] = rec_buf[mpi_i]; 5034 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5035 dp_T0[loc_recv[mpi_i]] = rec_buf[2*mpi_i]; 5036 dp_T[loc_recv[mpi_i]] = rec_buf[2*mpi_i+1]; 4536 5037 } 4537 5038 free(rec_buf); 4538 if(myid==root) free(trade); 5039 if (myid==root) 5040 free(trade); 4539 5041 #endif 4540 5042 #if defined(_FOV_) 4541 rec_buf = (double*) myalloc1( count2*p); 5043 rec_buf = myalloc1( count2*(p+1)); 5044 if (myid ==root) 5045 trade = myalloc1(count*(p+1)); 5046 else 5047 trade = NULL; 5048 5049 if ( myid == root){ 5050 mpi_ii=0; 5051 for(mpi_i =0; mpi_i < count; mpi_i++){ 5052 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 5053 mpi_ii++; 5054 for(l=0;l<p;l++,mpi_ii++) 5055 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][l]; 5056 } 5057 } 5058 MPI_Scatter(trade,count2*(p+1), MPI_DOUBLE,rec_buf, count2*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 5059 mpi_ii=0; 5060 for( mpi_i =0; mpi_i < count2; mpi_i++){ 5061 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5062 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 5063 mpi_ii++; 5064 for(l=0;l<p;l++,mpi_ii++) 5065 dpp_T[loc_recv[mpi_i]][l] = rec_buf[mpi_ii]; 5066 } 5067 free(rec_buf); 5068 if (myid==root) 5069 free(trade); 5070 #endif 5071 #if defined(_HOS_) 5072 rec_buf = myalloc1( count2*(k+1)); 5073 if (myid ==root) 5074 trade = myalloc1(count*(k+1)); 5075 else 5076 trade = NULL; 5077 5078 if ( myid == root){ 5079 mpi_ii=0; 5080 for(mpi_i =0; mpi_i < count; mpi_i++){ 5081 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 5082 mpi_ii++; 5083 for(l=0;l<k;l++,mpi_ii++) 5084 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][l]; 5085 } 5086 } 5087 MPI_Scatter(trade,count2*(k+1), MPI_DOUBLE,rec_buf, count2*(k+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 5088 mpi_ii=0; 5089 for( mpi_i =0; mpi_i < count2; mpi_i++){ 5090 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5091 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 5092 mpi_ii++; 5093 for(l=0;l<k;l++,mpi_ii++) 5094 dpp_T[loc_recv[mpi_i]][l] = rec_buf[mpi_ii]; 5095 } 5096 free(rec_buf); 5097 if (myid==root) 5098 free(trade); 5099 #endif 5100 #if defined(_HOV_) 5101 rec_buf = myalloc1( count2*(p*k+1)); 5102 if (myid ==root) 5103 trade = myalloc1(count*(p*k+1)); 5104 else 5105 trade = NULL; 5106 5107 if ( myid == root){ 5108 mpi_ii=0; 5109 for(mpi_i =0; mpi_i < count; mpi_i++){ 5110 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 5111 mpi_ii++; 5112 for(l=0;l<p*k;l++,mpi_ii++) 5113 trade[mpi_ii] = dpp_T[loc_send[mpi_i]][l]; 5114 } 5115 } 5116 MPI_Scatter(trade,count2*(p*k+1), MPI_DOUBLE,rec_buf, count2*(p*k+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 5117 mpi_ii=0; 5118 for( mpi_i =0; mpi_i < count2; mpi_i++){ 5119 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5120 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 5121 mpi_ii++; 5122 for(l=0;l<p*k;l++,mpi_ii++) 5123 dpp_T[loc_recv[mpi_i]][l] = rec_buf[mpi_ii]; 5124 } 5125 free(rec_buf); 5126 if (myid==root) 5127 free(trade); 5128 #endif 5129 #if defined(_TIGHT_) 5130 #if defined(_INT_FOR_) 5131 rec_buf = myalloc1( count2*(p+1)); 5132 if (myid ==root) 5133 trade = myalloc1(count*(p+1)); 5134 else 5135 trade = NULL; 5136 5137 if ( myid == root){ 5138 mpi_ii=0; 5139 for(mpi_i =0; mpi_i < count; mpi_i++){ 5140 trade[mpi_ii] = dp_T0[loc_send[mpi_i]]; 5141 mpi_ii++; 5142 for(l=0;l<p;l++,mpi_ii++) 5143 trade[mpi_ii] = (double) up_T[loc_send[mpi_i]][l]; 5144 } 5145 } 5146 MPI_Scatter(trade,count2*(p+1), MPI_DOUBLE,rec_buf, count2*(p+1), MPI_DOUBLE, root, MPI_COMM_WORLD); 5147 mpi_ii=0; 5148 for( mpi_i =0; mpi_i < count2; mpi_i++){ 5149 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5150 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_ii]; 5151 mpi_ii++; 5152 for(l=0;l<p;l++,mpi_ii++) 5153 up_T[loc_recv[mpi_i]][l] = (unsigned long int) rec_buf[mpi_ii]; 5154 } 5155 free(rec_buf); 5156 if (myid==root) 5157 free(trade); 5158 #endif /* END INT_FOR */ 5159 #if defined(_INDO_) 5160 rec_buf = myalloc1(count2+1); 4542 5161 if (myid == root) 4543 trade = (double*) myalloc1(count*p);5162 trade = myalloc1(count+process_count); 4544 5163 else 4545 5164 trade = NULL; 4546 4547 if( myid == root){ 4548 for(mpi_i =0; mpi_i < count; mpi_i++) 4549 for(i=0; i<p; i++) 4550 trade[p*mpi_i+i] = dpp_T[loc_send[mpi_i]][i]; 4551 } 4552 4553 MPI_Scatter(trade,count2*p, MPI_DOUBLE,rec_buf, count2*p,MPI_DOUBLE, root, MPI_COMM_WORLD); 4554 4555 for(mpi_i =0; mpi_i < count2; mpi_i++) 4556 for(i=0; i<p; i++) 4557 dpp_T[loc_recv[mpi_i]][i] = rec_buf[p*mpi_i+i]; 4558 5165 if(myid== root){ 5166 anz = ind_dom[loc_send[0]][0]; 5167 for(mpi_i =1; mpi_i < count; mpi_i++){ 5168 if(anz < ind_dom[loc_send[mpi_i]][0]) 5169 anz = ind_dom[loc_send[mpi_i]][0]; 5170 } 5171 i=0; 5172 l=0; 5173 for(mpi_ii=0; mpi_ii<process_count; mpi_ii++){ 5174 for(mpi_i=0; mpi_i < count2; mpi_i++,l++,i++) 5175 trade[l] = dp_T0[loc_send[i]]; 5176 trade[l] = (double) anz; 5177 l++; 5178 } 5179 } 5180 MPI_Scatter(trade,count2+1, MPI_DOUBLE, rec_buf, count2 +1, MPI_DOUBLE, root, MPI_COMM_WORLD); 5181 if (myid == root) 5182 free(trade); 5183 for(mpi_i=0; mpi_i < count2; mpi_i++){ 5184 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5185 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_i]; 5186 } 5187 anz = (int) rec_buf[count2]; 4559 5188 free(rec_buf); 4560 if(myid==root) free(trade);4561 #endif4562 #if defined(_HOS_)4563 if (myid ==root)4564 trade = (double*) myalloc1(count * k);4565 else4566 trade=NULL;4567 rec_buf = (double*) myalloc1(count2*k);4568 4569 if(myid== root){4570 for(mpi_i =0; mpi_i < count; mpi_i++)4571 for(i=0; i<k; i++)4572 trade[k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i];4573 }4574 4575 MPI_Scatter(trade,count2*k, MPI_DOUBLE,rec_buf, count2*k,MPI_DOUBLE, root, MPI_COMM_WORLD);4576 4577 for(mpi_i =0; mpi_i < count2; mpi_i++)4578 for(i=0; i<k; i++)4579 dpp_T[loc_recv[mpi_i]][i] = rec_buf[k*mpi_i+i];4580 4581 free(rec_buf);4582 if(myid==root) free(trade);4583 #endif4584 #if defined(_HOV_)4585 rec_buf = (double*) myalloc1(count2*p*k);4586 if (myid == root)4587 trade = (double*) myalloc1(count * p*k);4588 else4589 trade = NULL;4590 4591 if(myid == root){4592 for(mpi_i =0; mpi_i < count; mpi_i++)4593 for(i=0; i<p*k; i++)4594 trade[p*k*mpi_i+i] = dpp_T[loc_send[mpi_i]][i];4595 }4596 4597 MPI_Gather(trade,count2*p*k, MPI_DOUBLE,rec_buf, count2*p*k,MPI_DOUBLE, root, MPI_COMM_WORLD);4598 4599 for(mpi_i =0; mpi_i < count2; mpi_i++)4600 for(i=0; i<p*k; i++)4601 dpp_T[loc_recv[mpi_i]][i] = rec_buf[p*k*mpi_i+i];4602 4603 free(rec_buf);4604 if(myid==root) free(trade);4605 #endif4606 #if defined(_INDO_)4607 // getting information about count of entries4608 counts = ( int*) malloc( sizeof(int) );4609 if(myid== root){4610 counts[0] = ind_dom[loc_send[0]][0];4611 for(mpi_i =1; mpi_i < count; mpi_i++)4612 if(counts[0] < ind_dom[loc_send[mpi_i]][0])4613 counts[0] = ind_dom[loc_send[mpi_i]][0];4614 }4615 MPI_Bcast(counts,1,MPI_INT, root, MPI_COMM_WORLD);4616 5189 l=0; 4617 5190 if(myid == root){ 4618 trade_loc = (int*) calloc(count* counts[0],sizeof(int));5191 trade_loc = (int*) calloc(count*anz,sizeof(int)); 4619 5192 for(mpi_i =0; mpi_i < count; mpi_i++ ) 4620 5193 for (i=2; i < ind_dom[loc_send[mpi_i]][0]+2; i++){ … … 4622 5195 l++; 4623 5196 } 4624 for(i=ind_dom[loc_send[mpi_i]][0]; i < counts[0]; i++ ){5197 for(i=ind_dom[loc_send[mpi_i]][0]; i < anz ; i++ ){ 4625 5198 trade_loc[l] = -10; 4626 5199 l++; 4627 5200 } 4628 5201 } 4629 rec_buf_loc = ( int*) malloc( counts[0]*count2*sizeof(int) );4630 MPI_Scatter(trade_loc, counts[0]*count2, MPI_INT, rec_buf_loc, counts[0]*count2, MPI_INT, root, MPI_COMM_WORLD);5202 rec_buf_loc = ( int*) malloc(anz*count2*sizeof(int) ); 5203 MPI_Scatter(trade_loc,anz*count2, MPI_INT, rec_buf_loc, anz*count2, MPI_INT, root, MPI_COMM_WORLD); 4631 5204 4632 5205 if(myid == root) free( trade_loc ); … … 4635 5208 i = 0; 4636 5209 free(ind_dom[loc_recv[mpi_i]]); 4637 ind_dom[loc_recv[mpi_i]] = (locint*) calloc(2* tmp_counts[0]+2,sizeof(locint));4638 ind_dom[loc_recv[mpi_i]][1] = 2* counts[0];4639 4640 while ((rec_buf_loc[l+i] > -1 ) && ( i < counts[0]) ) {5210 ind_dom[loc_recv[mpi_i]] = (locint*) calloc(2*anz+2,sizeof(locint)); 5211 ind_dom[loc_recv[mpi_i]][1] = 2*anz; 5212 5213 while ((rec_buf_loc[l+i] > -1 ) && ( i < anz) ) { 4641 5214 ind_dom[loc_recv[mpi_i]][i+2]= rec_buf_loc[l+i]; 4642 5215 i++; 4643 5216 } 4644 5217 ind_dom[loc_recv[mpi_i]][0]= i; 4645 l += counts[0]; 5218 l += anz; 5219 } 5220 5221 free(rec_buf_loc); 5222 #endif 5223 #if defined(_NONLIND_) 5224 rec_buf = myalloc1(count2+1); 5225 if (myid == root) 5226 trade = myalloc1(count+process_count); 5227 else 5228 trade = NULL; 5229 if(myid== root){ 5230 anz = nonl_dom[0][0]; 5231 for(mpi_i =1; mpi_i < count; mpi_i++){ 5232 if(anz < nonl_dom[mpi_i][0]) 5233 anz = nonl_dom[mpi_i][0]; 5234 } 5235 i=0; 5236 l=0; 5237 for(mpi_ii=0; mpi_ii<process_count; mpi_ii++){ 5238 for(mpi_i=0; mpi_i < count2; mpi_i++,l++,i++) 5239 trade[l] = dp_T0[loc_send[i]]; 5240 trade[l] = (double) anz; 5241 l++; 5242 } 5243 } 5244 MPI_Scatter(trade,count2+1, MPI_DOUBLE, rec_buf, count2 +1, MPI_DOUBLE, root, MPI_COMM_WORLD); 5245 if (myid == root) 5246 free(trade); 5247 for(mpi_i=0; mpi_i < count2; mpi_i++){ 5248 IF_KEEP_WRITE_TAYLOR(loc_recv[mpi_i],keep,k,p) 5249 dp_T0[loc_recv[mpi_i]] = rec_buf[mpi_i]; 5250 } 5251 anz = (int) rec_buf[count2]; 5252 free(rec_buf); 5253 l = 0; 5254 for(mpi_i=0; mpi_i < count2; mpi_i++){ 5255 i = 0; 5256 free(nonl_dom[mpi_i]); 5257 nonl_dom[mpi_i] = (locint*) calloc(2*anz+2,sizeof(locint)); 5258 nonl_dom[mpi_i][1] = 2*anz; 5259 5260 while ((rec_buf_loc[l+i] > -1 ) && ( i < anz) ) { 5261 nonl_dom[mpi_i][i+2]= rec_buf_loc[l+i]; 5262 i++; 5263 } 5264 nonl_dom[mpi_i][0]= i; 5265 l += anz; 5266 } 5267 free(rec_buf_loc); 5268 #endif // end _NONLIND_ 5269 #endif 5270 #if defined(_NTIGHT_) 5271 #if defined(_INT_FOR_) 5272 if (myid == root) 5273 trade_loc = (locint*) malloc(count*p*sizeof(locint)); 5274 else 5275 trade_loc =NULL; 5276 rec_buf_loc = (locint*) malloc(count2*p*sizeof(locint)); 5277 mpi_ii=0; 5278 for (mpi_i=0; mpi_i< count; mpi_i++) { 5279 for(l=0;l<p;l++,mpi_ii++) 5280 trade_loc[mpi_ii]=up_T[loc_send[mpi_i]][l]; 5281 } 5282 MPI_Scatter(trade_loc,count2*p, MPI_INT,rec_buf_loc, count2*p ,MPI_INT, root, MPI_COMM_WORLD); 5283 mpi_ii=0; 5284 for( mpi_i =0; mpi_i < count; mpi_i++){ 5285 for(l=0;l<p;l++,mpi_ii++) 5286 up_T[loc_recv[mpi_i]][l] = rec_buf_loc[mpi_ii]; 5287 } 5288 free(rec_buf_loc); 5289 if(myid==root) free(trade_loc); 5290 #endif /* END INT_FOR */ 5291 #if defined(_INDO_) 5292 // getting information about count of entries 5293 if(myid== root){ 5294 anz = ind_dom[loc_send[0]][0]; 5295 for(mpi_i =1; mpi_i < count; mpi_i++) 5296 if(anz < ind_dom[loc_send[mpi_i]][0]) 5297 anz = ind_dom[loc_send[mpi_i]][0]; 5298 } 5299 MPI_Bcast(&anz,1,MPI_INT, root, MPI_COMM_WORLD); 5300 l=0; 5301 if(myid == root){ 5302 trade_loc = (int*) calloc(count*anz,sizeof(int)); 5303 for(mpi_i =0; mpi_i < count; mpi_i++ ) 5304 for (i=2; i < ind_dom[loc_send[mpi_i]][0]+2; i++){ 5305 trade_loc[l] = ind_dom[loc_send[mpi_i]][i]; 5306 l++; 5307 } 5308 for(i=ind_dom[loc_send[mpi_i]][0]; i < anz ; i++ ){ 5309 trade_loc[l] = -10; 5310 l++; 5311 } 5312 } 5313 rec_buf_loc = ( int*) malloc(anz*count2*sizeof(int) ); 5314 MPI_Scatter(trade_loc,anz*count2, MPI_INT, rec_buf_loc, anz*count2, MPI_INT, root, MPI_COMM_WORLD); 5315 5316 if(myid == root) free( trade_loc ); 5317 l = 0; 5318 for(mpi_i=0; mpi_i < count2; mpi_i++){ 5319 i = 0; 5320 free(ind_dom[loc_recv[mpi_i]]); 5321 ind_dom[loc_recv[mpi_i]] = (locint*) calloc(2*anz+2,sizeof(locint)); 5322 ind_dom[loc_recv[mpi_i]][1] = 2*anz; 5323 5324 while ((rec_buf_loc[l+i] > -1 ) && ( i < anz) ) { 5325 ind_dom[loc_recv[mpi_i]][i+2]= rec_buf_loc[l+i]; 5326 i++; 5327 } 5328 ind_dom[loc_recv[mpi_i]][0]= i; 5329 l += anz; 4646 5330 } 4647 5331 … … 4694 5378 free(counts); 4695 5379 #endif // end _NONLIND_ 4696 if( myid == root) free(loc_send); 5380 #endif 5381 if (myid == root) 5382 free(loc_send); 4697 5383 free(loc_recv); 4698 5384 break; … … 4765 5451 if (mpi_initialized){ 4766 5452 indcheck = s_r_indep; 4767 if ( id == 0 ){5453 if (mpi_id == 0 ){ 4768 5454 for ( i=0;i<indcheck;i++) { 4769 5455 crs[i] = (unsigned int*) malloc( sizeof(unsigned int)*(nonl_dom[i][0]+1));
Note: See TracChangeset
for help on using the changeset viewer.