Removed Itanium prefetch statements.

This commit is contained in:
henry 2009-06-23 22:58:26 +01:00
parent 94b8290593
commit a0e1aa543f
10 changed files with 6 additions and 414 deletions

View File

@ -67,38 +67,15 @@ void Foam::lduMatrix::Amul
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&ApsiPtr[cell+96],1,1);
#endif
ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&ApsiPtr[uPtr[face+32]],0,1);
#endif
ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&ApsiPtr[lPtr[face+32]],0,1);
#endif
ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
}
@ -151,34 +128,13 @@ void Foam::lduMatrix::Tmul
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&TpsiPtr[cell+96],1,1);
#endif
TpsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&TpsiPtr[uPtr[face+32]],0,1);
#endif
TpsiPtr[uPtr[face]] += upperPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&TpsiPtr[lPtr[face+32]],0,1);
#endif
TpsiPtr[lPtr[face]] += lowerPtr[face]*psiPtr[uPtr[face]];
}
@ -218,34 +174,12 @@ void Foam::lduMatrix::sumA
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&sumAPtr[cell+96],1,1);
#endif
sumAPtr[cell] = diagPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&sumAPtr[uPtr[face+32]],0,1);
#endif
sumAPtr[uPtr[face]] += lowerPtr[face];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&sumAPtr[lPtr[face+32]],0,1);
#endif
sumAPtr[lPtr[face]] += upperPtr[face];
}
@ -323,39 +257,15 @@ void Foam::lduMatrix::residual
register const label nCells = diag().size();
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&diagPtr[cell+96],0,1);
__builtin_prefetch (&sourcePtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],1,1);
#endif
rAPtr[cell] = sourcePtr[cell] - diagPtr[cell]*psiPtr[cell];
}
register const label nFaces = upper().size();
#ifdef ICC_IA64_PREFETCH
#pragma swp
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&rAPtr[uPtr[face+32]],0,1);
#endif
rAPtr[uPtr[face]] -= lowerPtr[face]*psiPtr[lPtr[face]];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&rAPtr[lPtr[face+32]],0,1);
#endif
rAPtr[lPtr[face]] -= upperPtr[face]*psiPtr[uPtr[face]];
}

View File

@ -353,20 +353,7 @@ Foam::tmp<Foam::scalarField > Foam::lduMatrix::H1() const
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+32],0,0);
__builtin_prefetch (&lPtr[face+32],0,0);
__builtin_prefetch (&lowerPtr[face+32],0,1);
__builtin_prefetch (&H1Ptr[uPtr[face+32]],0,1);
#endif
H1Ptr[uPtr[face]] -= lowerPtr[face];
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&upperPtr[face+32],0,1);
__builtin_prefetch (&H1Ptr[lPtr[face+32]],0,1);
#endif
H1Ptr[lPtr[face]] -= upperPtr[face];
}
}

View File

@ -71,29 +71,15 @@ void Foam::DICPreconditioner::calcReciprocalD
register const label nFaces = matrix.upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= upperPtr[face]*upperPtr[face]/rDPtr[lPtr[face]];
}
// Calculate the reciprocal of the preconditioned diagonal
register const label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
}
@ -120,61 +106,18 @@ void Foam::DICPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,0);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
#endif
wAPtr[uPtr[face]] -= rDPtr[uPtr[face]]*upperPtr[face]*wAPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -= rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
}
}

View File

@ -72,30 +72,15 @@ void Foam::DILUPreconditioner::calcReciprocalD
register label nFaces = matrix.upper().size();
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&lowerPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= upperPtr[face]*lowerPtr[face]/rDPtr[lPtr[face]];
}
// Calculate the reciprocal of the preconditioned diagonal
register label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
}
@ -128,26 +113,14 @@ void Foam::DILUPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
register label sface;
#ifdef ICC_IA64_PREFETCH
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
sface = losortPtr[face];
@ -155,28 +128,8 @@ void Foam::DILUPreconditioner::precondition
rDPtr[uPtr[sface]]*lowerPtr[sface]*wAPtr[lPtr[sface]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&upperPtr[face-95],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -=
rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
}
@ -210,46 +163,20 @@ void Foam::DILUPreconditioner::preconditionT
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wTPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rTPtr[cell+96],0,1);
#endif
wTPtr[cell] = rDPtr[cell]*rTPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wTPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wTPtr[lPtr[face+32]],0,1);
__builtin_prefetch (&wTPtr[uPtr[face+32]],0,1);
#endif
wTPtr[uPtr[face]] -=
rDPtr[uPtr[face]]*upperPtr[face]*wTPtr[lPtr[face]];
}
register label sface;
#ifdef ICC_IA64_PREFETCH
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
sface = losortPtr[face];

View File

@ -66,47 +66,17 @@ Foam::FDICPreconditioner::FDICPreconditioner
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
#endif
rDPtr[uPtr[face]] -= sqr(upperPtr[face])/rDPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
// Generate reciprocal FDIC
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/rDPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&upperPtr[face+96],0,0);
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
__builtin_prefetch (&rDlUpperPtr[face+96],0,0);
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&rDPtr[lPtr[face+32]],0,1);
#endif
rDuUpperPtr[face] = rDPtr[uPtr[face]]*upperPtr[face];
rDlUpperPtr[face] = rDPtr[lPtr[face]]*upperPtr[face];
}
@ -138,58 +108,18 @@ void Foam::FDICPreconditioner::precondition
register label nFaces = solver_.matrix().upper().size();
register label nFacesM1 = nFaces - 1;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDuUpperPtr,wAPtr
#pragma nounroll
#endif
for (register label face=0; face<nFaces; face++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face+96],0,0);
__builtin_prefetch (&lPtr[face+96],0,0);
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
#endif
wAPtr[uPtr[face]] -= rDuUpperPtr[face]*wAPtr[lPtr[face]];
}
#ifdef ICC_IA64_PREFETCH
#pragma noprefetch uPtr,lPtr,rDlUpperPtr,wAPtr
#pragma nounroll
#endif
for (register label face=nFacesM1; face>=0; face--)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&uPtr[face-95],0,0);
__builtin_prefetch (&lPtr[face-95],0,0);
__builtin_prefetch (&rDlUpperPtr[face-95],0,0);
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
#endif
wAPtr[lPtr[face]] -= rDlUpperPtr[face]*wAPtr[uPtr[face]];
}
}

View File

@ -58,18 +58,9 @@ Foam::diagonalPreconditioner::diagonalPreconditioner
register label nCells = rD.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
// Generate reciprocal diagonal
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&DPtr[cell+96],0,1);
#endif
rDPtr[cell] = 1.0/DPtr[cell];
}
}
@ -90,18 +81,8 @@ void Foam::diagonalPreconditioner::precondition
register label nCells = wA.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rDPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
}
}

View File

@ -68,17 +68,8 @@ void Foam::noPreconditioner::precondition
register label nCells = wA.size();
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
wAPtr[cell] = rAPtr[cell];
}
}

View File

@ -146,19 +146,6 @@ void Foam::GaussSeidelSmoother::smooth
for (register label cellI=0; cellI<nCells; cellI++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&psiPtr[cellI+64],0,1);
__builtin_prefetch (&bPrimePtr[cellI+64],0,1);
__builtin_prefetch (&ownStartPtr[cellI+64],0,1);
__builtin_prefetch (&diagPtr[cellI+64],0,1);
__builtin_prefetch (&uPtr[ownStartPtr[cellI+24]],0,1);
__builtin_prefetch (&uPtr[ownStartPtr[cellI+25]],0,1);
__builtin_prefetch (&uPtr[ownStartPtr[cellI+26]],0,1);
__builtin_prefetch (&uPtr[ownStartPtr[cellI+27]],0,1);
__builtin_prefetch (&upperPtr[ownStartPtr[cellI+24]],0,1);
__builtin_prefetch (&lowerPtr[ownStartPtr[cellI+24]],0,1);
#endif
// Start and end of this row
fStart = fEnd;
fEnd = ownStartPtr[cellI + 1];

View File

@ -144,19 +144,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
if (solverPerf.nIterations() == 0)
{
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&pTPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&wTPtr[cell+96],0,1);
#endif
pAPtr[cell] = wAPtr[cell];
pTPtr[cell] = wTPtr[cell];
}
@ -165,19 +154,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
{
scalar beta = wArT/wArTold;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&pTPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&wTPtr[cell+96],0,1);
#endif
pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
pTPtr[cell] = wTPtr[cell] + beta*pTPtr[cell];
}
@ -199,21 +177,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
scalar alpha = wArT/wApT;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&wTPtr[cell+96],0,1);
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
__builtin_prefetch (&rTPtr[cell+96],0,1);
#endif
psiPtr[cell] += alpha*pAPtr[cell];
rAPtr[cell] -= alpha*wAPtr[cell];
rTPtr[cell] -= alpha*wTPtr[cell];

View File

@ -134,17 +134,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
if (solverPerf.nIterations() == 0)
{
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
#endif
pAPtr[cell] = wAPtr[cell];
}
}
@ -152,17 +143,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
{
scalar beta = wArA/wArAold;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
#endif
pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
}
}
@ -182,19 +164,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
scalar alpha = wArA/wApA;
#ifdef ICC_IA64_PREFETCH
#pragma ivdep
#endif
for (register label cell=0; cell<nCells; cell++)
{
#ifdef ICC_IA64_PREFETCH
__builtin_prefetch (&pAPtr[cell+96],0,1);
__builtin_prefetch (&wAPtr[cell+96],0,1);
__builtin_prefetch (&psiPtr[cell+96],0,1);
__builtin_prefetch (&rAPtr[cell+96],0,1);
#endif
psiPtr[cell] += alpha*pAPtr[cell];
rAPtr[cell] -= alpha*wAPtr[cell];
}