Removed Itanium prefetch statements.
This commit is contained in:
parent
94b8290593
commit
a0e1aa543f
@ -67,38 +67,15 @@ void Foam::lduMatrix::Amul
|
||||
register const label nCells = diag().size();
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&psiPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&diagPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&ApsiPtr[cell+96],1,1);
|
||||
#endif
|
||||
|
||||
ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
}
|
||||
|
||||
|
||||
register const label nFaces = upper().size();
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma swp
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lowerPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&ApsiPtr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&upperPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&ApsiPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
}
|
||||
|
||||
@ -151,34 +128,13 @@ void Foam::lduMatrix::Tmul
|
||||
register const label nCells = diag().size();
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&psiPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&diagPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&TpsiPtr[cell+96],1,1);
|
||||
#endif
|
||||
|
||||
TpsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
}
|
||||
|
||||
register const label nFaces = upper().size();
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lPtr[face+32],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&TpsiPtr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
TpsiPtr[uPtr[face]] += upperPtr[face]*psiPtr[lPtr[face]];
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&lowerPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&TpsiPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
TpsiPtr[lPtr[face]] += lowerPtr[face]*psiPtr[uPtr[face]];
|
||||
}
|
||||
|
||||
@ -218,34 +174,12 @@ void Foam::lduMatrix::sumA
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&diagPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&sumAPtr[cell+96],1,1);
|
||||
#endif
|
||||
|
||||
sumAPtr[cell] = diagPtr[cell];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma swp
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lowerPtr[face+32],0,1);
|
||||
__builtin_prefetch (&sumAPtr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
sumAPtr[uPtr[face]] += lowerPtr[face];
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&upperPtr[face+32],0,1);
|
||||
__builtin_prefetch (&sumAPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
sumAPtr[lPtr[face]] += upperPtr[face];
|
||||
}
|
||||
|
||||
@ -323,39 +257,15 @@ void Foam::lduMatrix::residual
|
||||
register const label nCells = diag().size();
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&psiPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&diagPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&sourcePtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],1,1);
|
||||
#endif
|
||||
|
||||
rAPtr[cell] = sourcePtr[cell] - diagPtr[cell]*psiPtr[cell];
|
||||
}
|
||||
|
||||
|
||||
register const label nFaces = upper().size();
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma swp
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lowerPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[lPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&rAPtr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
rAPtr[uPtr[face]] -= lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&upperPtr[face+32],0,1);
|
||||
__builtin_prefetch (&psiPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&rAPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
rAPtr[lPtr[face]] -= upperPtr[face]*psiPtr[uPtr[face]];
|
||||
}
|
||||
|
||||
|
@ -353,20 +353,7 @@ Foam::tmp<Foam::scalarField > Foam::lduMatrix::H1() const
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lPtr[face+32],0,0);
|
||||
__builtin_prefetch (&lowerPtr[face+32],0,1);
|
||||
__builtin_prefetch (&H1Ptr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
H1Ptr[uPtr[face]] -= lowerPtr[face];
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&upperPtr[face+32],0,1);
|
||||
__builtin_prefetch (&H1Ptr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
H1Ptr[lPtr[face]] -= upperPtr[face];
|
||||
}
|
||||
}
|
||||
|
@ -71,29 +71,15 @@ void Foam::DICPreconditioner::calcReciprocalD
|
||||
register const label nFaces = matrix.upper().size();
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
|
||||
#endif
|
||||
|
||||
rDPtr[uPtr[face]] -= upperPtr[face]*upperPtr[face]/rDPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
|
||||
// Calculate the reciprocal of the preconditioned diagonal
|
||||
register const label nCells = rD.size();
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
rDPtr[cell] = 1.0/rDPtr[cell];
|
||||
}
|
||||
}
|
||||
@ -120,61 +106,18 @@ void Foam::DICPreconditioner::precondition
|
||||
register label nFaces = solver_.matrix().upper().size();
|
||||
register label nFacesM1 = nFaces - 1;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wAPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,0);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[uPtr[face]] -= rDPtr[uPtr[face]]*upperPtr[face]*wAPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=nFacesM1; face>=0; face--)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face-95],0,0);
|
||||
__builtin_prefetch (&lPtr[face-95],0,0);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[lPtr[face]] -= rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
|
||||
}
|
||||
}
|
||||
|
@ -72,30 +72,15 @@ void Foam::DILUPreconditioner::calcReciprocalD
|
||||
register label nFaces = matrix.upper().size();
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,1);
|
||||
__builtin_prefetch (&lowerPtr[face+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
|
||||
#endif
|
||||
|
||||
rDPtr[uPtr[face]] -= upperPtr[face]*lowerPtr[face]/rDPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
|
||||
// Calculate the reciprocal of the preconditioned diagonal
|
||||
register label nCells = rD.size();
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
rDPtr[cell] = 1.0/rDPtr[cell];
|
||||
}
|
||||
}
|
||||
@ -128,26 +113,14 @@ void Foam::DILUPreconditioner::precondition
|
||||
register label nFaces = solver_.matrix().upper().size();
|
||||
register label nFacesM1 = nFaces - 1;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
|
||||
}
|
||||
|
||||
|
||||
register label sface;
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
sface = losortPtr[face];
|
||||
@ -155,28 +128,8 @@ void Foam::DILUPreconditioner::precondition
|
||||
rDPtr[uPtr[sface]]*lowerPtr[sface]*wAPtr[lPtr[sface]];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,rDPtr,wAPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=nFacesM1; face>=0; face--)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face-95],0,0);
|
||||
__builtin_prefetch (&lPtr[face-95],0,0);
|
||||
__builtin_prefetch (&upperPtr[face-95],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face-32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[lPtr[face]] -=
|
||||
rDPtr[lPtr[face]]*upperPtr[face]*wAPtr[uPtr[face]];
|
||||
}
|
||||
@ -210,46 +163,20 @@ void Foam::DILUPreconditioner::preconditionT
|
||||
register label nFaces = solver_.matrix().upper().size();
|
||||
register label nFacesM1 = nFaces - 1;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wTPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rTPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wTPtr[cell] = rDPtr[cell]*rTPtr[cell];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,upperPtr,rDPtr,wTPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&wTPtr[lPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&wTPtr[uPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
wTPtr[uPtr[face]] -=
|
||||
rDPtr[uPtr[face]]*upperPtr[face]*wTPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
|
||||
register label sface;
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=nFacesM1; face>=0; face--)
|
||||
{
|
||||
sface = losortPtr[face];
|
||||
|
@ -66,47 +66,17 @@ Foam::FDICPreconditioner::FDICPreconditioner
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face+24]],0,1);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+24]],1,1);
|
||||
#endif
|
||||
|
||||
rDPtr[uPtr[face]] -= sqr(upperPtr[face])/rDPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
// Generate reciprocal FDIC
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
rDPtr[cell] = 1.0/rDPtr[cell];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&upperPtr[face+96],0,0);
|
||||
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
|
||||
__builtin_prefetch (&rDlUpperPtr[face+96],0,0);
|
||||
__builtin_prefetch (&rDPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&rDPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
rDuUpperPtr[face] = rDPtr[uPtr[face]]*upperPtr[face];
|
||||
rDlUpperPtr[face] = rDPtr[lPtr[face]]*upperPtr[face];
|
||||
}
|
||||
@ -138,58 +108,18 @@ void Foam::FDICPreconditioner::precondition
|
||||
register label nFaces = solver_.matrix().upper().size();
|
||||
register label nFacesM1 = nFaces - 1;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,rDuUpperPtr,wAPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=0; face<nFaces; face++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face+96],0,0);
|
||||
__builtin_prefetch (&lPtr[face+96],0,0);
|
||||
__builtin_prefetch (&rDuUpperPtr[face+96],0,0);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face+32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face+32]],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[uPtr[face]] -= rDuUpperPtr[face]*wAPtr[lPtr[face]];
|
||||
}
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma noprefetch uPtr,lPtr,rDlUpperPtr,wAPtr
|
||||
#pragma nounroll
|
||||
#endif
|
||||
|
||||
for (register label face=nFacesM1; face>=0; face--)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&uPtr[face-95],0,0);
|
||||
__builtin_prefetch (&lPtr[face-95],0,0);
|
||||
__builtin_prefetch (&rDlUpperPtr[face-95],0,0);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-16]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-24]],0,1);
|
||||
__builtin_prefetch (&wAPtr[lPtr[face-32]],0,1);
|
||||
__builtin_prefetch (&wAPtr[uPtr[face-32]],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[lPtr[face]] -= rDlUpperPtr[face]*wAPtr[uPtr[face]];
|
||||
}
|
||||
}
|
||||
|
@ -58,18 +58,9 @@ Foam::diagonalPreconditioner::diagonalPreconditioner
|
||||
|
||||
register label nCells = rD.size();
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
// Generate reciprocal diagonal
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&DPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
rDPtr[cell] = 1.0/DPtr[cell];
|
||||
}
|
||||
}
|
||||
@ -90,18 +81,8 @@ void Foam::diagonalPreconditioner::precondition
|
||||
|
||||
register label nCells = wA.size();
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rDPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[cell] = rDPtr[cell]*rAPtr[cell];
|
||||
}
|
||||
}
|
||||
|
@ -68,17 +68,8 @@ void Foam::noPreconditioner::precondition
|
||||
|
||||
register label nCells = wA.size();
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
wAPtr[cell] = rAPtr[cell];
|
||||
}
|
||||
}
|
||||
|
@ -146,19 +146,6 @@ void Foam::GaussSeidelSmoother::smooth
|
||||
|
||||
for (register label cellI=0; cellI<nCells; cellI++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&psiPtr[cellI+64],0,1);
|
||||
__builtin_prefetch (&bPrimePtr[cellI+64],0,1);
|
||||
__builtin_prefetch (&ownStartPtr[cellI+64],0,1);
|
||||
__builtin_prefetch (&diagPtr[cellI+64],0,1);
|
||||
__builtin_prefetch (&uPtr[ownStartPtr[cellI+24]],0,1);
|
||||
__builtin_prefetch (&uPtr[ownStartPtr[cellI+25]],0,1);
|
||||
__builtin_prefetch (&uPtr[ownStartPtr[cellI+26]],0,1);
|
||||
__builtin_prefetch (&uPtr[ownStartPtr[cellI+27]],0,1);
|
||||
__builtin_prefetch (&upperPtr[ownStartPtr[cellI+24]],0,1);
|
||||
__builtin_prefetch (&lowerPtr[ownStartPtr[cellI+24]],0,1);
|
||||
#endif
|
||||
|
||||
// Start and end of this row
|
||||
fStart = fEnd;
|
||||
fEnd = ownStartPtr[cellI + 1];
|
||||
|
@ -144,19 +144,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
|
||||
|
||||
if (solverPerf.nIterations() == 0)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&pTPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wTPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
pAPtr[cell] = wAPtr[cell];
|
||||
pTPtr[cell] = wTPtr[cell];
|
||||
}
|
||||
@ -165,19 +154,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
|
||||
{
|
||||
scalar beta = wArT/wArTold;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&pTPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wTPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
|
||||
pTPtr[cell] = wTPtr[cell] + beta*pTPtr[cell];
|
||||
}
|
||||
@ -199,21 +177,8 @@ Foam::lduMatrix::solverPerformance Foam::PBiCG::solve
|
||||
|
||||
scalar alpha = wArT/wApT;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wTPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&psiPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rTPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
psiPtr[cell] += alpha*pAPtr[cell];
|
||||
rAPtr[cell] -= alpha*wAPtr[cell];
|
||||
rTPtr[cell] -= alpha*wTPtr[cell];
|
||||
|
@ -134,17 +134,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
|
||||
|
||||
if (solverPerf.nIterations() == 0)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
pAPtr[cell] = wAPtr[cell];
|
||||
}
|
||||
}
|
||||
@ -152,17 +143,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
|
||||
{
|
||||
scalar beta = wArA/wArAold;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
|
||||
}
|
||||
}
|
||||
@ -182,19 +164,8 @@ Foam::lduMatrix::solverPerformance Foam::PCG::solve
|
||||
|
||||
scalar alpha = wArA/wApA;
|
||||
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
#pragma ivdep
|
||||
#endif
|
||||
|
||||
for (register label cell=0; cell<nCells; cell++)
|
||||
{
|
||||
#ifdef ICC_IA64_PREFETCH
|
||||
__builtin_prefetch (&pAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&wAPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&psiPtr[cell+96],0,1);
|
||||
__builtin_prefetch (&rAPtr[cell+96],0,1);
|
||||
#endif
|
||||
|
||||
psiPtr[cell] += alpha*pAPtr[cell];
|
||||
rAPtr[cell] -= alpha*wAPtr[cell];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user