// Reenterable reent.cpp reent1.cpp // g++ -o reent reent1.cpp -fopenmp -fgnu-tm -fpermissive -std=c++0x -lm -lpthread -lOpenCL // Для теста на многоядерной системе, дающего реальное ускорение, необходимо // раскомментировать следующую строчку // #define __THOROW__ // #define __MPI__ // Post-Build + Custom Build + Dummy file in project // Rebuild recommended. #include using namespace std; #include #include #include #include #include #include #include #ifdef _OPENMP #include #endif #include #ifdef __INTEL_COMPILER #define max(a,b) ((a)>(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b)) #endif int _time(time_t * Arg) { return time(Arg); } #ifdef __MPI__ #include "mpi.h" double time() { return MPI_Wtime(); } #else double time() { #ifdef _MSC_VER struct _timeb timebuf; _ftime(&timebuf); #else struct timeb timebuf; ftime(&timebuf); #endif return (double) timebuf.time + (double) timebuf.millitm/1000; } #endif using namespace std; struct DataItem { protected: double Data[2000]; public: double length() { double Result = 0.0; for (int i=0; i (DataItem &V2) { return length()>V2.length(); } DataItem& operator = (const DataItem V2) { memmove(Data,V2.Data,sizeof(Data)); return *this; } double operator () () { return length(); } void setrand() { for (int i=0; i Childs; char Work; } _TreeNode; typedef struct NetTag { int Number; list Outs; } NetNode; #ifdef __THOROW__ #define ARR_SIZE (1<<13) #define _ARR_SIZE (ARR_SIZE/16) #else #define ARR_SIZE 32 #define _ARR_SIZE ARR_SIZE #endif TreeNode * Root; _TreeNode * _Root; DataItem A[ARR_SIZE]; reenterable Out(TreeNode * Cur, int Level); /* Прототип */ int CurLevel = 1; reenterable Out(TreeNode * Cur, int Level) { if (Level!=CurLevel) { CurLevel = Level; cout<<"\n"; } cout<Data()<<" "; if (Cur->Left) plan_last(Cur->Left,Level+1); /* Планируем вызов (не вызываем) эту процедуру */ if (Cur->Right) plan_last(Cur->Right,Level+1); /* То же самое */ } reenterable static global ParallelOut(TreeNode * Root, TreeNode * R, bool par) { if (Root) { if (par) { cout<Data()<<" "; } else { if (R->Left) ParallelOut(Root, R->Left, false); if (R->Right) ParallelOut(Root, R->Right, false); plan_last(Root, R, true); if (R == Root) plan_group_parallelize; else plan_stop; } } } void _Out(TreeNode * Cur) { list Queue = list(); int LevelNodes = 1; int NextLevelNodes = 0; Queue.push_back(Cur); while (!Queue.empty()) { Cur = Queue.front(); Queue.pop_front(); cout<Data()<<" "; if (Cur->Left) { Queue.push_back(Cur->Left); NextLevelNodes++; } if (Cur->Right) { Queue.push_back(Cur->Right); NextLevelNodes++; } if (--LevelNodes==0) { cout<<"\n"; LevelNodes = NextLevelNodes; NextLevelNodes = 0; } } } // chain[ARR_SIZE] FindMinMaxTree(id(NULL) TreeNode * Cur, char FindMin) if((!id->Left || id->Left->Work)&&(!id->Right || id->Right->Work) ) set(id->Work=1) reset(id->Work=0) depends(id->Left,id->Right); chain[ARR_SIZE] FindMinMaxTree(id(NULL) struct TreeTag * Cur, char FindMin); chain[ARR_SIZE] GenMinMaxTree(TreeNode * Cur, char FindMin) { if (Cur==Root) plan_group_parallelize; throw_first(Cur,FindMin); if (Cur->Left) plan_last(Cur->Left,!FindMin); if (Cur->Right) plan_last(Cur->Right,!FindMin); } chain[ARR_SIZE] FindMinMaxTree(id(NULL) struct TreeTag * Cur, char FindMin) { if (Cur==NULL) plan_group_parallelize; else if (Cur->Left && Cur->Right) if (FindMin) Cur->Data = min(Cur->Left->Data,Cur->Right->Data); else Cur->Data = max(Cur->Left->Data,Cur->Right->Data); else if (Cur->Left) Cur->Data = Cur->Left->Data; else if (Cur->Right) Cur->Data = Cur->Right->Data; } chain[ARR_SIZE] _GenMinMaxTree(_TreeNode * Cur, char FindMin) { if (Cur==_Root) plan_group_parallelize; throw_first(Cur,FindMin); for (int i=0; iChilds.size(); i++) if (Cur->Childs[i]) plan_last(Cur->Childs[i],!FindMin); } chain[ARR_SIZE] _FindMinMaxTree(id(NULL) _TreeNode * Cur, char FindMin) if( [for (int i=0; iChilds.size(); i++)] (!id->Childs[i] || id->Childs[i]->Work) ) set(id->Work=1) reset(id->Work=0) depends( [for (int i=0; iChilds.size(); i++)] id->Childs[i]) { if (Cur==NULL) plan_group_parallelize; else if (Cur->Childs.size()>0) { Cur->Data = Cur->Childs[0]->Data; for (int i=1; iChilds.size(); i++) if (FindMin) { if (Cur->Data>Cur->Childs[i]->Data) Cur->Data = Cur->Childs[i]->Data; } else if (Cur->DataChilds[i]->Data) Cur->Data = Cur->Childs[i]->Data; } } const int nHisto = 10; int Histo[nHisto] = {0}; reenterable FindHisto(TreeNode * Cur, double Min, double Max, bool calc) { if (!calc) { if (Cur == Root) plan_group_first; if (Cur->Left) { plan_first(Cur->Left, Min, Max, false); } if (Cur->Right) { plan_first(Cur->Right, Min, Max, false); } plan_last(Cur, Min, Max, true); } else { plan_group_atomize; int n = nHisto*(Cur->Data()-Min)/(Max-Min); if (n == nHisto) n--; Histo[n]++; } } chain FindHistoStage0(TreeNode * Cur, double Min, double Max) throw(TreeNode * Cur, double Min, double Max) { if (Cur == Root) plan_group_parallelize; throw_last(Cur, Min, Max); if (Cur->Left) { plan_last(Cur->Left, Min, Max); } if (Cur->Right) { plan_last(Cur->Right, Min, Max); } } chain FindHistoStage1(TreeNode * Cur, double Min, double Max) { if (Cur == Root) plan_group_atomize; int n = nHisto*(Cur->Data()-Min)/(Max-Min); if (n == nHisto) n--; Histo[n]++; } #if _OPENMP>=200805 void _omp3_FindMinMaxTree(TreeNode * Cur, char FindMin) { if (Cur->Left) #pragma omp task untied _omp3_FindMinMaxTree(Cur->Left,!FindMin); if (Cur->Right) #pragma omp task untied _omp3_FindMinMaxTree(Cur->Right,!FindMin); #pragma omp taskwait if (Cur->Left && Cur->Right) if (FindMin) Cur->Data = min(Cur->Left->Data,Cur->Right->Data); else Cur->Data = max(Cur->Left->Data,Cur->Right->Data); else if (Cur->Left) Cur->Data = Cur->Left->Data; else if (Cur->Right) Cur->Data = Cur->Right->Data; } void omp3_FindMinMaxTree(TreeNode * Root, char FindMin) { #pragma omp parallel { #pragma omp single { _omp3_FindMinMaxTree(Root,FindMin); } } } #endif reenterable EnumerateByFamilies(TreeNode * Cur, char Level, int &Number) { Cur->Data.fill(sqrt((Number++)/2000.0)); if (Level) { if (Cur->Left) plan_last(Cur->Left,0,Number); if (Cur->Right) plan_last(Cur->Right,0,Number); } else { if (Cur->Right) plan_first(Cur->Right,1,Number); if (Cur->Left) plan_first(Cur->Left,1,Number); } } chain RevEnumerateStage0(TreeNode * Cur) throw(TreeNode * Cur, int &Number) { if (Cur->Right) plan_last(Cur->Right); if (Cur->Left) plan_last(Cur->Left); throw_first(Cur,1); // можно и plan+признак(без перепланирования) } chain RevEnumerateStage1(TreeNode * Cur, int &Number) { Cur->Data.fill(sqrt((Number++)/2000.0)); } NetNode * NetRoot; map Markers; map NInps; reenterable FindSubSystem(NetNode * Cur) { list::iterator OutCounter = Cur->Outs.begin(); cout<Number<<" "; if (plan_empty && (Cur->Outs.size()==0 || Cur->Outs.size()==1 && NInps[Cur->Outs.front()]==1)) cout<<"\n"; for (int i=0; iOuts.size(); i++,OutCounter++) { Markers[*OutCounter]--; if (Markers[*OutCounter]==0) plan_last(*OutCounter); } } reenterable static local WalkTree(TreeNode * Cur) { if (Cur->Left) WalkTree(Cur->Left)/1; // recursion!!! nested parallelizing cout<Data()<<"."; if (Cur->Right) plan_first(Cur->Right); } enum {entryWT, yieldWT, anyWT}; reenterable static local WalkTreeGen(TreeNode * Cur, char Point, TreeNode * &Out) { switch (Point) { case entryWT: if (Cur->Left) { if (Out) continue WalkTreeGen(Cur->Left,entryWT,Out)/1; else WalkTreeGen(Cur->Left,entryWT,Out)/1; if (Out) { plan_first(Cur,entryWT,Cur!); plan_stop; return; } } Out = Cur; plan_first(Cur,yieldWT,NULL!); plan_stop; return; case yieldWT: if (Cur->Right) { if (Out) continue WalkTreeGen(Cur->Right,entryWT,Out)/1; else WalkTreeGen(Cur->Right,entryWT,Out)/1; if (Out) plan_first(Cur,yieldWT,Out!); plan_stop; } } } reenterable static local WalkTreeGen1(TreeNode * Cur, char Point, TreeNode * &Out) { switch (Point) { case entryWT: if (Cur->Left) { if (Out) continue WalkTreeGen1(Cur->Left,entryWT,Out)/1; else WalkTreeGen1(Cur->Left,entryWT,Out)/1; if (Out) { plan_first(Cur,entryWT,Cur!); plan_stop; return; } } Out = Cur; plan_first(Cur,yieldWT,NULL!); plan_stop; return; case yieldWT: if (Cur->Right) plan_first(Cur->Right,entryWT,NULL!); } } reenterable[ARR_SIZE] PutTree(TreeNode ** Cur,TreeNode * Ins) { if (*Cur) if (Ins->Data<(*Cur)->Data) plan_first(&((*Cur)->Left),Ins); else plan_first(&((*Cur)->Right),Ins); else *Cur = Ins; } void RecursivePutTree(TreeNode ** Cur,TreeNode * Ins) { if (*Cur) if (Ins->Data<(*Cur)->Data) RecursivePutTree(&((*Cur)->Left),Ins); else RecursivePutTree(&((*Cur)->Right),Ins); else *Cur = Ins; } reenterable static global stack(int &Item) { if (!plan_after_continue()) plan_first(Item); plan_stop; } reenterable static queue(int &Item) { if (!plan_after_continue()) plan_last(Item); plan_stop; } reenterable static Fib(int First, int Last, int &Item) { if (plan_after_continue()) { Item = First+Last; plan_first(Last,Item,0); } else { plan_first(0,1,0); Item = 1; } plan_stop; } reenterable[ARR_SIZE] void Sum(int Start, int Incr) { int Next = Start+2*Incr; if (Next=ARR_SIZE) { #pragma omp plan last /* Чтобы избежать суммирования дважды в одну ячейку */ if (Incr1) { DataItem * Buf = new DataItem[N/incr+1]; int Idxs[MAX_PROCS]; int actNP = nProcs; SortedArray = Arr; for (int i=0; iRights[Idx]) memmove(&Idxs[0],&Idxs[1],--actNP*sizeof(int)); else { DataItem buf = Arr[Lefts[Idx]]; int j; for (j=1; j1) Idxs[j-1] = Idx; } } for (int i=0, j=0; i=UseMergeSort) { int NLists = N%Incr[i] ? N%Incr[i] : Incr[i]; DataItem * B = Arr; int NB = N; plan_group_last; for (j=0; jEps) { plan_last(i+1); Sums[plan_processor_id()]+=f(i); } return 1; } double SumF(int NP) { double Result = 0.0; memset(Sums,0,sizeof(Sums)); cout<<(_SumF(1)/NP ? "\nReenterable function works\n" : "Reenterable function does not work"); for (int i=0; i=200805 double omp3_SumF(int NP) { double Result = 0.0; memset(Sums,0,sizeof(Sums)); #pragma omp parallel num_threads(NP) { #pragma omp single { for (int i=1; g(i)>Eps; i++) #pragma omp task untied Sums[omp_get_thread_num()]+=f(i); #pragma omp taskwait } } for (int i=0; iLeft) plan_last(Cur->Left); if (Cur->Right) plan_last(Cur->Right); SumResult[plan_processor_id()]+=Cur->Data; } DataItem * TreeSum(int NP) { static DataItem Result; int i; Result.fill(0.0); memset(SumResult,0,sizeof(SumResult)); _TreeSum(Root)/NP; for (i=0; iLeft) plan_last(Cur->Left,SumResult); if (Cur->Right) plan_last(Cur->Right,SumResult); SumResult = Cur->Data; } reenterable _rTreeMax(TreeNode * Cur, reduction(max) DataItem &Max) { if (Cur==Root) plan_group_parallelize; if (Cur->Left) plan_last(Cur->Left,Max); if (Cur->Right) plan_last(Cur->Right,Max); Max = Cur->Data; } double rTreeMax(TreeNode * Root) { DataItem R; R.fill(0.0); _rTreeMax(Root,R); return R(); } #if _OPENMP>=200805 double _omp3_TreeMax(TreeNode * Cur) { double L = -1E300; double R = -1E300; double C = Cur->Data(); if (Cur->Left) #pragma omp task shared(L) untied L = _omp3_TreeMax(Cur->Left); if (Cur->Right) #pragma omp task shared(R) untied R = _omp3_TreeMax(Cur->Right); #pragma omp taskwait return max(C,max(L,R)); } double omp3_TreeMax(TreeNode * Root) { double Result; #pragma omp parallel { #pragma omp single Result = _omp3_TreeMax(Root); } return Result; } #endif chain[ARR_SIZE] TreeMax(TreeNode * Cur, reduction(max) DataItem & MaxResult) { static DataItem DummyMin; throw_last(Cur,DummyMin); if (Cur==Root) plan_group_parallelize; if (Cur->Left) plan_last(Cur->Left,MaxResult); if (Cur->Right) plan_last(Cur->Right,MaxResult); MaxResult = Cur->Data; } chain[ARR_SIZE] TreeMin(TreeNode * Cur, reduction(min) DataItem & MinResult) { if (Cur==Root) plan_group_parallelize; MinResult = Cur->Data; } DataItem * TreeMinMax(int PerStageNP, DataItem & Min) { static DataItem Max; Max.fill(0.0); Min.fill(1000.0); if (PerStageNP) plan_parallel_chain(0,TreeMax(Root,Max)/PerStageNP,TreeMin(Root,Min)/PerStageNP); else plan_chain(0,TreeMax(Root,Max)/1,TreeMin(Root,Min)/1); return &Max; } // фильтрация изображения, векторные операции??? #ifdef _OPENMP const double tau = 0.1; const double D = 0.001; const double h = 0.1; const double hh = h*h; const double LeftT = 300.0; const double RightT = 350.0; const double InitT = 273.0; #ifdef __THOROW__ const int N = 4*100000+2; #else const int N = 4*12+2; #endif const double EndTime = tau*1000; double * Therm = NULL; double * _L[MAX_PROCS] = {NULL}; double * _M[MAX_PROCS] = {NULL}; int FirstIdx[MAX_PROCS]; int LastIdx[MAX_PROCS]; chain ThermalEquation(int NProcs, double Time, funnel(in,double) * outLEFT, funnel(out,double) * inLEFT) { int stage = throw_stage(); int NN = LastIdx[stage]-FirstIdx[stage]+1; static double Q1 = tau*D/hh; double * L = _L[stage]; double * M = _M[stage]; double Left_1, Left_0; int i,j,k; funnel(out,double) * putLEFT = new funnel(out,double)(outLEFT); funnel(in,double) * getLEFT = new funnel(in,double)(inLEFT); funnel(out,double) * putRIGHT = new funnel(out,double)(); funnel(in,double) * getRIGHT = new funnel(in,double)(); Time+=tau; if (stage==0 && Time0) { double Z1 = Q1; double Z2 = Q1; double Z3 = 1+2*Q1; *putLEFT = Therm[FirstIdx[stage]+1]; Left_0 = **getLEFT; Left_1 = **getLEFT; M[0] = (Left_0+Z1*Left_1+Z2*Therm[FirstIdx[stage]+1])/Z3; } else M[0] = LeftT; for (i=1, j=FirstIdx[stage]+1; i=1; i--, j--, k++) { Therm[j-1] = Therm[j]*L[i-1]+M[i-1]; if (k<2 && stage=1; i--) Therm[i-1] = Therm[i]*L[i-1]+M[i-1]; } cout<<"1 proc. Time = "<=0 && X1=0 && Y1=0 && X1=0 && Y1Data = Cur->Data; Dest->Childs = vector(0); Dest->Work = 0; if (Cur->Left) { Dest->Childs.resize(1,NULL); CopyTree(Cur->Left,Dest->Childs[0]); } if (Cur->Right) { Dest->Childs.resize(Dest->Childs.size()+1,NULL); CopyTree(Cur->Right,Dest->Childs[Dest->Childs.size()-1]); } } } int main(int argc, char ** argv) { TreeNode * CurNode; DataItem Max, Min; int i,j; DataItem RSUM; DataItem * Asave; int Number; int Q; double T1; RSUM.fill(0.0); #ifdef __MPI__ MPI_Init(&argc,&argv); #endif srand(_time(NULL)); for (i=0; iData = Asave[i]; CurNode->Work = 0; CurNode->Left = NULL; CurNode->Right = NULL; if (i>0) #ifdef __THOROW__ RecursivePutTree(&Root,CurNode); #else PutTree(&Root,CurNode); #endif } #ifndef __THOROW__ cout<<"SORTED:\n"; WalkTree(Root); cout<<"\nSORTED GEN:\n"; CurNode = NULL; WalkTreeGen(Root,entryWT,CurNode); cout<Data()<<"."; do { continue WalkTreeGen(Root,anyWT,CurNode); if (CurNode) cout<Data()<<"."; } while (CurNode); cout<<"\nSORTED GEN 1:\n"; CurNode = NULL; WalkTreeGen1(Root,entryWT,CurNode); cout<Data()<<"."; do { continue WalkTreeGen1(Root,anyWT,CurNode); if (CurNode) cout<Data()<<"."; } while (CurNode); cout<<"\nOUT:\n"; Out(Root,1); cout<<"\n_OUT:\n"; _Out(Root); #endif Sum(0,1)/1; memmove(A,Asave,sizeof(A)); for (i=1; i<=8; i++) { T1 = time(); Sum(0,1)/i; RSUM = A[0]; memmove(A,Asave,sizeof(A)); cout<=1; i--) { RSUM.fill(0.0); T1 = time(); TreeSum(Root,RSUM)/i; cout< _MAX) _MAX = T; } cout<<"SEQUENTIAL: TREE MIN = "<<_MIN<<" : TREE MAX = "<<_MAX<=200805 cout<<"OpenMP 3.0: Max in Tree = "<=0; i--) { T1 = time(); Max = *TreeMinMax(i,Min); cout<Number = 0; NetNode * CC1 = new NetNode; CC1->Number = 1; Markers[CC1] = 0; NetRoot->Outs.push_back(CC1); Markers[CC1]++; NetNode * CC2 = new NetNode; CC2->Number = 2; Markers[CC2] = 0; NetRoot->Outs.push_back(CC2); Markers[CC2]++; NetNode * CC3 = new NetNode; CC3->Number = 3; Markers[CC3] = 0; CC1->Outs.push_back(CC2); Markers[CC2]++; CC1->Outs.push_back(CC3); Markers[CC3]++; NetNode * CC4 = new NetNode; CC4->Number = 4; Markers[CC4] = 0; CC2->Outs.push_back(CC4); Markers[CC4]++; CC3->Outs.push_back(CC4); Markers[CC4]++; NetNode * CC5 = new NetNode; CC5->Number = 5; Markers[CC5] = 0; CC4->Outs.push_back(CC5); Markers[CC5]++; Markers[NetRoot] = 0; NInps = Markers; cout<<"\nSUBSYSTEM:\n"; FindSubSystem(NetRoot); cout<<"\nSTACK:\n"; Q=1; stack(Q); Q=2; stack(Q); Q=3; stack(Q); continue stack(Q); cout<=0; j--) R[i][j] = X1[i][j]+R[i][0]; } } cout<<" OMP: "; for (i=0; i<5; i++) { cout<<"("; for (int j=0; j=200805 cout<<"OpenMP 3.0: Sum(F) = "<=2; i--) IntegrateThermal(i); SingleIntegrateThermal(8); CopyTree(Root,_Root); #ifndef __THOROW__ plan_parallel_chain(1,GenMinMaxTree(Root,0),FindMinMaxTree(NULL,0)/8); cout<<"Declarative markup: Result of MinMaxing = "<Data()<<"\n"; plan_parallel_chain(1,_GenMinMaxTree(_Root,0),_FindMinMaxTree(NULL,0)/8); cout<<"Partially Declarative way: Result of MinMaxing = "<<_Root->Data()<<"\n"; Out(Root,1); #if _OPENMP>=200805 omp3_FindMinMaxTree(Root,0); cout<<"\nOpenMP 3.0: MinMaxing = "<Data()<<"\n"; #endif #endif #endif cout<<"\nPARALLEL OUT\n"; ParallelOut(Root, Root, false); cout<<"\nFINISH\n"; delete[] Asave; #ifdef __MPI__ MPI_Finalize(); #endif } //---------------------------------------------------------------------------