_LOOP SPLITTING UNDER WINDOWS NT_ by Charles Letner Listing One // File: CParallelProcess.h -- Programmer: Charles Letner /* This file contains the definiton of a class that implements loop splitting. There is a pure virtual function void LoopFunciton(unsigned int index) that must be defined in a derived class. This derived function contains the code for a single pass through the loop. The parameter is the index of the pass through the loop, ie/ 0->max index. Note that there is no garantee that lower indexs will be processed before a higher index. The derived header should include this file. Before the inclusion of this file the derived class header file must contain the two definitions: #define dDERIVED_CLASS_NAME (this is required for type casts that will be made in the function ParallelLoop) and #define dDERIVED_CLASS_HEADER "" (which defines the name of the header file for the dervied class. This is needed so that CParallelProcess.cpp can get the definition of the derived class. Again this is required due to the pure virtual function ParallelLoop.) Though the ISO standard supports this second use of a preprocessor definiton MSVC++ version 2.1 does not. To compile with MSVC++ you will have to manually enter the derived class header in CParallelProcess.cpp. */ #ifndef cal_PARALLELPROCESS #define cal_PARALLELPROCESS #define dDEFAULT_PROCESSORS 2 #include class CParallelProcess { // Memeber functions public: // Constructors and destructors CParallelProcess() { m_Processors = dDEFAULT_PROCESSORS; m_CounterEvent = CreateEvent(NULL, FALSE, TRUE, NULL); InitializeCriticalSection(&m_CSforProcessors); }; CParallelProcess(unsigned short int number_processors) { m_Processors = number_processors; m_CounterEvent = CreateEvent(NULL, FALSE, TRUE, NULL); InitializeCriticalSection(&m_CSforProcessors); }; ~CParallelProcess() { CloseHandle(m_CounterEvent); DeleteCriticalSection(&m_CSforProcessors); }; // This is the function that sets up the threads and executes // the parallel code. It calls LoopFunction. void CallParallelLoop(unsigned int m_First, unsigned int m_Last); // Member functions to obtain access to the variable m_Processors. // These functions allow an application to change the number of // processors that are available to a loop. unsigned int ReturnNumberProcessors(void) { return(m_Processors); }; void SetNumberProcessors(unsigned int NumberProcessors) { EnterCriticalSection(&m_CSforProcessors); m_Processors = NumberProcessors; LeaveCriticalSection(&m_CSforProcessors); }; protected: // This is a pure virtual function. It is required that derived // classes contain LoopFunction function to provide functionality // to the parallel loop. virtual void LoopFunction(unsigned int index)=0; private: // This is the function that each thread use to execute // the loop processing. static DWORD WINAPI ParallelLoop(LPVOID lpvParameters); // Member variables protected: // Variable defines the number of process to split loop over. This is // private to allow for protection of this value from modification by // another thread while CallParallelLoop is using this value; unsigned short int m_Processors; // These values are use to initalize the For loop in ParallelLoop. // Private to allow protection from modification by other threads. unsigned int m_Start; unsigned int m_Increment; unsigned int m_Stop; // The critical section and event objects to control accesd to // m_Start/m_Increment/m_Stop and m_Processor HANDLE m_CounterEvent; CRITICAL_SECTION m_CSforProcessors; }; // End class definition #endif Listing Two // File: CParallelProcess.cpp -- Programmer: Charles Letner // Choose one of these include depending on ISO compliency of your compiler. // MVC++ 2.1 does not support preprocessor defintions in #include statements. //#include dDERIVED_CLASS_HEADER #include "CParallelCounter.h" void CParallelProcess::CallParallelLoop(unsigned int first, unsigned int last) { HANDLE* lphThreads; DWORD* lpdThreadID; UINT process, index; // Enter a critical section to protect the value of m_Processors // from modification while the loop is executing. Create // the array of handles and thread ID's for use in loop splitting. EnterCriticalSection(&m_CSforProcessors); lphThreads = new HANDLE[m_Processors]; lpdThreadID = new DWORD[m_Processors]; // Loop to create the threads that implement loop splitting. One // thread is created for each processor available to execute the loop. for(process=first; process < m_Processors; process++) { // Enter a Event to protect the values that initialize the // for loops of each thread. WaitForSingleObject(m_CounterEvent, INFINITE); m_Start = process; m_Stop = last; m_Increment = m_Processors; lphThreads[process] = CreateThread(NULL, 0, ParallelLoop, this, 0, &lpdThreadID[process]); }; // End for loop // Put the main thread to sleep until all threads have exited and // the loop processing is complete WaitForMultipleObjects(m_Processors, lphThreads, TRUE, INFINITE); // Loop to close the handles to the threads for(index=0; index < m_Processors; index++) CloseHandle(lphThreads[index]); // Clean up the memory allocated for the thread handles // and arrays. Leave the critical section that protects m_Processors. delete []lphThreads; delete []lpdThreadID; LeaveCriticalSection(&m_CSforProcessors); } // End CallParallelLoop DWORD WINAPI CParallelProcess::ParallelLoop(LPVOID lpvParameters) { // Create a pointer to the object that called ParallelLoop by // use of the this pointer passed as the fourth parameter to // Create process in CallParallelLoop dDERIVED_CLASS_NAME *lpObject = (dDERIVED_CLASS_NAME *)lpvParameters; unsigned int index, first, last, step; DWORD dwResult = 0; // Initialize the variables that determine the indexing of // the for loop. Set the event to allow other // threads access to m_Start, m_Stop, and m_Increment. first = lpObject->m_Start; last = lpObject->m_Stop; step = lpObject->m_Increment; SetEvent(lpObject->m_CounterEvent); // Enter the for loop for the thread. Call LoopFunction to process data. // The variable index contains the index of iteration through the loop. for(index = first; index < last+1; index = index + step ) lpObject->LoopFunction(index); return(dwResult); } // End ParallelLoop Listing Three // File: sample1.cpp -- Programmer: Charles Letner // Determines number of times loop is executed. Value will take approximate 10 // seconds on a single Pentium 100 as a MSVC++ 2.1 compile. On // dual Pentium 100s, requires 5 seconds. Link this with libcmt.lib #define dMAXINDEX 10000 #include "CParallelCounter.h" #include #include void main(void) { // Declare two instance of CParallelCounter. scalar_object // will run on one processor while parallel_object will run on two. CParallelCounter scalar_object(1); CParallelCounter parallel_object(2); // Declare variables to time the loop execution. time_t inital_time, final_time; double total_time; // Run loop on one processor and report time required to process loop. time(&inital_time); scalar_object.CallParallelLoop(0, dMAXINDEX); time(&final_time); total_time = difftime(final_time, inital_time); cout << "The scalar loop required " << total_time << " seconds." << endl; // Run loop on two processors and report time required to process loop time(&inital_time); parallel_object.CallParallelLoop(0, dMAXINDEX); time(&final_time); total_time = difftime(final_time, inital_time); cout << "The parallel loop required " << total_time << " seconds." << endl; } // End main Listing Four // File: sample2.cpp -- Programmer: Charles Letner #include "CMatrixSum.h" void main(void) { // Declare an instance of CMatrixSum and initialize it to use 2 // processors for loop processing. CMatrixSum parallel_object(2); // Sum the entries of the array and print out the the result parallel_object.SumArray(); parallel_object.PrintSum(); }; // End main Listing Five // File: CParallelCounter.h -- Programmer: Charles Letner #define dDERIVED_CLASS_NAME CParallelCounter // Determines the number of time that the loop to simulate long processing // will run. See comments of sample1.cpp for information on timing obtained. #define dITERATIONS 10000 // Include the definiton of CParallelProcess #include "CParallelProcess.h" // Declare CParallelCounter to inherit CParallelProcess as public. This // is done so that CallParallelLoop is accesible in main. class CParallelCounter : public CParallelProcess { public: CParallelCounter(unsigned int processors) : CParallelProcess(processors) { /* Empty constructor */ }; // function that implements functionality for each iteration of loop. void LoopFunction(unsigned int iteration) { unsigned int index; double value; for(index=0; index < dITERATIONS; index++) value = (double) 300.0 / (double) 1.5; }; }; // End CParallelCounter definition Listing Six // File: CMatrixSum.h -- Programmer: Charles Letner #ifndef cal_CMATRIX_SUM #define cal_CMATRIX_SUM #define dDERIVED_CLASS_NAME CMatrixSum #define dNUMBER_ELEMENTS 10000 #include #include #include "CParallelProcess.h" class CMatrixSum : protected CParallelProcess { public: CMatrixSum(unsigned int number_processors) : CParallelProcess(number_processors) { InitializeArray(); m_Sum = (double) 0.0; InitializeCriticalSection(&m_CSforSum); }; ~CMatrixSum() { DeleteCriticalSection(&m_CSforSum); }; // Two functions that can be called to process the array with // multipule processors void SumArray(void) { // Initialize the pointer fp to point to Sum // and call CallParallelLoop. The second parameter // passed to CallParallelLoop is the maximum index // allowed. fp = &CMatrixSum::Sum; CallParallelLoop(0, dNUMBER_ELEMENTS-1); }; void InitializeArray(void) { // Initialize the pointer fp to point to Initialize // and call CallParallelLoop. The second parameter // passed to CallParallelLoop is the maximum index // allowed. fp = &CMatrixSum::Initialize; CallParallelLoop(0, dNUMBER_ELEMENTS-1); }; // A function to print out the sum of the elements in array void PrintSum(void) { cout << "The sum of the array is: "<< m_Sum << endl; }; /* ------ Section of definition needed for parallel processing ---- */ // The function called by CParallelPrcoess to process a single index void LoopFunction(unsigned int value) { // Call the function pointed to by the pointer fp and pass // it the value. (this->*fp)(value); }; // The two functions that perform the tasks for each pass through the // parallel loop void Initialize(unsigned int index) { m_Array[index] = (double) 1.0; }; void Sum(unsigned int index) { // Envoke a critical section so that two threads can not be // accessin m_Sum at the same time. EnterCriticalSection(&m_CSforSum); m_Sum = m_Sum + m_Array[index]; LeaveCriticalSection(&m_CSforSum); }; private: double m_Array[dNUMBER_ELEMENTS]; double m_Sum; // The declaration of a pointer to a member function of CMatrixSum // that takes an unsigned int as a parameter. void (CMatrixSum::*fp)(unsigned int); // The critical section to protect the variable m_Sum CRITICAL_SECTION m_CSforSum; }; // End CMatrixSum definiton #endif