MOOCHO (Single Doxygen Collection) Version of the Day
RTOpToMPI.c
Go to the documentation of this file.
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 // 
00005 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
00006 //                  Copyright (2003) Sandia Corporation
00007 // 
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 // 
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 
00039 // 
00040 // ***********************************************************************
00041 // @HEADER
00042 */
00043 
00044 #include "RTOpToMPI.h"
00045 
00046 #include <stdlib.h>
00047 
00048 #ifdef RTOP_TO_MPI_SHOW_TIMES
00049 #include <time.h>
00050 #include <stdio.h>
00051 #endif
00052 
00053 void RTOp_MPI_type_signature(
00054   const int num_values
00055   ,const int num_indexes
00056   ,const int num_chars
00057   ,int* num_entries
00058   ,int block_lengths[]
00059   ,MPI_Aint displacements[]
00060   ,MPI_Datatype datatypes[]
00061   )
00062 {
00063   int k = 0, off = 0;
00064   /* values */
00065   block_lengths[k] = 3 + num_values; /* must carry size information */
00066   displacements[k] = 0;
00067   datatypes[k]     = RTOpMPI_VALUE_TYPE;
00068   ++k;
00069   off += (3 + num_values) * sizeof(RTOp_value_type);
00070   /* indexes */
00071   if( num_indexes ) {
00072     block_lengths[k] = num_indexes;
00073     displacements[k] = off;
00074     datatypes[k]     = RTOpMPI_INDEX_TYPE;
00075     ++k;
00076     off += num_indexes * sizeof(RTOp_index_type);
00077   }
00078   /* chars */
00079   if( num_chars ) {
00080     block_lengths[k] = num_chars;
00081     displacements[k] = off;
00082     datatypes[k]     = RTOpMPI_CHAR_TYPE;
00083     ++k;
00084   }
00085   *num_entries = k;
00086 }
00087 
00088 int RTOp_extract_reduct_obj_ext_state(
00089   const struct RTOp_RTOp*   op
00090   ,RTOp_ReductTarget        reduct_obj
00091   ,int                      num_values
00092   ,int                      num_indexes
00093   ,int                      num_chars
00094   ,void*                    reduct_obj_ext
00095   )
00096 {
00097   int err = 0;
00098   int
00099     num_values_off  = 0,
00100     num_indexes_off = num_values_off  + sizeof(RTOp_value_type),
00101     num_chars_off   = num_indexes_off + sizeof(RTOp_value_type),
00102     values_off      = num_chars_off   + sizeof(RTOp_value_type),
00103     indexes_off     = values_off      + num_values  * sizeof(RTOp_value_type),
00104     chars_off       = indexes_off     + num_indexes * sizeof(RTOp_index_type);
00105   *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off)  = num_values;
00106   *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off) = num_indexes;
00107   *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off)   = num_chars;
00108   err = RTOp_extract_reduct_obj_state(
00109     op,reduct_obj
00110     ,num_values,  (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
00111     ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
00112     ,num_chars,   (RTOp_char_type*)((char*)reduct_obj_ext  + chars_off)
00113     );
00114   return err;
00115 }
00116 
00117 int RTOp_load_reduct_obj_ext_state(
00118   const struct RTOp_RTOp*   op
00119   ,const void*              reduct_obj_ext
00120   ,RTOp_ReductTarget        reduct_obj
00121   )
00122 {
00123   int err = 0;
00124   int
00125     num_values_off  = 0,
00126     num_values      =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off),
00127     num_indexes_off = num_values_off  + sizeof(RTOp_value_type),
00128     num_indexes     =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off),
00129     num_chars_off   = num_indexes_off + sizeof(RTOp_value_type),
00130     num_chars       =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off),
00131     values_off      = num_chars_off + sizeof(RTOp_value_type),
00132     indexes_off     = values_off  + num_values  * sizeof(RTOp_value_type),
00133     chars_off       = indexes_off + num_indexes * sizeof(RTOp_index_type);
00134   err = RTOp_load_reduct_obj_state(
00135     op
00136     ,num_values,   (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
00137     ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
00138     ,num_chars,   (RTOp_char_type*)((char*) reduct_obj_ext + chars_off)
00139     ,reduct_obj
00140     );
00141   return err;
00142 }
00143 
00144 int RTOp_MPI_apply_op(
00145   MPI_Comm comm, const struct RTOp_RTOp* op, int root_rank
00146   ,const int num_cols
00147   ,const int num_vecs, const struct RTOp_SubVector sub_vecs[]
00148   ,const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[]
00149   ,RTOp_ReductTarget reduct_objs[]
00150     )
00151 {
00152   int err = 0;
00153   int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
00154     num_reduct_type_chars = 0,  num_reduct_type_entries = 0;
00155   RTOp_ReductTarget           *i_reduct_objs = NULL;
00156   int                         target_type_block_lengths[3];
00157   MPI_Aint                    target_type_displacements[3];
00158   RTOp_Datatype               target_type_datatypes[3];
00159   MPI_Datatype                mpi_reduct_ext_type = MPI_DATATYPE_NULL;
00160   int                         reduct_obj_ext_size = 0;
00161   char                        *i_reduct_objs_ext = NULL;
00162   char                        *i_reduct_objs_tmp = NULL;
00163   RTOp_reduct_op_func_ptr_t   reduct_op_func_ptr  = NULL;
00164   MPI_Op                      mpi_op = MPI_OP_NULL;
00165   int                         rank = -1;
00166   int                         k;
00167   int                         kc;
00168 #ifdef RTOP_TO_MPI_SHOW_TIMES
00169     const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
00170   clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
00171 #endif
00172 
00173   if( comm == MPI_COMM_NULL ) {
00174     /* Just do the reduction on this processor and be done with it! */
00175     if( sub_vecs || sub_targ_vecs ) {
00176       for( kc = 0; kc < num_cols; ++kc ) {
00177         err = RTOp_apply_op(
00178           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00179           ,reduct_objs ? reduct_objs[kc] : RTOp_REDUCT_OBJ_NULL
00180           );
00181         if (err) goto ERR_LABEL;
00182       }
00183     }
00184     return 0; /* Success! */
00185   }
00186 
00187   /* 
00188    * Get the description of the datatype of the target object. 
00189    * We need this in order to map it to an MPI user defined 
00190    * data type so that the reduction operations can be performed 
00191    * over all of the of processors. 
00192    * 
00193    * Get the number of the entries in the array that describes 
00194    * target type's datatypes 
00195    */
00196   err = RTOp_get_reduct_type_num_entries(
00197     op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
00198   if(err) goto ERR_LABEL;
00199   num_reduct_type_entries
00200     = (num_reduct_type_values ? 1 : 0)
00201     + (num_reduct_type_indexes ? 1 : 0)
00202     + (num_reduct_type_chars ? 1 : 0);
00203 
00204   if( num_reduct_type_entries ) {
00205 #ifdef RTOP_TO_MPI_SHOW_TIMES
00206     if(RTOp_MPI_apply_op_print_timings) {
00207       printf("RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
00208       ticks_start_start = clock();
00209     }
00210 #endif
00211     /*
00212      * There is a non-null reduction target object so we need to reduce 
00213      * it across processors 
00214      * 
00215      * Allocate the intermediate target object and perform the reduction for the 
00216      * vector elements on this processor. 
00217      */
00218         i_reduct_objs = malloc( sizeof(RTOp_ReductTarget) * num_cols );
00219     for( kc = 0; kc < num_cols; ++kc ) {
00220       i_reduct_objs[kc] = RTOp_REDUCT_OBJ_NULL;
00221       RTOp_reduct_obj_create( op, i_reduct_objs + kc );
00222     }
00223 #ifdef RTOP_TO_MPI_SHOW_TIMES
00224     if(RTOp_MPI_apply_op_print_timings) {
00225       printf("calling RTOp_apply_op(...)");
00226       ticks_start = clock();
00227     }
00228 #endif
00229     if( sub_vecs || sub_targ_vecs ) {
00230       for( kc = 0; kc < num_cols; ++kc ) {
00231         err = RTOp_apply_op(
00232           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00233           ,i_reduct_objs[kc]
00234           );
00235         if (err) goto ERR_LABEL;
00236       }
00237     }
00238 #ifdef RTOP_TO_MPI_SHOW_TIMES
00239     if(RTOp_MPI_apply_op_print_timings) {
00240       ticks_end = clock();
00241       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00242     }
00243 #endif
00244     /* Get the arrays that describe the reduction object datatype */
00245     if( num_reduct_type_values == 0 ) ++num_reduct_type_entries; /* Add the block for the sizes */
00246     RTOp_MPI_type_signature(
00247       num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
00248       ,&num_reduct_type_entries
00249       ,target_type_block_lengths, target_type_displacements, target_type_datatypes
00250       );
00251     /* Translate this target datatype description to an MPI datatype */
00252 #ifdef RTOP_TO_MPI_SHOW_TIMES
00253     if(RTOp_MPI_apply_op_print_timings) {
00254       printf("calling MPI_Type_struct(...)");
00255       ticks_start = clock();
00256     }
00257 #endif
00258     err = MPI_Type_struct( num_reduct_type_entries
00259                  , target_type_block_lengths, target_type_displacements
00260                  , target_type_datatypes, &mpi_reduct_ext_type );
00261 #ifdef RTOP_TO_MPI_SHOW_TIMES
00262     if(RTOp_MPI_apply_op_print_timings) {
00263       ticks_end = clock();
00264       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00265     }
00266 #endif
00267     if(err) goto ERR_LABEL;
00268 #ifdef RTOP_TO_MPI_SHOW_TIMES
00269     if(RTOp_MPI_apply_op_print_timings) {
00270       printf("calling MPI_Type_commit(...)");
00271       ticks_start = clock();
00272     }
00273 #endif
00274     err = MPI_Type_commit( &mpi_reduct_ext_type );
00275 #ifdef RTOP_TO_MPI_SHOW_TIMES
00276     if(RTOp_MPI_apply_op_print_timings) {
00277       ticks_end = clock();
00278       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00279     }
00280 #endif
00281     if(err) goto ERR_LABEL;
00282     /* */
00283     /* Create an external contiguous representation for the intermediate reduction object */
00284     /* that MPI can use.  This is very low level but at least the user */
00285     /* does not have to do it. */
00286     /* */
00287     reduct_obj_ext_size =
00288       (3 + num_reduct_type_values) * sizeof(RTOp_value_type) +
00289       num_reduct_type_indexes      * sizeof(RTOp_index_type) +
00290       num_reduct_type_chars        * sizeof(RTOp_char_type);
00291     i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
00292     for( kc = 0; kc < num_cols; ++kc ) {
00293       RTOp_extract_reduct_obj_ext_state(
00294         op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00295         ,i_reduct_objs_ext+kc*reduct_obj_ext_size
00296         );
00297     }
00298     /* */
00299     /* Now perform a global reduction over all of the processors. */
00300     /* */
00301     /* Get the user defined reduction operation for MPI to use */
00302     RTOp_get_reduct_op( op, &reduct_op_func_ptr );
00303     if( reduct_op_func_ptr != NULL ) {
00304       /* */
00305       /* The operator object has returned an MPI compatible reduction function so */
00306       /* MPI will be of great help in preforming the global reduction :-). */
00307       /* */
00308       /* Create the MPI reduction operator object */
00309       /* */
00310 #ifdef RTOP_TO_MPI_SHOW_TIMES
00311       if(RTOp_MPI_apply_op_print_timings) {
00312         printf("calling MPI_Op_create(...)");
00313         ticks_start = clock();
00314       }
00315 #endif
00316       err = MPI_Op_create(
00317         reduct_op_func_ptr
00318         ,1                   /* The op is communitive? yes == 1! */
00319         ,&mpi_op
00320         );
00321 #ifdef RTOP_TO_MPI_SHOW_TIMES
00322       if(RTOp_MPI_apply_op_print_timings) {
00323         ticks_end = clock();
00324         printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00325       }
00326 #endif
00327       if(err) goto ERR_LABEL;
00328       if( root_rank >= 0 ) {
00329         MPI_Comm_rank( comm, &rank );
00330         /* Apply the reduction operation over all of the processors */
00331         /* and reduce to one target object only on the root processor */
00332         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
00333         err = MPI_Reduce(
00334           i_reduct_objs_ext
00335           ,rank == root_rank ? i_reduct_objs_tmp : NULL  /* I don't know if this is MPI legal? */
00336           ,num_cols, mpi_reduct_ext_type
00337           ,mpi_op, root_rank, comm
00338           );
00339         if(err) goto ERR_LABEL;
00340         if( rank == root_rank ) { /* bit-by-bit copy */
00341           for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
00342         }
00343       }
00344       else {
00345         /* Apply the reduction operation over all of the processors */
00346         /* and reduce to one target object on each processor */
00347         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
00348 #ifdef RTOP_TO_MPI_SHOW_TIMES
00349         if(RTOp_MPI_apply_op_print_timings) {
00350           printf("calling MPI_Allreduce(...)");
00351           ticks_start = clock();
00352         }
00353 #endif
00354         err = MPI_Allreduce(
00355           i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
00356           ,mpi_reduct_ext_type, mpi_op, comm
00357           );
00358 #ifdef RTOP_TO_MPI_SHOW_TIMES
00359         if(RTOp_MPI_apply_op_print_timings) {
00360           ticks_end = clock();
00361           printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00362         }
00363 #endif
00364         if(err) goto ERR_LABEL;
00365         for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
00366       }
00367       /* Load the updated state of the reduction target object and reduce. */
00368       for( kc = 0; kc < num_cols; ++kc ) {
00369         RTOp_load_reduct_obj_ext_state( op, i_reduct_objs_ext+kc*reduct_obj_ext_size, i_reduct_objs[kc] );
00370         RTOp_reduce_reduct_objs( op, i_reduct_objs[kc], reduct_objs[kc] );
00371       }
00372     }
00373     else {
00374       /* */
00375       /* We must do without the MPI compatible reduction function :-(  We must */
00376       /* manually perform the reduction operation ourselves.  Note, this will */
00377       /* not be as efficient as when MPI would do it but it helps take some */
00378       /* of the burden off of the developers of operator classes. */
00379       /* */
00380       /* What we need to do is to gather all of the intermediate reduction */
00381       /* objects to the root process and then do the reduction pair-wise. */
00382       /* */
00383       assert( reduct_op_func_ptr ); /* ToDo: Implement! */
00384     }
00385   }
00386   else {
00387     /* */
00388     /* There is a null reduction target object so we don't need to reduce */
00389     /* it across processors */
00390     /* */
00391     if( sub_vecs || sub_targ_vecs ) {
00392       for( kc = 0; kc < num_cols; ++kc ) {
00393         err = RTOp_apply_op(
00394           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00395           ,RTOp_REDUCT_OBJ_NULL
00396           );
00397         if (err) goto ERR_LABEL;
00398       }
00399     }
00400   }
00401 
00402 ERR_LABEL:
00403 
00404   /* Clean up dynamically allocated memory! */
00405 
00406   if( i_reduct_objs_tmp != NULL )
00407     free( i_reduct_objs_tmp );
00408   if( mpi_op != MPI_OP_NULL )
00409     MPI_Op_free( &mpi_op );
00410   if( i_reduct_objs_ext != NULL )
00411     free( i_reduct_objs_ext );
00412   if( mpi_reduct_ext_type != MPI_DATATYPE_NULL )
00413     MPI_Type_free( &mpi_reduct_ext_type );
00414   if( i_reduct_objs != NULL ) {
00415     for( kc = 0; kc < num_cols; ++kc )
00416       RTOp_reduct_obj_free( op, &i_reduct_objs[0] );
00417     free( i_reduct_objs );
00418   }
00419 
00420   return err;
00421 }
00422 
00423 #ifdef RTOP_TO_MPI_SHOW_TIMES
00424 int RTOp_MPI_apply_op_print_timings = 0;
00425 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines