RTOpToMPI.c

Go to the documentation of this file.
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 // 
00005 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
00006 //                  Copyright (2003) Sandia Corporation
00007 // 
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 // 
00011 // This library is free software; you can redistribute it and/or modify
00012 // it under the terms of the GNU Lesser General Public License as
00013 // published by the Free Software Foundation; either version 2.1 of the
00014 // License, or (at your option) any later version.
00015 //  
00016 // This library is distributed in the hope that it will be useful, but
00017 // WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00019 // Lesser General Public License for more details.
00020 //  
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License along with this library; if not, write to the Free Software
00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00024 // USA
00025 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 
00026 // 
00027 // ***********************************************************************
00028 // @HEADER
00029 */
00030 
00031 #include "RTOpToMPI.h"
00032 
00033 #include <stdlib.h>
00034 
00035 #ifdef RTOP_TO_MPI_SHOW_TIMES
00036 #include <time.h>
00037 #include <stdio.h>
00038 #endif
00039 
00040 void RTOp_MPI_type_signature(
00041   const int num_values
00042   ,const int num_indexes
00043   ,const int num_chars
00044   ,int* num_entries
00045   ,int block_lengths[]
00046   ,MPI_Aint displacements[]
00047   ,MPI_Datatype datatypes[]
00048   )
00049 {
00050   int k = 0, off = 0;
00051   /* values */
00052   block_lengths[k] = 3 + num_values; /* must carry size information */
00053   displacements[k] = 0;
00054   datatypes[k]     = RTOpMPI_VALUE_TYPE;
00055   ++k;
00056   off += (3 + num_values) * sizeof(RTOp_value_type);
00057   /* indexes */
00058   if( num_indexes ) {
00059     block_lengths[k] = num_indexes;
00060     displacements[k] = off;
00061     datatypes[k]     = RTOpMPI_INDEX_TYPE;
00062     ++k;
00063     off += num_indexes * sizeof(RTOp_index_type);
00064   }
00065   /* chars */
00066   if( num_chars ) {
00067     block_lengths[k] = num_chars;
00068     displacements[k] = off;
00069     datatypes[k]     = RTOpMPI_CHAR_TYPE;
00070     ++k;
00071   }
00072   *num_entries = k;
00073 }
00074 
00075 int RTOp_extract_reduct_obj_ext_state(
00076   const struct RTOp_RTOp*   op
00077   ,RTOp_ReductTarget        reduct_obj
00078   ,int                      num_values
00079   ,int                      num_indexes
00080   ,int                      num_chars
00081   ,void*                    reduct_obj_ext
00082   )
00083 {
00084   int err = 0;
00085   int
00086     num_values_off  = 0,
00087     num_indexes_off = num_values_off  + sizeof(RTOp_value_type),
00088     num_chars_off   = num_indexes_off + sizeof(RTOp_value_type),
00089     values_off      = num_chars_off   + sizeof(RTOp_value_type),
00090     indexes_off     = values_off      + num_values  * sizeof(RTOp_value_type),
00091     chars_off       = indexes_off     + num_indexes * sizeof(RTOp_index_type);
00092   *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off)  = num_values;
00093   *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off) = num_indexes;
00094   *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off)   = num_chars;
00095   err = RTOp_extract_reduct_obj_state(
00096     op,reduct_obj
00097     ,num_values,  (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
00098     ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
00099     ,num_chars,   (RTOp_char_type*)((char*)reduct_obj_ext  + chars_off)
00100     );
00101   return err;
00102 }
00103 
00104 int RTOp_load_reduct_obj_ext_state(
00105   const struct RTOp_RTOp*   op
00106   ,const void*              reduct_obj_ext
00107   ,RTOp_ReductTarget        reduct_obj
00108   )
00109 {
00110   int err = 0;
00111   int
00112     num_values_off  = 0,
00113     num_values      =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_values_off),
00114     num_indexes_off = num_values_off  + sizeof(RTOp_value_type),
00115     num_indexes     =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_indexes_off),
00116     num_chars_off   = num_indexes_off + sizeof(RTOp_value_type),
00117     num_chars       =                   *(RTOp_value_type*)((char*)reduct_obj_ext + num_chars_off),
00118     values_off      = num_chars_off + sizeof(RTOp_value_type),
00119     indexes_off     = values_off  + num_values  * sizeof(RTOp_value_type),
00120     chars_off       = indexes_off + num_indexes * sizeof(RTOp_index_type);
00121   err = RTOp_load_reduct_obj_state(
00122     op
00123     ,num_values,   (RTOp_value_type*)((char*)reduct_obj_ext + values_off)
00124     ,num_indexes, (RTOp_index_type*)((char*)reduct_obj_ext + indexes_off)
00125     ,num_chars,   (RTOp_char_type*)((char*) reduct_obj_ext + chars_off)
00126     ,reduct_obj
00127     );
00128   return err;
00129 }
00130 
00131 int RTOp_MPI_apply_op(
00132   MPI_Comm comm, const struct RTOp_RTOp* op, int root_rank
00133   ,const int num_cols
00134   ,const int num_vecs, const struct RTOp_SubVector sub_vecs[]
00135   ,const int num_targ_vecs, const struct RTOp_MutableSubVector sub_targ_vecs[]
00136   ,RTOp_ReductTarget reduct_objs[]
00137     )
00138 {
00139   int err = 0;
00140   int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
00141     num_reduct_type_chars = 0,  num_reduct_type_entries = 0;
00142   RTOp_ReductTarget           *i_reduct_objs = NULL;
00143   int                         target_type_block_lengths[3];
00144   MPI_Aint                    target_type_displacements[3];
00145   RTOp_Datatype               target_type_datatypes[3];
00146   MPI_Datatype                mpi_reduct_ext_type = MPI_DATATYPE_NULL;
00147   int                         reduct_obj_ext_size = 0;
00148   char                        *i_reduct_objs_ext = NULL;
00149   char                        *i_reduct_objs_tmp = NULL;
00150   RTOp_reduct_op_func_ptr_t   reduct_op_func_ptr  = NULL;
00151   MPI_Op                      mpi_op = MPI_OP_NULL;
00152   int                         rank = -1;
00153   int                         k;
00154   int                         kc;
00155 #ifdef RTOP_TO_MPI_SHOW_TIMES
00156     const double secs_per_tick = ((double)1.0) / CLOCKS_PER_SEC;
00157   clock_t ticks_start_start, ticks_start=0, ticks_end = 0;
00158 #endif
00159 
00160   if( comm == MPI_COMM_NULL ) {
00161     /* Just do the reduction on this processor and be done with it! */
00162     if( sub_vecs || sub_targ_vecs ) {
00163       for( kc = 0; kc < num_cols; ++kc ) {
00164         err = RTOp_apply_op(
00165           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00166           ,reduct_objs ? reduct_objs[kc] : RTOp_REDUCT_OBJ_NULL
00167           );
00168         if (err) goto ERR_LABEL;
00169       }
00170     }
00171     return 0; /* Success! */
00172   }
00173 
00174   /* 
00175    * Get the description of the datatype of the target object. 
00176    * We need this in order to map it to an MPI user defined 
00177    * data type so that the reduction operations can be performed 
00178    * over all of the of processors. 
00179    * 
00180    * Get the number of the entries in the array that describes 
00181    * target type's datatypes 
00182    */
00183   err = RTOp_get_reduct_type_num_entries(
00184     op, &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars );
00185   if(err) goto ERR_LABEL;
00186   num_reduct_type_entries
00187     = (num_reduct_type_values ? 1 : 0)
00188     + (num_reduct_type_indexes ? 1 : 0)
00189     + (num_reduct_type_chars ? 1 : 0);
00190 
00191   if( num_reduct_type_entries ) {
00192 #ifdef RTOP_TO_MPI_SHOW_TIMES
00193     if(RTOp_MPI_apply_op_print_timings) {
00194       printf("RTOp_MPI_apply_op(...) : timing various MPI calls and other activities\n");
00195       ticks_start_start = clock();
00196     }
00197 #endif
00198     /*
00199      * There is a non-null reduction target object so we need to reduce 
00200      * it across processors 
00201      * 
00202      * Allocate the intermediate target object and perform the reduction for the 
00203      * vector elements on this processor. 
00204      */
00205         i_reduct_objs = malloc( sizeof(RTOp_ReductTarget) * num_cols );
00206     for( kc = 0; kc < num_cols; ++kc ) {
00207       i_reduct_objs[kc] = RTOp_REDUCT_OBJ_NULL;
00208       RTOp_reduct_obj_create( op, i_reduct_objs + kc );
00209     }
00210 #ifdef RTOP_TO_MPI_SHOW_TIMES
00211     if(RTOp_MPI_apply_op_print_timings) {
00212       printf("calling RTOp_apply_op(...)");
00213       ticks_start = clock();
00214     }
00215 #endif
00216     if( sub_vecs || sub_targ_vecs ) {
00217       for( kc = 0; kc < num_cols; ++kc ) {
00218         err = RTOp_apply_op(
00219           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00220           ,i_reduct_objs[kc]
00221           );
00222         if (err) goto ERR_LABEL;
00223       }
00224     }
00225 #ifdef RTOP_TO_MPI_SHOW_TIMES
00226     if(RTOp_MPI_apply_op_print_timings) {
00227       ticks_end = clock();
00228       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00229     }
00230 #endif
00231     /* Get the arrays that describe the reduction object datatype */
00232     if( num_reduct_type_values == 0 ) ++num_reduct_type_entries; /* Add the block for the sizes */
00233     RTOp_MPI_type_signature(
00234       num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
00235       ,&num_reduct_type_entries
00236       ,target_type_block_lengths, target_type_displacements, target_type_datatypes
00237       );
00238     /* Translate this target datatype description to an MPI datatype */
00239 #ifdef RTOP_TO_MPI_SHOW_TIMES
00240     if(RTOp_MPI_apply_op_print_timings) {
00241       printf("calling MPI_Type_struct(...)");
00242       ticks_start = clock();
00243     }
00244 #endif
00245     err = MPI_Type_struct( num_reduct_type_entries
00246                  , target_type_block_lengths, target_type_displacements
00247                  , target_type_datatypes, &mpi_reduct_ext_type );
00248 #ifdef RTOP_TO_MPI_SHOW_TIMES
00249     if(RTOp_MPI_apply_op_print_timings) {
00250       ticks_end = clock();
00251       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00252     }
00253 #endif
00254     if(err) goto ERR_LABEL;
00255 #ifdef RTOP_TO_MPI_SHOW_TIMES
00256     if(RTOp_MPI_apply_op_print_timings) {
00257       printf("calling MPI_Type_commit(...)");
00258       ticks_start = clock();
00259     }
00260 #endif
00261     err = MPI_Type_commit( &mpi_reduct_ext_type );
00262 #ifdef RTOP_TO_MPI_SHOW_TIMES
00263     if(RTOp_MPI_apply_op_print_timings) {
00264       ticks_end = clock();
00265       printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00266     }
00267 #endif
00268     if(err) goto ERR_LABEL;
00269     /* */
00270     /* Create an external contiguous representation for the intermediate reduction object */
00271     /* that MPI can use.  This is very low level but at least the user */
00272     /* does not have to do it. */
00273     /* */
00274     reduct_obj_ext_size =
00275       (3 + num_reduct_type_values) * sizeof(RTOp_value_type) +
00276       num_reduct_type_indexes      * sizeof(RTOp_index_type) +
00277       num_reduct_type_chars        * sizeof(RTOp_char_type);
00278     i_reduct_objs_ext = malloc( reduct_obj_ext_size * num_cols );
00279     for( kc = 0; kc < num_cols; ++kc ) {
00280       RTOp_extract_reduct_obj_ext_state(
00281         op,i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00282         ,i_reduct_objs_ext+kc*reduct_obj_ext_size
00283         );
00284     }
00285     /* */
00286     /* Now perform a global reduction over all of the processors. */
00287     /* */
00288     /* Get the user defined reduction operation for MPI to use */
00289     RTOp_get_reduct_op( op, &reduct_op_func_ptr );
00290     if( reduct_op_func_ptr != NULL ) {
00291       /* */
00292       /* The operator object has returned an MPI compatible reduction function so */
00293       /* MPI will be of great help in preforming the global reduction :-). */
00294       /* */
00295       /* Create the MPI reduction operator object */
00296       /* */
00297 #ifdef RTOP_TO_MPI_SHOW_TIMES
00298       if(RTOp_MPI_apply_op_print_timings) {
00299         printf("calling MPI_Op_create(...)");
00300         ticks_start = clock();
00301       }
00302 #endif
00303       err = MPI_Op_create(
00304         reduct_op_func_ptr
00305         ,1                   /* The op is communitive? yes == 1! */
00306         ,&mpi_op
00307         );
00308 #ifdef RTOP_TO_MPI_SHOW_TIMES
00309       if(RTOp_MPI_apply_op_print_timings) {
00310         ticks_end = clock();
00311         printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00312       }
00313 #endif
00314       if(err) goto ERR_LABEL;
00315       if( root_rank >= 0 ) {
00316         MPI_Comm_rank( comm, &rank );
00317         /* Apply the reduction operation over all of the processors */
00318         /* and reduce to one target object only on the root processor */
00319         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
00320         err = MPI_Reduce(
00321           i_reduct_objs_ext
00322           ,rank == root_rank ? i_reduct_objs_tmp : NULL  /* I don't know if this is MPI legal? */
00323           ,num_cols, mpi_reduct_ext_type
00324           ,mpi_op, root_rank, comm
00325           );
00326         if(err) goto ERR_LABEL;
00327         if( rank == root_rank ) { /* bit-by-bit copy */
00328           for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
00329         }
00330       }
00331       else {
00332         /* Apply the reduction operation over all of the processors */
00333         /* and reduce to one target object on each processor */
00334         i_reduct_objs_tmp = malloc( reduct_obj_ext_size * num_cols );
00335 #ifdef RTOP_TO_MPI_SHOW_TIMES
00336         if(RTOp_MPI_apply_op_print_timings) {
00337           printf("calling MPI_Allreduce(...)");
00338           ticks_start = clock();
00339         }
00340 #endif
00341         err = MPI_Allreduce(
00342           i_reduct_objs_ext, i_reduct_objs_tmp, num_cols
00343           ,mpi_reduct_ext_type, mpi_op, comm
00344           );
00345 #ifdef RTOP_TO_MPI_SHOW_TIMES
00346         if(RTOp_MPI_apply_op_print_timings) {
00347           ticks_end = clock();
00348           printf(" : cpu time = %g s\n", (ticks_end-ticks_start)*secs_per_tick );
00349         }
00350 #endif
00351         if(err) goto ERR_LABEL;
00352         for( k = 0; k < reduct_obj_ext_size * num_cols; ++k ) i_reduct_objs_ext[k] = i_reduct_objs_tmp[k];
00353       }
00354       /* Load the updated state of the reduction target object and reduce. */
00355       for( kc = 0; kc < num_cols; ++kc ) {
00356         RTOp_load_reduct_obj_ext_state( op, i_reduct_objs_ext+kc*reduct_obj_ext_size, i_reduct_objs[kc] );
00357         RTOp_reduce_reduct_objs( op, i_reduct_objs[kc], reduct_objs[kc] );
00358       }
00359     }
00360     else {
00361       /* */
00362       /* We must do without the MPI compatible reduction function :-(  We must */
00363       /* manually perform the reduction operation ourselves.  Note, this will */
00364       /* not be as efficient as when MPI would do it but it helps take some */
00365       /* of the burden off of the developers of operator classes. */
00366       /* */
00367       /* What we need to do is to gather all of the intermediate reduction */
00368       /* objects to the root process and then do the reduction pair-wise. */
00369       /* */
00370       assert( reduct_op_func_ptr ); /* ToDo: Implement! */
00371     }
00372   }
00373   else {
00374     /* */
00375     /* There is a null reduction target object so we don't need to reduce */
00376     /* it across processors */
00377     /* */
00378     if( sub_vecs || sub_targ_vecs ) {
00379       for( kc = 0; kc < num_cols; ++kc ) {
00380         err = RTOp_apply_op(
00381           op, num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00382           ,RTOp_REDUCT_OBJ_NULL
00383           );
00384         if (err) goto ERR_LABEL;
00385       }
00386     }
00387   }
00388 
00389 ERR_LABEL:
00390 
00391   /* Clean up dynamically allocated memory! */
00392 
00393   if( i_reduct_objs_tmp != NULL )
00394     free( i_reduct_objs_tmp );
00395   if( mpi_op != MPI_OP_NULL )
00396     MPI_Op_free( &mpi_op );
00397   if( i_reduct_objs_ext != NULL )
00398     free( i_reduct_objs_ext );
00399   if( mpi_reduct_ext_type != MPI_DATATYPE_NULL )
00400     MPI_Type_free( &mpi_reduct_ext_type );
00401   if( i_reduct_objs != NULL ) {
00402     for( kc = 0; kc < num_cols; ++kc )
00403       RTOp_reduct_obj_free( op, &i_reduct_objs[0] );
00404     free( i_reduct_objs );
00405   }
00406 
00407   return err;
00408 }
00409 
00410 #ifdef RTOP_TO_MPI_SHOW_TIMES
00411 int RTOp_MPI_apply_op_print_timings = 0;
00412 #endif

Generated on Wed May 12 21:52:32 2010 for MOOCHO (Single Doxygen Collection) by  doxygen 1.4.7