Download Lecture 7

Survey
yes no Was this document useful for you?
   Thank you for your participation!

* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project

Document related concepts
no text concepts found
Transcript
Why Derived Data Types
j
A[100][80][50]
struct _tagStudent {
int id;
double grade;
char note[100];
};
i
struct _tagStudent Students[25];
k
Surface [i][j][0]
 Message data contains different data types
 Can use several separate messages  performance may not be
good
 Message data involves non-contiguous memory
locations
 Can copy non-contiguous data to a contiguous storage, then
communicate  additional memory copies
1
Derived Data Type
MPI’s solution: derived data type
No additional memory copy
Transfer directly of data with various shape and size
Idea: Specify the memory layout of data and
corresponding basic data types.
Usage:
Construct derived data type
Commit derived data type
Use it in communication routines where
MPI_Datatype argument is required.
Free derived data type
2
Type Map & Type Signature
 A general data type consists of
 A sequence of basic data types
 A sequence of byte displacements
 Type map: sequence of pairs (basic data type,
displacement) for the general data type
 E.g. double A[2]  {(MPI_DOUBLE,0),
(MPI_DOUBLE,8)}
 _tagStudent  {(MPI_INT,0), (MPI_DOUBLE,8),
(MPI_CHAR,16), …}
 Type signature: sequence of basic data types for the
general data type
 E.g. double A[2]  {MPI_DOUBLE, MPI_DOUBLE}
 _tagStudent  {MPI_INT, MPI_DOUBLE, MPI_CHAR …}
3
Communication Buffer
Given a type map
{(type0,disp0),(type1,disp1)} and
base address buf, the communication buffer:
Consists of 2 entries
1st entry at address buf+disp0, of type type0;
2nd entry at address buf+disp1, of type type1.
E.g. double A[2]  1st entry at A, of type
MPI_DOUBLE; 2nd entry at A+8, of type
MPI_DOUBLE.
If type map contains n entries  similar
semantics
4
Type Constructor
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
MPI_Datatype *newtype)
MPI_TYPE_CONTIGUOUS(COUNT, OLDTYPE, NEWTYPE, IERROR)
integer COUNT, OLDTYPE, NEWTYPE, IERROR
newtype is a concatenation of count copies of
oldtype
oldtype can be a basic data type or a derived
data type
j
A[100][80][50]
Surface: A[0][:][:]
MPI_Datatype face_jk;
MPI_Type_contiguous(80*50, MPI_DOUBLE, &face_jk);
MPI_Type_commit(&face_jk);
MPI_Send(&A[0][0][0],1,face_jk,rank,tag,comm);
// MPI_Send(&A[0][0][0],80*50,MPI_DOUBLE,rank,tag,comm);
MPI_Send(&A[99][0][0],1,face_jk,rank,tag,comm);
...
MPI_Type_free(&face_jk);
i
k
5
Type Constructor
int MPI_Type_vector(int count, int blocklength, int stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklength
stride
 count – number of blocks
 blocklength – number of elements in each block, in terms of oldtype
 stride – number of elements between start of each block, in terms of
oldtype
 oldtype – old data type, can be basic or derived data type
 newtype – created new data type
 Data consists of equally spaced blocks: same oldtype, same block
length, same spacing in terms of oldtype
 Each block is a concatenation of blocklength copies of old datatype
 Spacing between blocks is stride number of oldtype.
6
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_vector(4,1,4,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
MPI_Send(&A[0][1],1,column,rank,tag,comm);
MPI_Send(&A[0][3],1,column, rank, tag, comm);
...
A[4][4]
Surface: A[:][0][:]
j
double A[100][80][50];
MPI_Datatype face_ik;
A[100][80][50]
MPI_Type_vector(100,50,80*50,MPI_DOUBLE,&face_ik);
MPI_Type_commit(&face_ik);
MPI_Send(&A[0][0][0],1,face_ik,rank,tag,comm);
MPI_Send(&A[0][1][0],1,face_ik,rank,tag,comm);
MPI_Send(&A[0][79][0],1,face_ik,rank,tag,comm);
...
i
k
7
Type Constructor
int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklength
stride
 Same as MPI_Type_vector, except that stride is in
terms of number of bytes, not number of elements of
oldtype.
 blocklength is still in terms of number of elements of
oldtype.
 Same oldtype in different blocks; same block lengths;
same spacing between neighboring blocks, but in terms
of bytes (not in terms of oldtype)
8
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_hvector(4,1,4*sizeof(double),
MPI_DOUBLE, &column);
MPI_Type_commit(&column);
MPI_Send(&A[0][1],1,column,rank,tag,comm);
...
A[4][4]
Surface: A[:][:][49]
j
double A[100][80][50];
MPI_Datatype face_ij, line_j;
A[100][80][50]
MPI_Type_vector(80,1,50,MPI_DOUBLE,&line_j);
MPI_Type_hvector(100,1,80*50*sizeof(double),
line_j, &face_ij);
MPI_Type_commit(&face_ij);
MPI_Send(&A[0][0][49],1,face_ij,rank,tag,comm);
...
i
k
9
Type Constructor
int MPI_Type_indexed(int count, int *array_blocklen, int *array_disp,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklen[i]
disp[i]
 count – number of blocks
 array_blocklen – number of elements per block in term s of oldtype,
dimension: count.
 array_disp – displacements of each block in terms of number of elements
of oldtype, dimension: count
 oldtype – old data type
 newtype – new data type
 Data consists of count blocks of oldtype: same oldtype; different
block lengths; different spacing between blocks
 block i has length array_blocklen[i]
 Block i has displacement array_disp[i], in terms of number of oldtype
elements.
10
Example
1
2
3
4
Upper triangle of matrix A[4][4]
5
6
7
8
double A[4][4];
MPI_Datatype upper_tri;
int blocklen[4], disp[4];
int i;
for(i=0;i<4;i++) {
blocklen[i] = 4-i;
disp[i] = (4+1)*i;
}
MPI_Type_indexed(4,blocklen,disp,MPI_DOUBLE,&upper_tri);
MPI_Type_commit(&upper_tri);
MPI_Send(&A[0][0], 1, upper_tri, rank, tag, comm);
...
9
10 11 12
13 14 15 16
// Strict lower triangular
MPI_Type lower_tri;
for(i=0;i<3;i++) {
blocklen[i] = i+1;
disp[i] = (i+1)*4;
}
MPI_Type_indexed(3, blocklen, disp, MPI_DOUBLE, &lower_tri);
...
11
Type Constructor
int MPI_Type_hindexed(int count, int *array_blocklen,
MPI_Aint *array_disp,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklen[i]
disp[i]
Same as MPI_Type_indexed, except that
array_disp is specified in terms of number of
bytes instead of number of oldtype.
Same oldtype; Different block lengths; different
spacing between blocks, displacement in terms
of bytes, instead of number of oldtype elements
12
Example
1
2
3
4
Upper triangle of matrix A[4][4]
5
6
7
8
double A[4][4];
9 10 11 12
MPI_Datatype upper_tri;
int blocklen[4];
13 14 15 16
MPI_Aint disp[4];
int i;
for(i=0;i<4;i++) {
blocklen[i] = 4-i;
disp[i] = (4+1)*i*sizeof(double);
}
MPI_Type_hindexed(4,blocklen,disp,MPI_DOUBLE,&upper_tri);
MPI_Type_commit(&upper_tri);
MPI_Send(A, 1, upper_tri, rank, tag, comm);
...
13
Address Calculation
int MPI_Address(void *location, MPI_Aint *address)
MPI_ADDRESS(location, address)
<type> location(*)
integer address
 Returns the address of the memory location (or variable)
 The difference between two addresses gives the number of bytes
between these two memory locations.
 Address is different from pointers in C/C++
 Cannot do pointer subtraction
 Pointer + (or -) an integer n  new location: n*sizeof(data-type)
// Example: compute the byte displacement between two struct members
// with MPI_Address (portable, unlike raw pointer subtraction in C).
struct _tagStudent {              // fixed: "Struct" -> "struct"
int id;
double grade;
char note[100];
} A_Student;
MPI_Aint addr1, addr2, disp;
MPI_Address(&A_Student.id, &addr1);
MPI_Address(&A_Student.grade, &addr2);
disp = addr2 - addr1;             // fixed: "Disp" -> "disp"; en dash -> minus sign
14
Type Constructor
int MPI_Type_struct(int count, int *array_blocklen, MPI_Aint *array_disp,
MPI_Datatype *array_types, MPI_Datatype *newtype)
blocklen[i]
type[i]
disp[i]
 count – number of blocks
 array_blocklen – array, number of elements in each block, in terms of
oldtype; dimension: count
 array_disp – array, displacements of each block, in terms of number of
bytes; dimension: count
 array_types, array, data types of each block; dimension: count
 newtype – new data type
 Different oldtype; different block lengths; different spacing between blocks,
displacement in terms of bytes
 Each block may have different data types
 Most general
15
struct _tagStudent {
int id;
double grade;
char note[100];
};
Example
struct _tagStudent Students[25];
MPI_Datatype one_student, all_students;
int block_len[3];
MPI_Datatype types[3];
MPI_Aint disp[3];
block_len[0] = block_len[1] = 1;
block_len[2] = 100;
types[0] = MPI_INT;
types[1] = MPI_DOUBLE;
types[2] = MPI_CHAR;
MPI_Address(&Students[0].id, &disp[0]); // memory address
MPI_Address(&Students[0].grade, &disp[1]);
MPI_Address(&Students[0].note[0],&disp[2]);
disp[1] = disp[1]-disp[0];
disp[2] = disp[2]-disp[0];
disp[0] = 0;
MPI_Type_struct(3, block_len, disp, types, &one_student);
MPI_Type_contiguous(25, one_student, &all_students);
MPI_Type_commit(&all_students);
MPI_Send(Students, 1, all_students, rank, tag, comm);
// MPI_Type_commit(&one_student);
// MPI_Send(Students, 25, one_student, rank, tag, comm);
...
16
Type Extent
 “Length” of a data type in terms of bytes
 E.g. double – MPI_DOUBLE – extent is 8 or sizeof(double)
 int – MPI_INT – extent is 4 (on typical platforms) or sizeof(int)
 Situation more complex for derived data types; There are
two cases
 Case 1: derived data types encountered so far (no
boundary markers MPI_UB or MPI_LB)
 Distance between first byte and the last byte of data type, plus
some increment for memory alignment.
• Memory alignment: A basic data type of length n will only be
allocated in memory starting from an address of a multiple of n
{(MPI_DOUBLE,0), (MPI_CHAR, 8)}
Double – 8 bytes, byte 0-7
Char – 1 byte, byte 8
Increment – 7 bytes, to round off to next multiple of 8
Extent is: 8+1+7 = 16
17
Type Extent
 Case 2: boundary marker(s) appear in data type
definition
 Pre-defined type MPI_LB marks lower boundary of data type;
MPI_UB marks upper boundary of data type.
 Length of MPI_LB and MPI_UB is zero.
 Extent: distance between boundary markers
 If only MPI_UB appears, extent is distance between first byte and
MPI_UB
 If only MPI_LB appears, extent is distance between MPI_LB and
last byte, plus increment for memory alignment
{(MPI_DOUBLE,0) (MPI_CHAR,8) (MPI_UB,8)}
Extent of data type is 8 instead of 16.
{(MPI_LB,-8) (MPI_DOUBLE,0) (MPI_CHAR,8)}
Extent is: 8+8+1+7 = 24
{(MPI_LB,-8) (MPI_DOUBLE,0) (MPI_CHAR,8) (MPI_UB,9)}
Extent: 9+8 = 17
Can use MPI_LB and MPI_UB to modify the extent to suit one’s needs
18
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_vector(4,1,4,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
// Extent of column is 13*sizeof(double)=104 bytes
// Now modify extent of column to be sizeof(double)=8 using MPI_LB, MPI_UB
// Create a new type, same as column, but with extent 8
//
{(column, 0) (MPI_UB, 8)}
MPI_Datatype modified_column;
MPI_Datatype types[2];
MPI_Aint disp[2];
int block_len[2];
types[0] = column;
types[1] = MPI_UB;
block_len[0] = block_len[1] = 1;
disp[0] = 0;
disp[1] = sizeof(double);
MPI_Type_struct(2, block_len, disp, types, &modified_column);
// Now modified_column is same as column, but extent is sizeof(double)=8.
19
Type Extent is Important
Concatenation of derived data types is
based on their type extent
extent
extent
A_type
B_type
MPI_Send(buf, 2, A_type, …);
or
MPI_Type_contiguous(2, A_type, &B_type);
Modify extent of A_type using MPI_UB, MPI_LB
extent
A_type
extent
B_type
20
Example
extent
extent
A_type
1.0
2.0
3.0
4.0
5.0
6.0
MPI_Send(buf,2,A_type,...)
…
Actual data send out:
4 numbers: 1.0, 3.0, 4.0, 6.0
buf
extent
extent
A_type
MPI_Send(buf,2,A_type,...)
MPI_Send(buf, 4, MPI_DOUBLE, ...)
Actual data sent out:
4 numbers: 1.0, 3.0, 2.0, 4.0
Actual data sent out:
4 numbers: 1.0, 2.0, 3.0, 4.0
21
Data arrived:
4 numbers: 1.0, 2.0, 3.0, 4.0
Example
extent
extent
A_type
1.0
2.0
3.0
4.0
MPI_Recv(buf,2,A_type,...)
…
buf
extent
extent
A_type
MPI_Recv(buf,2,A_type,...)
1.0
buf
3.0
2.0
4.0
MPI_Recv(buf, 4, MPI_DOUBLE, ...)
1.0
buf
2.0
3.0
4.0
22
Type Commit & Free
int MPI_Type_commit(MPI_Datatype *datatype)
int MPI_Type_free(MPI_Datatype *datatype)
A derived data type must be committed
before being used in communication.
Once committed, can be used comm
routines same as pre-defined data types.
If not used any more, need to free the
derived data type
23
Type Matching
Type matching rules need to be
generalized with derived data types
New rule: the type signature of the data
sent must match the type signature of the
that specified in receive routine
Sequence of basic data types must match
Number of basic elements in message sent
can be smaller than that specified in receive,
but must match.
24
Example
A
1.0
B
1.0
C
1.0
D
1.0
2.0
3.0
2.0
4.0
3.0
4.0
Cpu 0: A  cpu 1: B
Cpu 0: C  cpu 1: D
2.0
2.0
double A[4], B[8];
double C[2], D[8];
int my_rank;
...
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Datatype recv_type;
if(my_rank==1) {
// Receive into every other element of B / D.
// Type signature is {MPI_DOUBLE x 4}, matching the sends below.
MPI_Type_vector(4, 1, 2, MPI_DOUBLE, &recv_type);
MPI_Type_commit(&recv_type);  // fixed: "MPI_Commit" is not an MPI routine
MPI_Recv(B, 1, recv_type, 0, tag, MPI_COMM_WORLD, &stat);
// Sender provides only 2 doubles: fewer basic elements than the
// receive specifies is legal under the type matching rules.
MPI_Recv(D, 1, recv_type, 0, tag, MPI_COMM_WORLD, &stat);
MPI_Type_free(&recv_type);
}
else if (my_rank==0) {
MPI_Send(A, 4, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
MPI_Send(C, 2, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
}
25
Example
B
A
1.0
C
1.0
2.0
3.0
2.0
3.0
double A[N][N], B[N][N], C[N];
MPI_Datatype diag;
...
MPI_Type_vector(N, 1, N+1, MPI_DOUBLE, &diag);
MPI_Type_commit(&diag);
if(my_rank==0) {
MPI_Send(&A[0][0], 1, diag, 1, tag, MPI_COMM_WORLD);
MPI_Send(&A[0][0], 1, diag, 1, tag, MPI_COMM_WORLD);
}
else if(my_rank==1) {
MPI_Recv(&B[0][0], 1, diag, 0, tag, MPI_COMM_WORLD, &stat);
MPI_Recv(&C[0], N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &stat);
}
MPI_Type_free(&diag);
26
Example
Cpu0: A^T  cpu 1: B
B
A
1.0
2.0
3.0
1.0
4.0
7.0
4.0
5.0
6.0
2.0
5.0
8.0
7.0
8.0
9.0
3.0
6.0
9.0
double A[N][N], B[N][N];
MPI_Datatype column, mat_transpose;
...
MPI_Type_vector(N, 1, N, MPI_DOUBLE, &column);
MPI_Type_hvector(N, 1, sizeof(double), column, &mat_transpose);
// MPI_Datatype column_modified, types[2];
// int block_len[2];
// MPI_Aint disp[2];
// types[0] = column; types[1] = MPI_UB;
// block_len[0] = block_len[1] = 1;
// disp[0] = 0; disp[1] = sizeof(double);
// MPI_Type_struct(2,block_len,disp,types,&column_modified);
// MPI_Type_contiguous(N, column_modified, &mat_transpose);
MPI_Type_commit(&mat_transpose);
if(my_rank==0) {
MPI_Send(&A[0][0], N*N, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
}
else if(my_rank==1) {
MPI_Recv(&B[0][0], 1, mat_transpose, 0, tag, MPI_COMM_WORLD, &stat);
}
MPI_Type_free(&mat_transpose);
27
Matrix Transpose Revisited
A
B
T
A11
A12
A13
A11
A21
A22
A23
A12
A31
A32
A33
A21
T
A31
T
A22
T
A32
A13
A23
A33
T
T
T
B = AT
B also distributed on P cpus
Row-wise decomposition
Aij – (N/P)x(N/P) matrices
Bij=AjiT
Local transpose
A11T A12T A13T
A21T A22T A23T
T
T
A – NxN matrix
Distributed on P cpus
Row-wise decomposition
All-to-all
Input:
A[i][j] = 2*i+j
A31T A32T A33T
28
Example: Matrix Transpose
0
1
2
3
0
4
0
4
4
5
6
7
1
5
1
5
0
1
2
3
2
6
2
6
4
5
6
7
3
7
3
7
Three steps:
1. Divide A into blocks;
2. Transpose each
block locally;
3. All-to-all comm;
4. Merge blocks locally;
On each cpu, A is an (N/P)xN matrix; first need to re-write it
as P blocks of (N/P)x(N/P) matrices, then the local
transpose can be done
A: 2x4
0
1
2
3
4
5
6
7
Two 2x2
blocks
0
1
4
5
2
3
6
7
After all-to-all comm, have P
blocks of (N/P)x(N/P) matrices;
Need to merge into a (N/P)xN
matrix
29
Transpose
A
extent
B
All-to-all
Read data column by column
Receive data block by block
Need to be careful about extent
Careful about extent
Create derived data types for send and receive; No additional local
manipulations
30
#include
#include
#include
#include
<stdio.h>
<string.h>
<mpi.h>
"dmath.h"
#define DIM 1000 // global A[DIM], B[DIM]
Matrix
Transposition
int main(int argc, char **argv)
{
int ncpus, my_rank, i, j, iblock;
int Nx, Ny; // Nx=DIM/ncpus, Ny=DIM, local array: A[Nx][Ny], B[Nx][Ny]
double **A, **B;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
if(DIM%ncpus != 0) { // make sure DIM can be divided by ncpus
if(my_rank==0)
printf("ERROR: DIM cannot be divided by ncpus!\n");
MPI_Finalize();
return -1;
}
Nx = DIM/ncpus;
Ny = DIM;
A = DMath::newD(Nx, Ny); // allocate memory
B = DMath::newD(Nx, Ny);
for(i=0;i<Nx;i++)
for(j=0;j<Ny;j++) A[i][j] = 2*(my_rank*Nx+i) + j;
memset(&B[0][0], '\0', sizeof(double)*Nx*Ny); // zero out B
31
// Create derived data types
MPI_Datatype type_send, type_recv;
MPI_Datatype type_line1, type_block;
MPI_Aint displ[2];
MPI_Datatype types[2];
int block_len[2];
MPI_Type_vector(Nx, 1, Ny, MPI_DOUBLE, &type_line1); // a column in A
types[0] = type_line1; types[1] = MPI_UB; // modify the extent of column to be 1 double
block_len[0] = block_len[1] = 1;
displ[0] = 0; displ[1] = sizeof(double);
MPI_Type_struct(2, block_len, displ, types, &type_send); // modified column
MPI_Type_commit(&type_send); // Now A is a concatenation of type_send
MPI_Type_vector(Nx, Nx, Ny, MPI_DOUBLE, &type_block); // submatrix block
types[0] = type_block; types[1] = MPI_UB; // modify extent of type_block
block_len[0] = block_len[1] = 1;
displ[0] = 0; displ[1] = Nx*sizeof(double);
MPI_Type_struct(2, block_len, displ, types, &type_recv); // modified block
MPI_Type_commit(&type_recv); // Now B is a cancatenation of type_recv
// send/recv data
MPI_Alltoall(&A[0][0], Nx, type_send, &B[0][0], 1, type_recv, MPI_COMM_WORLD);
// clean up
MPI_Type_free(&type_send);
MPI_Type_free(&type_recv);
DMath::del(A);
DMath::del(B);
MPI_Finalize();
return 0;
}
32
Related documents