Download Lecture 7

Survey
yes no Was this document useful for you?
   Thank you for your participation!

* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project

Document related concepts
no text concepts found
Transcript
Why Derived Data Types
j
A[100][80][50]
struct _tagStudent {
int id;
double grade;
char note[100];
};
i
struct _tagStudent Students[25];
k
Surface [i][j][0]
 Message data contains different data types
 Can use several separate messages  performance may not be
good
 Message data involves non-contiguous memory
locations
 Can copy non-contiguous data to a contiguous storage, then
communicate  additional memory copies
1
Derived Data Type
MPI’s solution: derived data type
No additional memory copy
Transfer directly of data with various shape and size
Idea: Specify the memory layout of data and
corresponding basic data types.
Usage:
Construct derived data type
Commit derived data type
Use it in communication routines where
MPI_Datatype argument is required.
Free derived data type
2
Type Map & Type Signature
 A general data type consists of
 A sequence of basic data types
 A sequence of byte displacements
 Type map: sequence of pairs (basic data type,
displacement) for the general data type
 E.g. double A[2]  {(MPI_DOUBLE,0),
(MPI_DOUBLE,8)}
 _tagStudent  {(MPI_INT,0), (MPI_DOUBLE,8),
(MPI_CHAR,16), …}
 Type signature: sequence of basic data types for the
general data type
 E.g. double A[2]  {MPI_DOUBLE, MPI_DOUBLE}
 _tagStudent  {MPI_INT, MPI_DOUBLE, MPI_CHAR …}
3
Communication Buffer
Given a type map
{(type0,disp0),(type1,disp1)} and
base address buf, the communication buffer:
Consists of 2 entries
1st entry at address buf+disp0, of type type0;
2nd entry at address buf+disp1, of type type1.
E.g. double A[2]  1st entry at A, of type
MPI_DOUBLE; 2nd entry at A+8, of type
MPI_DOUBLE.
If type map contains n entries  similar
semantics
4
Type Constructor
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
MPI_Datatype *newtype)
MPI_TYPE_CONTIGUOUS(COUNT, OLDTYPE, NEWTYPE, IERROR)
integer COUNT, OLDTYPE, NEWTYPE, IERROR
newtype is a concatenation of count copies of
oldtype
oldtype can be a basic data type or a derived
data type
j
A[100][80][50]
Surface: A[0][:][:]
MPI_Datatype face_jk;
MPI_Type_contiguous(80*50, MPI_DOUBLE, &face_jk);
MPI_Type_commit(&face_jk);
MPI_Send(&A[0][0][0],1,face_jk,rank,tag,comm);
// MPI_Send(&A[0][0][0],80*50,MPI_DOUBLE,rank,tag,comm);
MPI_Send(&A[99][0][0],1,face_jk,rank,tag,comm);
...
MPI_Type_free(&face_jk);
i
k
5
Type Constructor
int MPI_Type_vector(int count, int blocklength, int stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklength
stride
 count – number of blocks
 blocklength – number of elements in each block, in terms of oldtype
 stride – number of elements between start of each block, in terms of
oldtype
 oldtype – old data type, can be basic or derived data type
 newtype – created new data type
 Data consists of equally spaced blocks: same oldtype, same block
length, same spacing in terms of oldtype
 Each block is a concatenation of blocklength copies of old datatype
 Spacing between blocks is stride number of oldtype.
6
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_vector(4,1,4,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
MPI_Send(&A[0][1],1,column,rank,tag,comm);
MPI_Send(&A[0][3],1,column, rank, tag, comm);
...
A[4][4]
Surface: A[:][0][:]
j
double A[100][80][50];
MPI_Datatype face_ik;
A[100][80][50]
MPI_Type_vector(100,50,80*50,MPI_DOUBLE,&face_ik);
MPI_Type_commit(&face_ik);
MPI_Send(&A[0][0][0],1,face_ik,rank,tag,comm);
MPI_Send(&A[0][1][0],1,face_ik,rank,tag,comm);
MPI_Send(&A[0][79][0],1,face_ik,rank,tag,comm);
...
i
k
7
Type Constructor
int MPI_Type_hvector(int count, int blocklength, MPI_Aint stride,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklength
stride
 Same as MPI_Type_vector, except that stride is in
terms of number of bytes, not number of elements of
oldtype.
 blocklength is still in terms of number of elements of
oldtype.
 Same oldtype in different blocks; same block lengths;
same spacing between neighboring blocks, but in terms
of bytes (not in terms of oldtype)
8
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_hvector(4,1,4*sizeof(double),
MPI_DOUBLE, &column);
MPI_Type_commit(&column);
MPI_Send(&A[0][1],1,column,rank,tag,comm);
...
A[4][4]
Surface: A[:][:][49]
j
double A[100][80][50];
MPI_Datatype face_ij, line_j;
A[100][80][50]
MPI_Type_vector(80,1,50,MPI_DOUBLE,&line_j);
MPI_Type_hvector(100,1,80*50*sizeof(double),
line_j, &face_ij);
MPI_Type_commit(&face_ij);
MPI_Send(&A[0][0][49],1,face_ij,rank,tag,comm);
...
i
k
9
Type Constructor
int MPI_Type_indexed(int count, int *array_blocklen, int *array_disp,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklen[i]
disp[i]
 count – number of blocks
 array_blocklen – number of elements per block in term s of oldtype,
dimension: count.
 array_disp – displacements of each block in terms of number of elements
of oldtype, dimension: count
 oldtype – old data type
 newtype – new data type
 Data consists of count blocks of oldtype: same oldtype; different
block lengths; different spacing between blocks
 block i has length array_blocklen[i]
 Block i has displacement array_disp[i], in terms of number of oldtype
elements.
10
Example
1
2
3
4
Upper triangle of matrix A[4][4]
5
6
7
8
double A[4][4];
MPI_Datatype upper_tri;
int blocklen[4], disp[4];
int i;
for(i=0;i<4;i++) {
blocklen[i] = 4-i;
disp[i] = (4+1)*i;
}
MPI_Type_indexed(4,blocklen,disp,MPI_DOUBLE,&upper_tri);
MPI_Type_commit(&upper_tri);
MPI_Send(&A[0][0], 1, upper_tri, rank, tag, comm);
...
9
10 11 12
13 14 15 16
// Strict lower triangular
MPI_Type lower_tri;
for(i=0;i<3;i++) {
blocklen[i] = i+1;
disp[i] = (i+1)*4;
}
MPI_Type_indexed(3, blocklen, disp, MPI_DOUBLE, &lower_tri);
...
11
Type Constructor
int MPI_Type_hindexed(int count, int *array_blocklen,
MPI_Aint *array_disp,
MPI_Datatype oldtype, MPI_Datatype *newtype)
blocklen[i]
disp[i]
Same as MPI_Type_indexed, except that
array_disp is specified in terms of number of
bytes instead of number of oldtype.
Same oldtype; Different block lengths; different
spacing between blocks, displacement in terms
of bytes, instead of number of oldtype elements
12
Example
1
2
3
4
Upper triangle of matrix A[4][4]
5
6
7
8
double A[4][4];
9 10 11 12
MPI_Datatype upper_tri;
int blocklen[4];
13 14 15 16
MPI_Aint disp[4];
int i;
for(i=0;i<4;i++) {
blocklen[i] = 4-i;
disp[i] = (4+1)*i*sizeof(double);
}
MPI_Type_hindexed(4,blocklen,disp,MPI_DOUBLE,&upper_tri);
MPI_Type_commit(&upper_tri);
MPI_Send(A, 1, upper_tri, rank, tag, comm);
...
13
Address Calculation
int MPI_Address(void *location, MPI_Aint *address)
MPI_ADDRESS(location, address)
<type> location(*)
integer address
 Returns the address of the memory location (or variable)
 The difference between two addresses gives the number of bytes
between these two memory locations.
 Address is different from pointers in C/C++
 Cannot do pointer subtraction
 Pointer + (or -) an integer n  new location: n*sizeof(data-type)
// Example: compute the byte displacement between two struct members
// with MPI_Address (portable, unlike raw pointer subtraction in C).
struct _tagStudent {              // fixed: "Struct" -> "struct"
int id;
double grade;
char note[100];
} A_Student;
MPI_Aint addr1, addr2, disp;
MPI_Address(&A_Student.id, &addr1);
MPI_Address(&A_Student.grade, &addr2);
disp = addr2 - addr1;             // fixed: "Disp" -> "disp"; en dash -> minus sign
14
Type Constructor
int MPI_Type_struct(int count, int *array_blocklen, MPI_Aint *array_disp,
MPI_Datatype *array_types, MPI_Datatype *newtype)
blocklen[i]
type[i]
disp[i]
 count – number of blocks
 array_blocklen – array, number of elements in each block, in terms of
oldtype; dimension: count
 array_disp – array, displacements of each block, in terms of number of
bytes; dimension: count
 array_types, array, data types of each block; dimension: count
 newtype – new data type
 Different oldtype; different block lengths; different spacing between blocks,
displacement in terms of bytes
 Each block may have different data types
 Most general
15
struct _tagStudent {
int id;
double grade;
char note[100];
};
Example
struct _tagStudent Students[25];
MPI_Datatype one_student, all_students;
int block_len[3];
MPI_Datatype types[3];
MPI_Aint disp[3];
block_len[0] = block_len[1] = 1;
block_len[2] = 100;
types[0] = MPI_INT;
types[1] = MPI_DOUBLE;
types[2] = MPI_CHAR;
MPI_Address(&Students[0].id, &disp[0]); // memory address
MPI_Address(&Students[0].grade, &disp[1]);
MPI_Address(&Students[0].note[0],&disp[2]);
disp[1] = disp[1]-disp[0];
disp[2] = disp[2]-disp[0];
disp[0] = 0;
MPI_Type_struct(3, block_len, disp, types, &one_student);
MPI_Type_contiguous(25, one_student, &all_students);
MPI_Type_commit(&all_students);
MPI_Send(Students, 1, all_students, rank, tag, comm);
// MPI_Type_commit(&one_student);
// MPI_Send(Students, 25, one_student, rank, tag, comm);
...
16
Type Extent
 “Length” of a data type in terms of bytes
 E.g. double – MPI_DOUBLE – extent is 8 or sizeof(double)
 int – MPI_INT – extent is 4 (on typical platforms) or sizeof(int)
 Situation more complex for derived data types; There are
two cases
 Case 1: derived data types encountered so far (no
boundary markers MPI_UB or MPI_LB)
 Distance between first byte and the last byte of data type, plus
some increment for memory alignment.
• Memory alignment: A basic data type of length n will only be
allocated in memory starting from an address of a multiple of n
{(MPI_DOUBLE,0), (MPI_CHAR, 8)}
Double – 8 bytes, byte 0-7
Char – 1 byte, byte 8
Increment – 7 bytes, to round off to next multiple of 8
Extent is: 8+1+7 = 16
17
Type Extent
 Case 2: boundary marker(s) appear in data type
definition
 Pre-defined type MPI_LB marks lower boundary of data type;
MPI_UB marks upper boundary of data type.
 Length of MPI_LB and MPI_UB is zero.
 Extent: distance between boundary markers
 If only MPI_UB appears, extent is distance between first byte and
MPI_UB
 If only MPI_LB appears, extent is distance between MPI_LB and
last byte, plus increment for memory alignment
{(MPI_DOUBLE,0) (MPI_CHAR,8) (MPI_UB,8)}
Extent of data type is 8 instead of 16.
{(MPI_LB,-8) (MPI_DOUBLE,0) (MPI_CHAR,8)}
Extent is: 8+8+1+7 = 24
{(MPI_LB,-8) (MPI_DOUBLE,0) (MPI_CHAR,8) (MPI_UB,9)}
Extent: 9+8 = 17
Can use MPI_LB and MPI_UB to modify the extent to suit one’s needs
18
Example
double A[4][4];
MPI_Datatype column;
MPI_Type_vector(4,1,4,MPI_DOUBLE, &column);
MPI_Type_commit(&column);
// Extent of column is 13*sizeof(double)=104 bytes
// Now modify extent of column to be sizeof(double)=8 using MPI_LB, MPI_UB
// Create a new type, same as column, but with extent 8
//
{(column, 0) (MPI_UB, 8)}
MPI_Datatype modified_column;
MPI_Datatype types[2];
MPI_Aint disp[2];
int block_len[2];
types[0] = column;
types[1] = MPI_UB;
block_len[0] = block_len[1] = 1;
disp[0] = 0;
disp[1] = sizeof(double);
MPI_Type_struct(2, block_len, disp, types, &modified_column);
// Now modified_column is same as column, but extent is sizeof(double)=8.
19
Type Extent is Important
Concatenation of derived data types is
based on their type extent
extent
extent
A_type
B_type
MPI_Send(buf, 2, A_type, …);
or
MPI_Type_contiguous(2, A_type, &B_type);
Modify extent of A_type using MPI_UB, MPI_LB
extent
A_type
extent
B_type
20
Example
extent
extent
A_type
1.0
2.0
3.0
4.0
5.0
6.0
MPI_Send(buf,2,A_type,...)
…
Actual data send out:
4 numbers: 1.0, 3.0, 4.0, 6.0
buf
extent
extent
A_type
MPI_Send(buf,2,A_type,...)
MPI_Send(buf, 4, MPI_DOUBLE, ...)
Actual data sent out:
4 numbers: 1.0, 3.0, 2.0, 4.0
Actual data sent out:
4 numbers: 1.0, 2.0, 3.0, 4.0
21
Data arrived:
4 numbers: 1.0, 2.0, 3.0, 4.0
Example
extent
extent
A_type
1.0
2.0
3.0
4.0
MPI_Recv(buf,2,A_type,...)
…
buf
extent
extent
A_type
MPI_Recv(buf,2,A_type,...)
1.0
buf
3.0
2.0
4.0
MPI_Recv(buf, 4, MPI_DOUBLE, ...)
1.0
buf
2.0
3.0
4.0
22
Type Commit & Free
int MPI_Type_commit(MPI_Datatype *datatype)
int MPI_Type_free(MPI_Datatype *datatype)
A derived data type must be committed
before being used in communication.
Once committed, can be used comm
routines same as pre-defined data types.
If not used any more, need to free the
derived data type
23
Type Matching
Type matching rules need to be
generalized with derived data types
New rule: the type signature of the data
sent must match the type signature of the
that specified in receive routine
Sequence of basic data types must match
Number of basic elements in message sent
can be smaller than that specified in receive,
but must match.
24
Example
A
1.0
B
1.0
C
1.0
D
1.0
2.0
3.0
2.0
4.0
3.0
4.0
Cpu 0: A  cpu 1: B
Cpu 0: C  cpu 1: D
2.0
2.0
double A[4], B[8];
double C[2], D[8];
int my_rank;
...
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Datatype recv_type;
if(my_rank==1) {
// Receive into every other element of B / D.
// Type signature is {MPI_DOUBLE x 4}, matching the sends below.
MPI_Type_vector(4, 1, 2, MPI_DOUBLE, &recv_type);
MPI_Type_commit(&recv_type);  // fixed: "MPI_Commit" is not an MPI routine
MPI_Recv(B, 1, recv_type, 0, tag, MPI_COMM_WORLD, &stat);
// Sender provides only 2 doubles: fewer basic elements than the
// receive specifies is legal under the type matching rules.
MPI_Recv(D, 1, recv_type, 0, tag, MPI_COMM_WORLD, &stat);
MPI_Type_free(&recv_type);
}
else if (my_rank==0) {
MPI_Send(A, 4, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
MPI_Send(C, 2, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
}
25
Example
B
A
1.0
C
1.0
2.0
3.0
2.0
3.0
double A[N][N], B[N][N], C[N];
MPI_Datatype diag;
...
MPI_Type_vector(N, 1, N+1, MPI_DOUBLE, &diag);
MPI_Type_commit(&diag);
if(my_rank==0) {
MPI_Send(&A[0][0], 1, diag, 1, tag, MPI_COMM_WORLD);
MPI_Send(&A[0][0], 1, diag, 1, tag, MPI_COMM_WORLD);
}
else if(my_rank==1) {
MPI_Recv(&B[0][0], 1, diag, 0, tag, MPI_COMM_WORLD, &stat);
MPI_Recv(&C[0], N, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &stat);
}
MPI_Type_free(&diag);
26
Example
Cpu0: A^T  cpu 1: B
B
A
1.0
2.0
3.0
1.0
4.0
7.0
4.0
5.0
6.0
2.0
5.0
8.0
7.0
8.0
9.0
3.0
6.0
9.0
double A[N][N], B[N][N];
MPI_Datatype column, mat_transpose;
...
MPI_Type_vector(N, 1, N, MPI_DOUBLE, &column);
MPI_Type_hvector(N, 1, sizeof(double), column, &mat_transpose);
// MPI_Datatype column_modified, types[2];
// int block_len[2];
// MPI_Aint disp[2];
// types[0] = column; types[1] = MPI_UB;
// block_len[0] = block_len[1] = 1;
// disp[0] = 0; disp[1] = sizeof(double);
// MPI_Type_struct(2,block_len,disp,types,&column_modified);
// MPI_Type_contiguous(N, column_modified, &mat_transpose);
MPI_Type_commit(&mat_transpose);
if(my_rank==0) {
MPI_Send(&A[0][0], N*N, MPI_DOUBLE, 1, tag, MPI_COMM_WORLD);
}
else if(my_rank==1) {
MPI_Recv(&B[0][0], 1, mat_transpose, 0, tag, MPI_COMM_WORLD, &stat);
}
MPI_Type_free(&mat_transpose);
27
Matrix Transpose Revisited
A
B
T
A11
A12
A13
A11
A21
A22
A23
A12
A31
A32
A33
A21
T
A31
T
A22
T
A32
A13
A23
A33
T
T
T
B = AT
B also distributed on P cpus
Row-wise decomposition
Aij – (N/P)x(N/P) matrices
Bij=AjiT
Local transpose
A11T A12T A13T
A21T A22T A23T
T
T
A – NxN matrix
Distributed on P cpus
Row-wise decomposition
All-to-all
Input:
A[i][j] = 2*i+j
A31T A32T A33T
28
Example: Matrix Transpose
0
1
2
3
0
4
0
4
4
5
6
7
1
5
1
5
0
1
2
3
2
6
2
6
4
5
6
7
3
7
3
7
Three steps:
1. Divide A into blocks;
2. Transpose each
block locally;
3. All-to-all comm;
4. Merge blocks locally;
On each cpu, A is an (N/P)xN matrix; first need to re-write it
as P blocks of (N/P)x(N/P) matrices, then the local
transpose can be done
A: 2x4
0
1
2
3
4
5
6
7
Two 2x2
blocks
0
1
4
5
2
3
6
7
After all-to-all comm, have P
blocks of (N/P)x(N/P) matrices;
Need to merge into a (N/P)xN
matrix
29
Transpose
A
extent
B
All-to-all
Read data column by column
Receive data block by block
Need to be careful about extent
Careful about extent
Create derived data types for send and receive; No additional local
manipulations
30
#include
#include
#include
#include
<stdio.h>
<string.h>
<mpi.h>
"dmath.h"
#define DIM 1000 // global A[DIM], B[DIM]
Matrix
Transposition
int main(int argc, char **argv)
{
int ncpus, my_rank, i, j, iblock;
int Nx, Ny; // Nx=DIM/ncpus, Ny=DIM, local array: A[Nx][Ny], B[Nx][Ny]
double **A, **B;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
if(DIM%ncpus != 0) { // make sure DIM can be divided by ncpus
if(my_rank==0)
printf("ERROR: DIM cannot be divided by ncpus!\n");
MPI_Finalize();
return -1;
}
Nx = DIM/ncpus;
Ny = DIM;
A = DMath::newD(Nx, Ny); // allocate memory
B = DMath::newD(Nx, Ny);
for(i=0;i<Nx;i++)
for(j=0;j<Ny;j++) A[i][j] = 2*(my_rank*Nx+i) + j;
memset(&B[0][0], '\0', sizeof(double)*Nx*Ny); // zero out B
31
// Create derived data types
MPI_Datatype type_send, type_recv;
MPI_Datatype type_line1, type_block;
MPI_Aint displ[2];
MPI_Datatype types[2];
int block_len[2];
MPI_Type_vector(Nx, 1, Ny, MPI_DOUBLE, &type_line1); // a column in A
types[0] = type_line1; types[1] = MPI_UB; // modify the extent of column to be 1 double
block_len[0] = block_len[1] = 1;
displ[0] = 0; displ[1] = sizeof(double);
MPI_Type_struct(2, block_len, displ, types, &type_send); // modified column
MPI_Type_commit(&type_send); // Now A is a concatenation of type_send
MPI_Type_vector(Nx, Nx, Ny, MPI_DOUBLE, &type_block); // submatrix block
types[0] = type_block; types[1] = MPI_UB; // modify extent of type_block
block_len[0] = block_len[1] = 1;
displ[0] = 0; displ[1] = Nx*sizeof(double);
MPI_Type_struct(2, block_len, displ, types, &type_recv); // modified block
MPI_Type_commit(&type_recv); // Now B is a cancatenation of type_recv
// send/recv data
MPI_Alltoall(&A[0][0], Nx, type_send, &B[0][0], 1, type_recv, MPI_COMM_WORLD);
// clean up
MPI_Type_free(&type_send);
MPI_Type_free(&type_recv);
DMath::del(A);
DMath::del(B);
MPI_Finalize();
return 0;
}
32
Related documents