Hybrid OpenMP + MPI Mandelbrot code
I am facing one problem: I have an MPI version of Mandelbrot that works perfectly. Now I have to implement a hybrid version using OpenMP. I put the OpenMP parallelization in the loop calculations (inside the function calculomandelbrot). If I delete the OpenMP directives (omp parallel, omp barrier) it works perfectly again (the plain MPI implementation), so it should work — I can't guess what I'm missing.
I have an image of imageheight * imagewidth pixels, and every process computes a part of it (for example: with an image 100 rows high and 4 processes, every process calculates 25 rows; this is done in the calculomandelbrot function). Each process then sends a message to the master with the results of its part of the calculation.
The resulting PPM is a mess and I don't know why... Any help is appreciated.
/ // mandelbrot.c // // // mandelbrot calculation iterate equation // z = z*z + c, z , c complex numbers, z // zero, , c coordinate of point being tested. if // magnitude of z remains less 2 ever, point // c in mandelbrot set. in code write out number of iterations // before magnitude of z exceeds 2, or uchar_max, whichever // smaller.// // // #include <stdio.h> #include <stdlib.h> #include <mpi.h> #include <omp.h> #include <math.h> #define tag_envio 3 int imagewidth = 600; int imageheight = 400; int iterations = 100000; int chunk = 1; int size; void escribirfichero(char* pixels) { int i; file *fp; fp = fopen("mandelbrotset.ppm", "w"); if (fp == null) { perror ( "unable open file" ); exit (exit_failure); } //printf("empezamos escribir en el fichero"); fprintf(fp, "p6\n# creator: mandel program\n"); fprintf(fp, "%d %d\n255\n", imagewidth, imageheight); (i = 0; < (imagewidth*3*imageheight); i++) fputc((char) pixels[i],fp); fclose(fp); } void calculomandelbrot(char *destino, int iproc, int height) { int posinici = iproc * height; int xactual, yactual; int poslocal = 0; int chunksize = height * imagewidth * 3; omp_set_dynamic(1); //each iteration, calculates: newz = oldz*oldz + p, p current pixel, , oldz stars @ origin double pr, pi; //real , imaginary part of pixel p double newre, newim, oldre, oldim; //real , imaginary parts of new , old z double zoom = 1, movex = -0.5, movey = 0; //you can change these zoom , change position int numcpu = omp_get_num_procs(); omp_set_num_threads(numcpu); if(iproc != 0) destino = (char *)malloc(sizeof(char) * chunksize); #pragma omp parallel //shared(movex, movey, zoom) private(xactual, yactual, pr, pi, newre, newim) if (numcpu>1) { #pragma omp schedule(dynamic) //, chunk for(yactual = posinici; yactual < posinici + height; yactual++) { for(xactual = 0; xactual < imagewidth; xactual++) { //calculate initial real , imaginary part of z, based on pixel location , zoom , position values pr = 1.5 * (xactual - imagewidth / 2) / (0.5 * zoom 
* imagewidth) + movex; pi = (yactual - imageheight / 2) / (0.5 * zoom * imageheight) + movey; newre = newim = oldre = oldim = 0; //these should start @ 0,0 //"i" represent number of iterations int i; //start iteration process for(i = 0; < iterations; i++) { //remember value of previous iteration oldre = newre; oldim = newim; //the actual iteration, real , imaginary part calculated newre = oldre * oldre - oldim * oldim + pr; newim = 2 * oldre * oldim + pi; //if point outside circle radius 2: stop if((newre * newre + newim * newim) > 4) break; } if(i == iterations) { //escribirarray(destino, poslocal, 0, 0, 0); destino[poslocal] = 0; destino[++poslocal] = 0; destino[++poslocal] = 0; ++poslocal; //me preparo para colocar siguiente. } else { double z = sqrt(newre * newre + newim * newim); int brightness = 256 * log2(1.75 + - log2(log2(z))) / log2((double)iterations); //escribirarray(envioarr, xactual, yactual, brightness, brightness, 255); destino[poslocal] = brightness; destino[++poslocal] = brightness; destino[++poslocal] = 255; ++poslocal; //me preparo para colocar siguiente } } } } #pragma omp barrier if(iproc != 0) { mpi_send(destino, chunksize, mpi_char, 0, tag_envio, mpi_comm_world); free(destino); } } void stringcopy(char *pixels, char *reciboarr, int sender, int height) { int posinici = sender * height * imagewidth*3; int pos; (pos = 0; pos < height * imagewidth*3; pos++, posinici++) { pixels[posinici] = reciboarr[pos]; } } int main(int argc, char** argv) { // mandelbrot char* pixels; // mpi int nproc, iproc; // calculos tiempo double begin; double end_calc; double end_total; if(argc >= 3) { // se supone que la lĂnia de ejecucion sera del tipo // -n 4 ${workspace_loc:mpi}/debug/mpi${build_files} 100 200 // falta provar en moore imagewidth = atoi(argv[1]); imageheight = atoi(argv[2]); } if(argc == 4) { iterations = atoi(argv[3]); } if(argc == 5) { chunk = atoi(argv[4]); } size = imageheight * imagewidth * 3; if (mpi_init(&argc, &argv) != mpi_success) { 
fprintf(stderr, "error al inicializar mpi.\n"); return 100; } begin = mpi_wtime(); if (mpi_comm_size(mpi_comm_world, &nproc) != mpi_success) { fprintf(stderr, "no se puede obtener el contador de procesos.\n"); mpi_finalize(); return 101; } else if (nproc < 2) { fprintf(stderr, "se necesitan almenos 2 procesos (se usan %d)\n", nproc); mpi_finalize(); return 102; } if (mpi_comm_rank(mpi_comm_world, &iproc) != mpi_success) { fprintf(stderr, "no se puede obtener el rango para el proceso.\n"); mpi_finalize(); return 103; } if ((imageheight % nproc) != 0) { printf("incompatable number of processes requested\nexiting...\n"); exit(exit_failure); } int height = imageheight/nproc; int chunksize = height * imagewidth * 3; if (iproc != 0) { char *envioarr = (char *)malloc(sizeof(char) * chunksize); calculomandelbrot(envioarr, iproc, height); } else if(iproc == 0) { printf("empezando los calculos de mandelbrot...\n"); printf("imagewidth %d imageheight %d iterations %d num procesos %d chunk %d\n", imagewidth, imageheight, iterations, nproc, chunk); pixels = (char *)malloc(size * sizeof(char)); calculomandelbrot(pixels, iproc, height); //inicio recibir el resto de mensajes char *reciboarr = (char *)malloc(sizeof(char)*chunksize); int i; mpi_status status; (i = 1; i<nproc; i++) { mpi_recv(reciboarr, height*imagewidth*3, mpi_char, mpi_any_source, tag_envio, mpi_comm_world, &status); stringcopy(pixels, reciboarr, status.mpi_source, height); } free(reciboarr); //final de recibir resto de mensajes end_calc = mpi_wtime() - begin; printf("tiempo en calculos: %.10lf segundos \n", end_calc); //mpi_barrier(mpi_comm_world); //printf("escribiendo la imagen\n"); escribirfichero(pixels); end_total = mpi_wtime() - begin; printf("tiempo en total: %.10lf segundos \n", end_total); free(pixels); } mpi_finalize(); return 0; }
Just in case anyone hits the same or a similar problem, the solution was:
As said at the beginning of the question, the MPI version (without the OpenMP clauses) worked fine. In that version the variable poslocal runs from 0 to 180000 (imagewidth * 3 * height) in the case I was debugging: imagewidth is 600 and height is 100 (imageheight of 400 divided among 4 processes).
When I introduced the OpenMP clauses, I don't know why, but even though I was using an OpenMP barrier, the final value of poslocal got stuck at 175000 or thereabouts. After many tests I got poslocal to reach 180000 by making destino and poslocal shared, and xactual and yactual private. Perhaps other combinations work as well, but this one works for me.
Thanks, Coincoin, for your interest.
#pragma omp parallel shared ( destino) private ( xactual, yactual) { #pragma omp parallel schedule(static) for(yactual = posinici; yactual < posinici + height; yactual++) { for(xactual = 0; xactual < imagewidth; xactual++) { //calculate initial real , imaginary part of z, based on pixel location , zoom , position values pr = 1.5 * (xactual - imagewidth / 2) / (0.5 * zoom * imagewidth) + movex; pi = (yactual - imageheight / 2) / (0.5 * zoom * imageheight) + movey; newre = newim = oldre = oldim = 0; //these should start @ 0,0 //"i" represent number of iterations int i; //start iteration process for(i = 0; < iterations; i++) { //remember value of previous iteration oldre = newre; oldim = newim; //the actual iteration, real , imaginary part calculated newre = oldre * oldre - oldim * oldim + pr; newim = 2 * oldre * oldim + pi; //if point outside circle radius 2: stop if((newre * newre + newim * newim) > 4) break; } //printf("antes zona paralela: %d process thread: %d \n", iproc, numcpu); if(i == iterations) { //escribirarray(destino, poslocal, 0, 0, 0); destino[poslocal] = 0; destino[++poslocal] = 0; destino[++poslocal] = 0; ++poslocal; //me preparo para colocar siguiente. } else { double z = sqrt(newre * newre + newim * newim); int brightness = 256 * log2(1.75 + - log2(log2(z))) / log2((double)iterations); //escribirarray(envioarr, xactual, yactual, brightness, brightness, 255); destino[poslocal] = brightness; destino[++poslocal] = brightness; destino[++poslocal] = 255; ++poslocal; //me preparo para colocar siguiente } } } } #pragma omp barrier printf("despuess zona paralela: %d process thread: %d y poslocal %d \n", iproc, numcpu, poslocal); if(iproc != 0) { mpi_send(destino, chunksize, mpi_char, 0, tag_envio, mpi_comm_world); printf("estoy enviando en el proceso: %d y la poslocal es %d \n", iproc, poslocal); free(destino); }
Comments
Post a Comment