diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2008-04-14 14:14:59 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2008-04-14 14:14:59 +0400 |
commit | 6da7b60cd3394259d503e2280d59e48306c42fb2 (patch) | |
tree | 82985b29587920770b8fa184c5e99df7cde91ba3 | |
parent | 3dcb3cc4cdbf2147365c51941d8c3863c9c8890b (diff) |
Attempted fix for bug #8349: QMC raytracing was very slow on some
computers, probably due to slow multithreaded malloc. Now each render
thread keeps a list of QMC samplers that it fills as needed while
rendering (since it is hard to predict the actual number needed in
advance due to ray recursion).
4 files changed, 77 insertions, 47 deletions
diff --git a/source/blender/render/intern/include/render_types.h b/source/blender/render/intern/include/render_types.h index 22fb2e70d87..c2d2ea96745 100644 --- a/source/blender/render/intern/include/render_types.h +++ b/source/blender/render/intern/include/render_types.h @@ -64,8 +64,10 @@ typedef struct SampleTables typedef struct QMCSampler { + struct QMCSampler *next, *prev; int type; int tot; + int used; double *samp2d; double offs[BLENDER_MAX_THREADS][2]; } QMCSampler; @@ -150,7 +152,7 @@ struct Render /* samples */ SampleTables *samples; float jit[32][2]; - QMCSampler *qsa; + ListBase *qmcsamplers; /* shadow counter, detect shadow-reuse for shaders */ int shadowsamplenr[BLENDER_MAX_THREADS]; @@ -455,7 +457,6 @@ typedef struct LampRen { struct ShadBuf *shb; float *jitter; - QMCSampler *qsa; float imat[3][3]; float spottexfac; diff --git a/source/blender/render/intern/include/rendercore.h b/source/blender/render/intern/include/rendercore.h index cdc348279e5..0edca1a15a9 100644 --- a/source/blender/render/intern/include/rendercore.h +++ b/source/blender/render/intern/include/rendercore.h @@ -104,9 +104,7 @@ extern void ray_trace(ShadeInput *, ShadeResult *); extern void ray_ao(ShadeInput *, float *); extern void init_jitter_plane(LampRen *lar); extern void init_ao_sphere(struct World *wrld); -extern void init_lamp_hammersley(LampRen *lar); -extern void free_lamp_qmcsampler(LampRen *lar); -extern void init_render_hammersley(Render *re); +extern void init_render_qmcsampler(Render *re); extern void free_render_qmcsampler(Render *re); #endif /* RENDER_EXT_H */ diff --git a/source/blender/render/intern/source/convertblender.c b/source/blender/render/intern/source/convertblender.c index 0f741e8e358..c2bb6229131 100644 --- a/source/blender/render/intern/source/convertblender.c +++ b/source/blender/render/intern/source/convertblender.c @@ -3684,9 +3684,6 @@ static GroupObject *add_render_lamp(Render *re, Object *ob) if(re->r.mode & R_SHADOW) { - if ((lar->mode & 
LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_HAMMERSLEY)) { - init_lamp_hammersley(lar); - } if(la->type==LA_AREA && (lar->mode & LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_CONSTANT)) { init_jitter_plane(lar); } @@ -4372,7 +4369,6 @@ void RE_Database_Free(Render *re) freeshadowbuf(lar); if(lar->jitter) MEM_freeN(lar->jitter); if(lar->shadsamp) MEM_freeN(lar->shadsamp); - if(lar->qsa) free_lamp_qmcsampler(lar); curvemapping_free(lar->curfalloff); } @@ -4410,8 +4406,7 @@ void RE_Database_Free(Render *re) re->wrld.aotables= NULL; re->scene->world->aotables= NULL; } - if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC) && - (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY) && (re->qsa)) + if(re->r.mode & R_RAYTRACE) free_render_qmcsampler(re); if(re->r.mode & R_RAYTRACE) freeraytree(re); @@ -4786,11 +4781,12 @@ void RE_Database_FromScene(Render *re, Scene *scene, int use_camera_view) } init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */ - if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) { - if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY) - init_render_hammersley(re); - else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) - init_ao_sphere(&re->wrld); + if(re->r.mode & R_RAYTRACE) { + init_render_qmcsampler(re); + + if(re->wrld.mode & WO_AMB_OCC) + if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) + init_ao_sphere(&re->wrld); } /* still bad... doing all */ @@ -5439,11 +5435,12 @@ void RE_Database_Baking(Render *re, Scene *scene, int type, Object *actob) } init_render_world(re); /* do first, because of ambient. 
also requires re->osa set correct */ - if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) { - if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY) - init_render_hammersley(re); - else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) - init_ao_sphere(&re->wrld); + if(re->r.mode & R_RAYTRACE) { + init_render_qmcsampler(re); + + if(re->wrld.mode & WO_AMB_OCC) + if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT) + init_ao_sphere(&re->wrld); } /* still bad... doing all */ diff --git a/source/blender/render/intern/source/rayshade.c b/source/blender/render/intern/source/rayshade.c index d423abefe96..7ffed72329e 100644 --- a/source/blender/render/intern/source/rayshade.c +++ b/source/blender/render/intern/source/rayshade.c @@ -40,8 +40,9 @@ #include "BKE_utildefines.h" #include "BLI_arithb.h" -#include "BLI_rand.h" +#include "BLI_blenlib.h" #include "BLI_jitter.h" +#include "BLI_rand.h" #include "PIL_time.h" @@ -729,8 +730,8 @@ static void hammersley_create(double *out, int n) struct QMCSampler *QMC_initSampler(int type, int tot) { - QMCSampler *qsa = MEM_mallocN(sizeof(QMCSampler), "qmc sampler"); - qsa->samp2d = MEM_mallocN(2*sizeof(double)*tot, "qmc sample table"); + QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler"); + qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table"); qsa->tot = tot; qsa->type = type; @@ -871,27 +872,55 @@ static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int nu #endif /* called from convertBlenderScene.c */ -/* samples don't change per pixel, so build the samples in advance for efficiency */ -void init_lamp_hammersley(LampRen *lar) +void init_render_qmcsampler(Render *re) { - lar->qsa = QMC_initSampler(SAMP_TYPE_HAMMERSLEY, lar->ray_totsamp); + re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase"); } -void init_render_hammersley(Render *re) +QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot) { - re->qsa = 
QMC_initSampler(SAMP_TYPE_HAMMERSLEY, (re->wrld.aosamp * re->wrld.aosamp)); + QMCSampler *qsa; + + /* create qmc samplers as needed, since recursion makes it hard to + * predict how many are needed */ + + for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) { + if(qsa->type == type && qsa->tot == tot && !qsa->used) { + qsa->used= 1; + return qsa; + } + } + + qsa= QMC_initSampler(type, tot); + qsa->used= 1; + BLI_addtail(&re->qmcsamplers[thread], qsa); + + return qsa; } -void free_lamp_qmcsampler(LampRen *lar) +void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa) { - QMC_freeSampler(lar->qsa); - lar->qsa = NULL; + qsa->used= 0; } void free_render_qmcsampler(Render *re) { - QMC_freeSampler(re->qsa); - re->qsa = NULL; + QMCSampler *qsa, *next; + int a; + + if(re->qmcsamplers) { + for(a=0; a<BLENDER_MAX_THREADS; a++) { + for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) { + next= qsa->next; + QMC_freeSampler(qsa); + } + + re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL; + } + + MEM_freeN(re->qmcsamplers); + re->qmcsamplers= NULL; + } } static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh) @@ -968,7 +997,7 @@ static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr) else samp_type = SAMP_TYPE_HAMMERSLEY; /* all samples are generated per pixel */ - qsa = QMC_initSampler(samp_type, max_samples); + qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples); QMC_initPixel(qsa, shi->thread); } else max_samples = 1; @@ -1026,7 +1055,8 @@ static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr) col[2] /= (float)samples; col[3] /= (float)samples; - if (qsa) QMC_freeSampler(qsa); + if (qsa) + release_thread_qmcsampler(&R, shi->thread, qsa); } static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac) @@ -1053,7 +1083,7 @@ static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float f else samp_type = SAMP_TYPE_HAMMERSLEY; 
/* all samples are generated per pixel */ - qsa = QMC_initSampler(samp_type, max_samples); + qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples); QMC_initPixel(qsa, shi->thread); } else max_samples = 1; @@ -1131,7 +1161,8 @@ static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float f col[1] /= (float)samples; col[2] /= (float)samples; - if (qsa) QMC_freeSampler(qsa); + if (qsa) + release_thread_qmcsampler(&R, shi->thread, qsa); } /* extern call from render loop */ @@ -1546,9 +1577,9 @@ void ray_ao_qmc(ShadeInput *shi, float *shadfac) max_samples /= speedfac; if (max_samples < 5) max_samples = 5; - qsa = QMC_initSampler(SAMP_TYPE_HALTON, max_samples); + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples); } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY) - qsa = R.qsa; + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples); QMC_initPixel(qsa, shi->thread); @@ -1621,7 +1652,8 @@ void ray_ao_qmc(ShadeInput *shi, float *shadfac) shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples; } - if ((qsa) && (qsa->type == SAMP_TYPE_HALTON)) QMC_freeSampler(qsa); + if (qsa) + release_thread_qmcsampler(&R, shi->thread, qsa); } /* extern call from shade_lamp_loop, ambient occlusion calculus */ @@ -1787,11 +1819,11 @@ static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float * /* sampling init */ if (lar->ray_samp_method==LA_SAMP_HALTON) { - qsa = QMC_initSampler(SAMP_TYPE_HALTON, max_samples); - qsa_jit = QMC_initSampler(SAMP_TYPE_HALTON, max_samples); + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples); + qsa_jit = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples); } else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY) { - qsa = lar->qsa; - qsa_jit = QMC_initSampler(SAMP_TYPE_HAMMERSLEY, max_samples); + qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples); + qsa_jit = 
get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples); } QMC_initPixel(qsa, shi->thread); @@ -1921,8 +1953,10 @@ static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float * } else shadfac[3]= 1.0f-fac/samples; - if (qsa_jit) QMC_freeSampler(qsa_jit); - if ((qsa) && (qsa->type == SAMP_TYPE_HALTON)) QMC_freeSampler(qsa); + if (qsa_jit) + release_thread_qmcsampler(&R, shi->thread, qsa_jit); + if (qsa) + release_thread_qmcsampler(&R, shi->thread, qsa); } static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec) |