BGE performance, 3rd round: culling and rasterizer.

This commit extends the dynamic linked list technique to the mesh slots so as to eliminate dumb scans and map lookups. It provides a massive performance improvement in the culling and in the rasterizer when the majority of objects are static.

Other improvements:
- Compute the OpenGL matrix only for objects that are visible.
- Simplify the hash function for GEN_HashedPtr.
- Scan the light list instead of the general object list to render shadows.
- Remove redundant OpenGL calls to set specularity, shininess and diffuse between each mesh slot.
- Cache the GPU material to avoid frequent calls to GPU_material_from_blender.
- Only set the fixed elements of a mesh slot once.
- Use more inline functions.

The following table shows the performance increase between 2.48, the 1st round and this round of improvements. The test was done with a scene containing 40000 objects, of which approximately 1000 are in the view frustum. The objects are simple textured cubes to make sure the GPU is not the bottleneck. As some of the rasterizer processing time has moved under culling, I present the sum of scenegraph (includes culling) + rasterizer time.

| Scenegraph+rasterizer (ms)                                 | 2.48  | 1st round | 3rd round |
|------------------------------------------------------------|-------|-----------|-----------|
| All objects static, all visible, 1000 in the view frustum  | 323.0 | 86.0      | 7.2       |
| All objects static, all invisible                          | 219.0 | 49.7      | N/A(*)    |
| All objects moving, all visible, 1000 in the view frustum  | 323.0 | 105.6     | 34.7      |
| Scene destruction                                          | 40min | 40min     | 4s        |

(*): this time is not representative because the frame rate was at 60fps. In that case, the GPU holds down the GE by frame sync. By design, the overhead of the rasterizer is 0 when the objects are invisible.

This table shows a global speed up between 9x and 45x compared to 2.48a for scenegraph, culling and rasterizer overhead. The speed up goes much higher when objects are invisible.
An additional 2-4x speed up is possible in the scenegraph by upgrading the Moto library to use the Eigen2 BLAS library instead of C++ classes, but the scenegraph is already so fast that it is not a priority right now. Next speed up in logic: many things to do there...
author: Benoit Bolsee <benoit.bolsee@online.be> 2009-05-07 13:13:01 +0400
committer: Benoit Bolsee <benoit.bolsee@online.be> 2009-05-07 13:13:01 +0400
commit: 42557f90bd16771a5c6437dbfb3952527df7fb1a (patch)
tree: 783a84eef7c05adbea3015ecd09ec7e2b18a7cfa /source/gameengine/Rasterizer/RAS_BucketManager.cpp
parent: 779bf435ef2ba87fbcee6a28b053d97a551b8eb5 (diff)
1 files changed, 59 insertions, 1 deletions
diff --git a/source/gameengine/Rasterizer/RAS_BucketManager.cpp b/source/gameengine/Rasterizer/RAS_BucketManager.cpp
index ec290f89d9e..a111ac2786f 100644
--- a/source/gameengine/Rasterizer/RAS_BucketManager.cpp
+++ b/source/gameengine/Rasterizer/RAS_BucketManager.cpp
@@ -113,16 +113,38 @@ void RAS_BucketManager::OrderBuckets(const MT_Transform& cameratrans, BucketList
 	const MT_Vector3 pnorm(cameratrans.getBasis()[2]);
 
 	for (bit = buckets.begin(); bit != buckets.end(); ++bit)
+	{
+#if 1
+		SG_DList::iterator<RAS_MeshSlot> mit((*bit)->GetActiveMeshSlots());
+		for(mit.begin(); !mit.end(); ++mit)
+			size++;
+#else
 		for (mit = (*bit)->msBegin(); mit != (*bit)->msEnd(); ++mit)
 			if (!mit->IsCulled())
 				size++;
+#endif
+	}
 
 	slots.resize(size);
 
 	for (bit = buckets.begin(); bit != buckets.end(); ++bit)
+	{
+#if 1
+		RAS_MaterialBucket* bucket = *bit;
+		RAS_MeshSlot* ms;
+		// remove the mesh slot form the list, it culls them automatically for next frame
+		for(ms = bucket->GetNextActiveMeshSlot();
+			ms!= NULL;
+			ms = bucket->GetNextActiveMeshSlot())
+		{
+			slots[i++].set(ms, bucket, pnorm);
+		}
+#else
 		for (mit = (*bit)->msBegin(); mit != (*bit)->msEnd(); ++mit)
 			if (!mit->IsCulled())
 				slots[i++].set(&*mit, *bit, pnorm);
+#endif
+	}
 		
 	if(alpha)
 		sort(slots.begin(), slots.end(), backtofront());
@@ -161,11 +183,28 @@ void RAS_BucketManager::RenderSolidBuckets(
 	const MT_Transform& cameratrans, RAS_IRasterizer* rasty, RAS_IRenderTools* rendertools)
 {
 	BucketList::iterator bit;
-	list<RAS_MeshSlot>::iterator mit;
 
 	rasty->SetDepthMask(RAS_IRasterizer::KX_DEPTHMASK_ENABLED);
 
 	for (bit = m_SolidBuckets.begin(); bit != m_SolidBuckets.end(); ++bit) {
+#if 1
+		RAS_MaterialBucket* bucket = *bit;
+		RAS_MeshSlot* ms;
+		// remove the mesh slot form the list, it culls them automatically for next frame
+		for(ms = bucket->GetNextActiveMeshSlot();
+			ms!= NULL;
+			ms = bucket->GetNextActiveMeshSlot())
+		{
+			rendertools->SetClientObject(rasty, ms->m_clientObj);
+			while (bucket->ActivateMaterial(cameratrans, rasty, rendertools))
+				bucket->RenderMeshSlot(cameratrans, rasty, rendertools, *ms);
+
+			// make this mesh slot culled automatically for next frame
+			// it will be culled out by frustrum culling
+			ms->SetCulled(true);
+		}
+#else
+		list<RAS_MeshSlot>::iterator mit;
 		for (mit = (*bit)->msBegin(); mit != (*bit)->msEnd(); ++mit) {
 			if (mit->IsCulled())
 				continue;
@@ -179,6 +218,7 @@ void RAS_BucketManager::RenderSolidBuckets(
 			// it will be culled out by frustrum culling
 			mit->SetCulled(true);
 		}
+#endif
 	}
 	
 	/* this code draws meshes order front-to-back instead to reduce overdraw.
@@ -276,3 +316,21 @@ void RAS_BucketManager::ReleaseDisplayLists(RAS_IPolyMaterial *mat)
 	}
 }
 
+void RAS_BucketManager::ReleaseMaterials(RAS_IPolyMaterial * mat)
+{
+	BucketList::iterator bit;
+	list<RAS_MeshSlot>::iterator mit;
+
+	for (bit = m_SolidBuckets.begin(); bit != m_SolidBuckets.end(); ++bit) {
+		if (mat == NULL || (mat == (*bit)->GetPolyMaterial())) {
+			(*bit)->GetPolyMaterial()->ReleaseMaterial();
+		}
+	}
+	
+	for (bit = m_AlphaBuckets.begin(); bit != m_AlphaBuckets.end(); ++bit) {
+		if (mat == NULL || (mat == (*bit)->GetPolyMaterial())) {
+			(*bit)->GetPolyMaterial()->ReleaseMaterial();
+		}
+	}
+}
+
author	Benoit Bolsee <benoit.bolsee@online.be>	2009-05-07 13:13:01 +0400
committer	Benoit Bolsee <benoit.bolsee@online.be>	2009-05-07 13:13:01 +0400
commit	42557f90bd16771a5c6437dbfb3952527df7fb1a (patch)
tree	783a84eef7c05adbea3015ecd09ec7e2b18a7cfa /source/gameengine/Rasterizer/RAS_BucketManager.cpp
parent	779bf435ef2ba87fbcee6a28b053d97a551b8eb5 (diff)