首先来看CGPOptimizer类(src/include/gpopt/CGPOptimizer.h),它封装了ORCA优化器导出(export)给Greenplum数据库使用的函数。Greenplum数据库主流程调用的是extern "C"中提供的函数,比如初始化ORCA优化器的函数InitGPOPT、优化查询树的函数GPOPTOptimizedPlan、explain流程中调用的函数SerializeDXLPlan,以及结束ORCA优化器的函数TerminateGPOPT。

class CGPOptimizer{
public:	
	static PlannedStmt *GPOPTOptimizedPlan( Query *query, bool * had_unexpected_failure	// output : set to true if optimizer unexpectedly failed to produce plan ); // optimize given query using GP optimizer
	static char *SerializeDXLPlan(Query *query); // serialize planned statement into DXL
	static void InitGPOPT(); // gpopt initialize and terminate
	static void TerminateGPOPT();
};

// C-linkage declarations of the optimizer entry points, so that callers
// compiled as C can reference them.
extern "C" {
// Optimize the given query; *had_unexpected_failure is an output flag.
PlannedStmt *GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure);

// Serialize the plan for the given query into DXL.
char *SerializeDXLPlan(Query *query);

// Initialize GPOPT.
void InitGPOPT();

// Terminate GPOPT.
void TerminateGPOPT();
}

CGPOptimizer为GP提供ORCA优化器初始化函数

InitGPOPT函数用于初始化GPOPT及其依赖库(Initialize GPOPT and dependent libraries):当optimizer_use_gpdb_allocators开启时先调用CMemoryPoolPallocManager::Init(),随后依次调用gpos_init(&params)、gpdxl_init()、gpopt_init()。该函数由src/backend/utils/init/postinit.c中的InitPostgres函数调用。

extern "C" {
// C-linkage entry point for initializing GPOPT; forwards to
// CGPOptimizer::InitGPOPT().
void InitGPOPT() {
	GPOS_TRY {
		return CGPOptimizer::InitGPOPT();
	}GPOS_CATCH_EX(ex){
		// Only an exception matching gpdxl::ExmaGPDB / gpdxl::ExmiGPDBError
		// is re-thrown via PG_RE_THROW(); this handler takes no action for
		// any other exception.
		if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError)){
			PG_RE_THROW();
		}
	}
	GPOS_CATCH_END;
}
}
// Initialize GPOPT and the libraries it depends on.
void CGPOptimizer::InitGPOPT() {
	// Initialize CMemoryPoolPallocManager only when
	// optimizer_use_gpdb_allocators is set.
	if (optimizer_use_gpdb_allocators) {
		CMemoryPoolPallocManager::Init();
	}

	// gpos is initialized with gpdb::IsAbortRequested as its init parameter.
	struct gpos_init_params params = {gpdb::IsAbortRequested};

	// Bring the libraries up in order: gpos, then gpdxl, then gpopt.
	gpos_init(&params);
	gpdxl_init();
	gpopt_init();
}

ORCA优化器浅析——GP数据库调用优化器流程_java

CGPOptimizer为GP提供ORCA优化器优化函数

ORCA优化器浅析——GP数据库调用优化器流程_java_02


GP有两种优化器:PG优化器和ORCA优化器。要确定执行计划来自PG优化器还是ORCA优化器,可以查看PlanGenerator的值:typedef enum PlanGenerator { PLANGEN_PLANNER, /* plan produced by the planner */ PLANGEN_OPTIMIZER, /* plan produced by the optimizer */ } PlanGenerator;

ORCA优化器浅析——GP数据库调用优化器流程_初始化_03


Master端简单查询的入口函数exec_simple_query开始生成执行计划并进行分发,如下图所示。standard_planner函数中分为ORCA优化器和PG优化器2个分支来产生执行计划。产生执行计划后,由PortalStart函数调用standard_ExecutorStart,进而执行分发执行计划的函数CdbDispatchPlan,将执行计划从master分发到各个segment。摘自https://blog.51cto.com/yanzongshuai/5675056

ORCA优化器浅析——GP数据库调用优化器流程_执行计划_04

extern "C" {
// C-linkage entry point: passes both arguments through unchanged to
// CGPOptimizer::GPOPTOptimizedPlan and returns whatever it returns.
PlannedStmt *
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure)
{
	PlannedStmt *planned_stmt =
		CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure);
	return planned_stmt;
}
}
//---------------------------------------------------------------------------
//	@function:		CGPOptimizer::GPOPTOptimizedPlan
//	@doc:		Optimize given query using GP optimizer
//	  query                  - query to optimize
//	  had_unexpected_failure - output: set to true if optimizer unexpectedly
//	                           failed to produce plan
//	  returns                - the plan statement produced by
//	                           COptTasks::GPOPTOptimizedPlan; plStmt starts
//	                           out as NULL
//---------------------------------------------------------------------------
PlannedStmt *CGPOptimizer::GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure){
	SOptContext gpopt_context;
	PlannedStmt *plStmt = NULL;

	// The output flag is cleared up front, before optimization is attempted.
	*had_unexpected_failure = false;

	GPOS_TRY{
		plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context);
		gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlStmt); // clean up context
	}GPOS_CATCH_EX(ex){
		// ... (exception handling elided in this listing)
	}GPOS_CATCH_END;
	return plStmt;
}
//---------------------------------------------------------------------------
//	@function:		COptTasks::GPOPTOptimizedPlan
//	@doc:		Run OptimizeTask on the query with plan-statement generation
//			requested, and return the context's m_plan_stmt
//---------------------------------------------------------------------------
PlannedStmt *
COptTasks::GPOPTOptimizedPlan(Query *query, SOptContext *gpopt_context)
{
	// Ask for a plan statement and record the query to optimize.
	gpopt_context->m_should_generate_plan_stmt = true;
	gpopt_context->m_query = query;

	Execute(&OptimizeTask, gpopt_context);

	return gpopt_context->m_plan_stmt;
}

CGPOptimizer为GP提供ORCA优化器explain函数

SerializeDXLPlan函数用于将planned statement序列化为DXL(Serialize planned statement into DXL),其最终调用COptTasks::Optimize(query)函数。

extern "C" {
// C-linkage entry point: hands the query to CGPOptimizer::SerializeDXLPlan
// and returns its result unchanged.
char *
SerializeDXLPlan(Query *query)
{
	char *plan_dxl = CGPOptimizer::SerializeDXLPlan(query);
	return plan_dxl;
}
}
// Serialize the plan for the given query into DXL by delegating to
// COptTasks::Optimize(query) and returning its result.
char *CGPOptimizer::SerializeDXLPlan(Query *query){
	GPOS_TRY;{ return COptTasks::Optimize(query); }
	GPOS_CATCH_EX(ex);{
		// Report the failure as an ERROR with ERRCODE_INTERNAL_ERROR, using
		// the file name and line number carried by the caught exception.
		errstart(ERROR, ex.Filename(), ex.Line(), NULL, TEXTDOMAIN);
		errfinish(errcode(ERRCODE_INTERNAL_ERROR),errmsg("optimizer failed to produce plan"));
	}
	GPOS_CATCH_END;
	// NOTE(review): presumably only reached if the error report above
	// returns control to this function — confirm.
	return NULL;
}
//---------------------------------------------------------------------------
//	@function:		COptTasks::Optimize
//	@doc:		Run OptimizeTask on the query with DXL serialization requested,
//			and return the context's m_plan_dxl
//---------------------------------------------------------------------------
char *
COptTasks::Optimize(Query *query)
{
	SOptContext ctx;

	// Ask for the plan to be serialized to DXL and record the query.
	ctx.m_should_serialize_plan_dxl = true;
	ctx.m_query = query;

	Execute(&OptimizeTask, &ctx);

	// Clean up the context before returning.
	// NOTE(review): presumably the epin* arguments name the members Free()
	// must leave intact, since m_plan_dxl is returned afterwards — confirm.
	ctx.Free(ctx.epinQuery, ctx.epinPlanDXL);

	return ctx.m_plan_dxl;
}

COptTasks::GPOPTOptimizedPlan和COptTasks::Optimize函数都是调用OptimizeTask函数,不同的是分别设置SOptContext的m_should_generate_plan_stmt或m_should_serialize_plan_dxl,从而达到不同的作用:optimizes a query to plannedstmt、optimizes a query to physical DXL。

SerializeDXLPlan函数用于explain流程:ExplainQuery/ExplainOneUtility --> ExplainOneQuery --> ExplainDXL --> SerializeDXLPlan。

ORCA优化器浅析——GP数据库调用优化器流程_java_05

CGPOptimizer为GP提供ORCA优化器结束函数

TerminateGPOPT函数用于Terminate GPOPT and dependent libraries,主要调用gpopt_terminate()、gpdxl_terminate()、gpos_terminate()。

extern "C" {
// C-linkage entry point for terminating GPOPT; forwards to
// CGPOptimizer::TerminateGPOPT().
void TerminateGPOPT() {
	GPOS_TRY { return CGPOptimizer::TerminateGPOPT();
	} GPOS_CATCH_EX(ex){
		// Only an exception matching gpdxl::ExmaGPDB / gpdxl::ExmiGPDBError
		// is re-thrown via PG_RE_THROW(); this handler takes no action for
		// any other exception.
		if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError)) {
			PG_RE_THROW();
		}
	}
	GPOS_CATCH_END;
}
}
// Terminate GPOPT and the libraries it depends on.
void CGPOptimizer::TerminateGPOPT() {
	// Shut down in the reverse of the order used by InitGPOPT
	// (gpos, gpdxl, gpopt): gpopt first, then gpdxl, then gpos.
	gpopt_terminate();
	gpdxl_terminate();
	gpos_terminate();
}

ShutdownPostgres函数调用TerminateGPOPT,并销毁OptimizerMemoryContext内存上下文。

ORCA优化器浅析——GP数据库调用优化器流程_初始化_06