ORCA优化器浅析——DXLToPlStmt[CTranslatorDXLToPlStmt]_List


如上图所示是将plan_dxl转为plan_stmt的主入口函数。其主要工作就是创建plan_id_generator、motion_id_generator、param_id_generator和table_list、subplans_list,并将其设置到CContextDXLToPlStmt dxl_to_plan_stmt_ctxt中供后续流程调用;初始化CTranslatorDXLToPlStmt类,形参为MDACCESSOR和CContextDXLToPlStmt;最终调用dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL实现DXL -> PlannedStmt的转化。

//---------------------------------------------------------------------------
//	@function:
//		COptTasks::ConvertToPlanStmtFromDXL
//	@doc:
//		Translate a DXL tree into a planned statement.
//
//		Creates the plan/motion/param id generators and the (initially
//		empty) range-table and subplan lists, bundles them into a
//		CContextDXLToPlStmt, and passes that context to a
//		CTranslatorDXLToPlStmt which performs the translation.
//
//		mp                   - memory pool handed to the context and translator
//		md_accessor          - metadata accessor handed to the translator
//		dxlnode              - root of the DXL plan to translate
//		can_set_tag          - forwarded to GetPlannedStmtFromDXL
//		distribution_hashops - forwarded to the translation context
//
//		Returns the PlannedStmt produced by GetPlannedStmtFromDXL.
//---------------------------------------------------------------------------
PlannedStmt *COptTasks::ConvertToPlanStmtFromDXL(CMemoryPool *mp, CMDAccessor *md_accessor, const CDXLNode *dxlnode, bool can_set_tag, DistributionHashOpsKind distribution_hashops) {
	// plan and motion ids start at 1, parameter ids start at 0
	CIdGenerator plan_id_generator(1 /* ulStartId */);
	CIdGenerator motion_id_generator(1 /* ulStartId */);
	CIdGenerator param_id_generator(0 /* ulStartId */);

	// the context stores the addresses of these lists, so they must outlive
	// the translation performed below
	List *table_list = NULL;
	List *subplans_list = NULL;

	CContextDXLToPlStmt dxl_to_plan_stmt_ctxt(mp, &plan_id_generator, &motion_id_generator, &param_id_generator, distribution_hashops, &table_list, &subplans_list);

	// translate DXL -> PlannedStmt
	CTranslatorDXLToPlStmt dxl_to_plan_stmt_translator(mp, md_accessor, &dxl_to_plan_stmt_ctxt, gpdb::GetGPSegmentCount());
	return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL(dxlnode, can_set_tag);
}

CContextDXLToPlStmt

CContextDXLToPlStmt类提供对CIdGenerators的使用和RangeTableEntries、Subplans的访问。【providing access to CIdGenerators (needed to number initplans, motion nodes as well as params), list of RangeTableEntries and Subplans generated so far during DXL -> PlStmt translation.】
CContextDXLToPlStmt构造函数的签名为 CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter, CIdGenerator *motion_id_counter, CIdGenerator *param_id_counter, DistributionHashOpsKind distribution_hashops, List **rtable_entries_list, List **subplan_entries_list);为m_plan_id_counter、m_motion_id_counter、m_param_id_counter、m_distribution_hashops、m_subplan_entries_list提供了初始值,并初始化m_cte_consumer_info和m_num_partition_selectors_array。

class CContextDXLToPlStmt {
private:
    CMemoryPool *m_mp;
    
	CIdGenerator *m_plan_id_counter; // counter for generating plan ids	
	CIdGenerator *m_motion_id_counter; // counter for generating motion ids	
	CIdGenerator *m_param_id_counter; // counter for generating unique param ids	
	DistributionHashOpsKind m_distribution_hashops; // What operator classes to use for distribution keys?	
	List **m_rtable_entries_list; // list of all rtable entries	
	List **m_subplan_entries_list; // list of all subplan entries

	struct SCTEConsumerInfo{ // cte consumer information	
		List *m_cte_consumer_list; // list of ShareInputScan represent cte consumers	
		SCTEConsumerInfo(List *plan_cte) : m_cte_consumer_list(plan_cte){} // ctor
		void AddCTEPlan(ShareInputScan *share_input_scan) { m_cte_consumer_list = gpdb::LAppend(m_cte_consumer_list, share_input_scan); }
		~SCTEConsumerInfo() { gpdb::ListFree(m_cte_consumer_list); }
	};
	// hash maps mapping ULONG -> SCTEConsumerInfo
	typedef CHashMap<ULONG, SCTEConsumerInfo, gpos::HashValue<ULONG>, gpos::Equals<ULONG>, CleanupDelete<ULONG>,  CleanupDelete<SCTEConsumerInfo> > HMUlCTEConsumerInfo;
	// hash map of the cte identifiers and the cte consumers with the same cte identifier
	HMUlCTEConsumerInfo *m_cte_consumer_info;
	
	
	List *m_partitioned_tables_list; // list of oids of partitioned tables	
	ULongPtrArray *m_num_partition_selectors_array; // number of partition selectors for each dynamic scan
	
	ULONG m_result_relation_index; // index of the target relation in the rtable or 0 if not a DML statement

	IntoClause *m_into_clause; // into clause
	
	GpPolicy *m_distribution_policy; // CTAS distribution policy

public:	
	CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter,CIdGenerator *motion_id_counter,CIdGenerator *param_id_counter,DistributionHashOpsKind distribution_hashops,List **rtable_entries_list,List **subplan_entries_list); // ctor/dtor
	~CContextDXLToPlStmt(); // dtor
	
	ULONG GetNextPlanId() { return m_plan_id_counter->next_id(); }; // retrieve the next plan id	
	ULONG GetCurrentMotionId() { return m_motion_id_counter->current_id(); }; // retrieve the current motion id	
	ULONG GetNextMotionId() { return m_motion_id_counter->next_id(); }; // retrieve the next motion id
	ULONG GetCurrentParamId() { return m_param_id_counter->next_id(); }; // retrieve the current parameter id	
	ULONG GetNextParamId() { return m_param_id_counter->current_id(); }; // retrieve the next parameter id

	// add a newly found CTE consumer
	void AddCTEConsumerInfo(ULONG cte_id, ShareInputScan *share_input_scan);
	// return the list of shared input scan plans representing the CTE consumers
	List *GetCTEConsumerList(ULONG cte_id) const;

	
	List *GetRTableEntriesList() { return (*(m_rtable_entries_list)); }; // return list of range table entries
	// add a range table entry
	void AddRTE(RangeTblEntry *rte, BOOL is_result_relation = false);
	// index of result relation in the rtable
	ULONG GetResultRelationIndex() const { return m_result_relation_index; }	
    void AddSubplan(Plan *);
    List *GetSubplanEntriesList();
    
	// return list of partitioned table indexes
	List *GetPartitionedTablesList() const{ return m_partitioned_tables_list; }
	// return list containing number of partition selectors for every scan id
	List *GetNumPartitionSelectorsList() const;
	// add a partitioned table index
	void AddPartitionedTable(OID oid);
	// increment the number of partition selectors for the given scan id
	void IncrementPartitionSelectors(ULONG scan_id);

	// add CTAS information
	void AddCtasInfo(IntoClause *into_clause, GpPolicy *distribution_policy) { m_into_clause = into_clause; m_distribution_policy = distribution_policy; };
	// into clause
	IntoClause *GetIntoClause() const{ return m_into_clause; }
	// CTAS distribution policy
	GpPolicy * GetDistributionPolicy() const { return m_distribution_policy; }

	// Get the hash opclass or hash function for given datatype, based on decision made by DetermineDistributionHashOpclasses()
	Oid GetDistributionHashOpclassForType(Oid typid);
	Oid GetDistributionHashFuncForType(Oid typid);
};

该类提供如下功能:
1 CIdGenerators的next和current id函数
2 返回RangeTableEntries和subplans generated so far列表,其实就是m_rtable_entries_list和m_subplan_entries_list。
AddRTE函数向m_rtable_entries_list添加RangeTblEntry,如果设置is_result_relation则需要将m_result_relation_index更新为刚加入的RangeTblEntry位置。

//---------------------------------------------------------------------------
//	@function: CContextDXLToPlStmt::AddRTE
//	@doc: Append a range table entry to the range table list. An entry that
//		is the result relation is flagged as not being in the FROM clause,
//		and its position in the list is stored as the result relation index.
//---------------------------------------------------------------------------
void CContextDXLToPlStmt::AddRTE(RangeTblEntry *rte, BOOL is_result_relation){
	List *&rtable = *m_rtable_entries_list;
	rtable = gpdb::LAppend(rtable, rte);

	if (!is_result_relation) {
		rte->inFromCl = true;
		return;
	}

	// the result relation was just appended, so the list length is its index
	rte->inFromCl = false;
	m_result_relation_index = gpdb::ListLength(rtable);
}

3 AddCTEConsumerInfo需要将share_input_scan包装为cte_plan构造成SCTEConsumerInfo结构体。m_cte_consumer_info是key为cte_id、value为SCTEConsumerInfo的map,因此插入时需要先查找映射关系是否存在。

//---------------------------------------------------------------------------
//	@function:		CContextDXLToPlStmt::AddCTEConsumerInfo
//	@doc:		Register a CTE consumer under its CTE id: append it to the
//			existing entry for that id, or create a new map entry when
//			the id has not been seen before
//---------------------------------------------------------------------------
void CContextDXLToPlStmt::AddCTEConsumerInfo(ULONG cte_id, ShareInputScan *share_input_scan) {
	SCTEConsumerInfo *existing_info = m_cte_consumer_info->Find(&cte_id);
	if (NULL == existing_info){
		// first consumer for this cte id: start a one-element list and
		// insert it under a newly allocated key
		List *consumer_list = ListMake1(share_input_scan);
		ULONG *map_key = GPOS_NEW(m_mp) ULONG(cte_id);
		SCTEConsumerInfo *new_info = GPOS_NEW(m_mp) SCTEConsumerInfo(consumer_list);
		m_cte_consumer_info->Insert(map_key, new_info);
		return;
	}

	existing_info->AddCTEPlan(share_input_scan);
}

//---------------------------------------------------------------------------
//	@function:		CContextDXLToPlStmt::GetCTEConsumerList
//	@doc:	Look up the list of GPDB plan nodes registered as consumers of the
//		given CTE identifier; returns NULL when the identifier is unknown
//---------------------------------------------------------------------------
List *CContextDXLToPlStmt::GetCTEConsumerList(ULONG cte_id) const{
	SCTEConsumerInfo *entry = m_cte_consumer_info->Find(&cte_id);
	if (NULL == entry){
		return NULL;
	}
	return entry->m_cte_consumer_list;
}

4 m_partitioned_tables_list存放的是分区表列表(源码注释中称为partitioned table indexes),列表中实际保存的是分区表的oid。

// accessor for the list of partitioned tables collected so far
List *GetPartitionedTablesList() const
{
	return m_partitioned_tables_list;
}
// record a partitioned table oid; an oid already present in the list is ignored
void AddPartitionedTable(OID oid){
	if (gpdb::ListMemberOid(m_partitioned_tables_list, oid)){
		return;
	}
	m_partitioned_tables_list = gpdb::LAppendOid(m_partitioned_tables_list, oid);
}

partition_selectors_list存放的是每个scan id中所包含的partition selectors的数量列表,而IncrementPartitionSelectors函数则针对给定scan id的partition selectors数量进行递增操作。

// build a new integer list holding, for each scan id in order, the number of
// partition selectors recorded in m_num_partition_selectors_array
List *CContextDXLToPlStmt::GetNumPartitionSelectorsList() const{
	List *result = NIL;
	const ULONG num_scans = m_num_partition_selectors_array->Size();
	ULONG scan_id = 0;
	while (scan_id < num_scans) {
		const ULONG *count = (*m_num_partition_selectors_array)[scan_id];
		result = gpdb::LAppendInt(result, *count);
		++scan_id;
	}
	return result;
}

// bump the partition selector count stored for the given scan id
void CContextDXLToPlStmt::IncrementPartitionSelectors(ULONG scan_id) {
	// pad the array with zero counters until scan_id is a valid index
	ULONG next_slot = m_num_partition_selectors_array->Size();
	while (next_slot <= scan_id){
		m_num_partition_selectors_array->Append(GPOS_NEW(m_mp) ULONG(0));
		++next_slot;
	}

	ULONG &counter = *(*m_num_partition_selectors_array)[scan_id];
	++counter;
}

CTranslatorDXLToPlStmt

CTranslatorDXLToPlStmt类提供了DXLToPlStmt的转换函数。其构造函数主要是将元数据访问类md_accessor,dxl_to_plstmt_context设置到对应的成员中,同样需要初始化CTranslatorDXLToScalar类,和QueryToDXL流程相似。InitTranslators函数则是初始化对应DXLNode转换为PlStmt的函数。

// ctor: stores the memory pool, metadata accessor and translation context,
// starts out as a SELECT on a non-distributed target with no result
// relations, then creates the scalar translator and fills the
// operator -> translator function table
CTranslatorDXLToPlStmt::CTranslatorDXLToPlStmt(CMemoryPool *mp, CMDAccessor *md_accessor, CContextDXLToPlStmt *dxl_to_plstmt_context, ULONG num_of_segments)
	: m_mp(mp),
	  m_md_accessor(md_accessor),
	  m_dxl_to_plstmt_context(dxl_to_plstmt_context),
	  m_cmd_type(CMD_SELECT),
	  m_is_tgt_tbl_distributed(false),
	  m_result_rel_list(NULL),
	  m_num_of_segments(num_of_segments),
	  m_partition_selector_counter(0)
{
	m_translator_dxl_to_scalar = GPOS_NEW(m_mp) CTranslatorDXLToScalar(m_mp, m_md_accessor, m_num_of_segments);
	InitTranslators();
}

void CTranslatorDXLToPlStmt::InitTranslators(){
	for (ULONG idx = 0; idx < GPOS_ARRAY_SIZE(m_dxlop_translator_func_mapping_array); idx++) 
		m_dxlop_translator_func_mapping_array[idx] = NULL;

	// array mapping operator type to translator function
	static const STranslatorMapping dxlop_translator_func_mapping_array[] = {
		{EdxlopPhysicalTableScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
		{EdxlopPhysicalExternalScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
		{EdxlopPhysicalMultiExternalScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
		{EdxlopPhysicalIndexScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexScan},
		{EdxlopPhysicalIndexOnlyScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexOnlyScan},
		{EdxlopPhysicalHashJoin,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLHashJoin},
		{EdxlopPhysicalNLJoin,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLNLJoin},
		{EdxlopPhysicalMergeJoin,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLMergeJoin},
		{EdxlopPhysicalMotionGather,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
		{EdxlopPhysicalMotionBroadcast,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
		{EdxlopPhysicalMotionRedistribute,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},
		{EdxlopPhysicalMotionRandom,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},
		{EdxlopPhysicalMotionRoutedDistribute,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
		{EdxlopPhysicalLimit,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLLimit},
		{EdxlopPhysicalAgg, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLAgg},
		{EdxlopPhysicalWindow,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLWindow},
		{EdxlopPhysicalSort, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLSort},
		{EdxlopPhysicalSubqueryScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLSubQueryScan},
		{EdxlopPhysicalResult,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLResult},
		{EdxlopPhysicalAppend,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLAppend},
		{EdxlopPhysicalMaterialize,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLMaterialize},
		{EdxlopPhysicalSequence,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLSequence},
		{EdxlopPhysicalDynamicTableScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynTblScan},
		{EdxlopPhysicalDynamicIndexScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynIdxScan},
		{EdxlopPhysicalTVF, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTvf},
		{EdxlopPhysicalDML, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDml},
		{EdxlopPhysicalSplit,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLSplit},
		{EdxlopPhysicalRowTrigger,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLRowTrigger},
		{EdxlopPhysicalAssert,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLAssert},
		{EdxlopPhysicalCTEProducer,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEProducerToSharedScan},
		{EdxlopPhysicalCTEConsumer,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEConsumerToSharedScan},
		{EdxlopPhysicalBitmapTableScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},
		{EdxlopPhysicalDynamicBitmapTableScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},
		{EdxlopPhysicalCTAS, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLCtas},
		{EdxlopPhysicalPartitionSelector,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLPartSelector},
		{EdxlopPhysicalValuesScan,
		 &gpopt::CTranslatorDXLToPlStmt::TranslateDXLValueScan},
	};

	const ULONG num_of_translators =GPOS_ARRAY_SIZE(dxlop_translator_func_mapping_array);
	for (ULONG idx = 0; idx < num_of_translators; idx++){
		STranslatorMapping elem = dxlop_translator_func_mapping_array[idx];
		m_dxlop_translator_func_mapping_array[elem.dxl_op_id] = elem.dxlnode_to_logical_funct;
	}
}

GetPlannedStmtFromDXL

实现Translate DXL node into a PlannedStmt功能的入口函数:1 初始化CDXLTranslateContext dxl_translate_ctxt和CDXLTranslationContextArray *ctxt_translation_prev_siblings 2 调用TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt, ctxt_translation_prev_siblings)进行转换【TranslateDXLOperatorToPlan函数根据dxlnode->GetOperator()->GetDXLOperator()不同的操作符id获取对应的转换函数,并调用转换函数进行转换】3 将所有的RangeTblEntry中的RTE_RELATION的oid从RangeTblEntry提取出来放到oids_list中
4 组装planned stmt 5 如果是CMD_SELECT,且dxlnode中的m_direct_dispatch_info不为null,说明该执行计划可以direct dispatch给某个segment,因此需要设置plan中的directDispatch信息,比如dispatch的segment contentId列表,并将planned_stmt->planTree中的Motion节点都设置directDispatch信息。

// Translate the DXL plan rooted at dxlnode into a PlannedStmt.
//   dxlnode     - root DXL node of the plan
//   can_set_tag - stored in the resulting statement's canSetTag
// The operator tree is translated first; the statement is then assembled
// from the translation context and, for SELECTs carrying direct dispatch
// info, the direct dispatch settings are copied onto every Motion node.
PlannedStmt *CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode,  bool can_set_tag) {
	CDXLTranslateContext dxl_translate_ctxt(m_mp, false);
	CDXLTranslationContextArray *prev_siblings_ctxts = GPOS_NEW(m_mp) CDXLTranslationContextArray(m_mp);

	Plan *plan = TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt, prev_siblings_ctxts);
	prev_siblings_ctxts->Release();

	// gather the relation oid of every plain-relation range table entry
	List *relation_oids = NIL;
	ListCell *rte_cell = NULL;
	ForEach(rte_cell, m_dxl_to_plstmt_context->GetRTableEntriesList()){
		RangeTblEntry *rte = static_cast<RangeTblEntry *>(lfirst(rte_cell));
		if (RTE_RELATION == rte->rtekind){
			relation_oids = gpdb::LAppendOid(relation_oids, rte->relid);
		}
	}

	// assemble planned stmt
	PlannedStmt *planned_stmt = MakeNode(PlannedStmt);
	planned_stmt->planGen = PLANGEN_OPTIMIZER;
	planned_stmt->rtable = m_dxl_to_plstmt_context->GetRTableEntriesList();
	planned_stmt->subplans = m_dxl_to_plstmt_context->GetSubplanEntriesList();
	planned_stmt->planTree = plan;

	// store the partitioned tables recorded by the context in the planned stmt
	planned_stmt->queryPartOids = m_dxl_to_plstmt_context->GetPartitionedTablesList();
	planned_stmt->canSetTag = can_set_tag;
	planned_stmt->relationOids = relation_oids;
	planned_stmt->numSelectorsPerScanId = m_dxl_to_plstmt_context->GetNumPartitionSelectorsList();

	// motion count is the current motion id minus one
	plan->nMotionNodes = m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;
	planned_stmt->nMotionNodes = m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;
	planned_stmt->commandType = m_cmd_type;

	// sequential dispatch only when there are no motion nodes and this is not
	// a DML on a distributed table; parallel dispatch otherwise
	plan->dispatch = (0 == plan->nMotionNodes && !m_is_tgt_tbl_distributed)
						 ? DISPATCH_SEQUENTIAL
						 : DISPATCH_PARALLEL;

	planned_stmt->resultRelations = m_result_rel_list;
	// GPDB_92_MERGE_FIXME: we really *should* be handling intoClause
	// but currently planner cheats (c.f. createas.c)
	// shift the intoClause handling into planner and re-enable this
	//	pplstmt->intoClause = m_pctxdxltoplstmt->Pintocl();
	planned_stmt->intoPolicy = m_dxl_to_plstmt_context->GetDistributionPolicy();

	SetInitPlanVariables(planned_stmt);

	// direct dispatch applies only to SELECTs that carry direct dispatch info
	if (CMD_SELECT != m_cmd_type || NULL == dxlnode->GetDXLDirectDispatchInfo()){
		return planned_stmt;
	}

	List *segment_ids = TranslateDXLDirectDispatchInfo(dxlnode->GetDXLDirectDispatchInfo());
	plan->directDispatch.contentIds = segment_ids;
	plan->directDispatch.isDirectDispatch = (NIL != segment_ids);
	if (!plan->directDispatch.isDirectDispatch){
		return planned_stmt;
	}

	// copy the direct dispatch settings onto every Motion node of the plan tree
	List *motion_nodes = gpdb::ExtractNodesPlan(planned_stmt->planTree, T_Motion,  true /*descendIntoSubqueries*/);
	ListCell *motion_cell = NULL;
	ForEach(motion_cell, motion_nodes){
		Motion *motion = static_cast<Motion *>(lfirst(motion_cell));
		motion->plan.directDispatch.isDirectDispatch = true;
		motion->plan.directDispatch.contentIds = plan->directDispatch.contentIds;
	}

	return planned_stmt;
}