如上图所示是将plan_dxl转为plan_stmt的主入口函数。其主要工作就是创建plan_id_generator、motion_id_generator、param_id_generator和table_list、subplans_list,并将其设置到CContextDXLToPlStmt dxl_to_plan_stmt_ctxt中供后续流程调用;初始化CTranslatorDXLToPlStmt类,形参为MDACCESSOR和CContextDXLToPlStmt;最终调用dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL实现DXL -> PlannedStmt的转化。
//---------------------------------------------------------------------------
// @function:
//		COptTasks::ConvertToPlanStmtFromDXL
// @doc:
//		Translate a DXL tree into a planned statement.
//
//		Sets up the id generators (plan / motion / param) and the range table
//		and subplan lists, bundles them into a CContextDXLToPlStmt, and hands
//		that context to a CTranslatorDXLToPlStmt which performs the actual
//		DXL -> PlannedStmt translation.
//
//		mp                   - memory pool passed to the context and translator
//		md_accessor          - metadata accessor passed to the translator
//		dxlnode              - root of the DXL plan to translate
//		can_set_tag          - forwarded to GetPlannedStmtFromDXL
//		distribution_hashops - forwarded to the translation context
//---------------------------------------------------------------------------
PlannedStmt *COptTasks::ConvertToPlanStmtFromDXL(CMemoryPool *mp, CMDAccessor *md_accessor, const CDXLNode *dxlnode, bool can_set_tag, DistributionHashOpsKind distribution_hashops) {
	// plan and motion ids start at 1, param ids start at 0
	CIdGenerator plan_id_generator(1 /* ulStartId */);
	CIdGenerator motion_id_generator(1 /* ulStartId */);
	CIdGenerator param_id_generator(0 /* ulStartId */);

	// lists filled in by the translator through the context
	List *table_list = NULL;
	List *subplans_list = NULL;

	// the context only stores pointers to the generators and lists declared above
	CContextDXLToPlStmt dxl_to_plan_stmt_ctxt(mp, &plan_id_generator, &motion_id_generator, &param_id_generator, distribution_hashops, &table_list, &subplans_list);

	// translate DXL -> PlannedStmt
	CTranslatorDXLToPlStmt dxl_to_plan_stmt_translator(mp, md_accessor, &dxl_to_plan_stmt_ctxt, gpdb::GetGPSegmentCount());
	return dxl_to_plan_stmt_translator.GetPlannedStmtFromDXL(dxlnode, can_set_tag);
}
CContextDXLToPlStmt
CContextDXLToPlStmt类提供对CIdGenerators的使用和RangeTableEntries、Subplans的访问。【providing access to CIdGenerators (needed to number initplans, motion nodes as well as params), list of RangeTableEntries and Subplans generated so far during DXL–>PlStmt translation.】
CContextDXLToPlStmt构造函数函数签名为 CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter, CIdGenerator *motion_id_counter, CIdGenerator *param_id_counter, DistributionHashOpsKind distribution_hashops, List **rtable_entries_list, List **subplan_entries_list);
为m_plan_id_counter、m_motion_id_counter、m_param_id_counter、m_distribution_hashops、m_subplan_entries_list提供了初始值,初始化m_cte_consumer_info和m_num_partition_selectors_array。
// Translation context shared across the DXL -> PlannedStmt translation.
// Gives access to the id generators (plan, motion and param ids) and to the
// lists of range table entries and subplans produced so far, and records
// CTE consumers, partitioned tables, partition selector counts and CTAS info.
class CContextDXLToPlStmt {
private:
	CMemoryPool *m_mp;

	CIdGenerator *m_plan_id_counter;	// counter for generating plan ids
	CIdGenerator *m_motion_id_counter;	// counter for generating motion ids
	CIdGenerator *m_param_id_counter;	// counter for generating unique param ids

	DistributionHashOpsKind m_distribution_hashops;	// What operator classes to use for distribution keys?

	List **m_rtable_entries_list;	// list of all rtable entries
	List **m_subplan_entries_list;	// list of all subplan entries

	// cte consumer information
	struct SCTEConsumerInfo {
		// list of ShareInputScan represent cte consumers
		List *m_cte_consumer_list;

		// ctor
		SCTEConsumerInfo(List *plan_cte) : m_cte_consumer_list(plan_cte) {}

		// append one more consumer to the list
		void AddCTEPlan(ShareInputScan *share_input_scan) {
			m_cte_consumer_list = gpdb::LAppend(m_cte_consumer_list, share_input_scan);
		}

		// dtor: frees the consumer list
		~SCTEConsumerInfo() { gpdb::ListFree(m_cte_consumer_list); }
	};

	// hash maps mapping ULONG -> SCTEConsumerInfo
	typedef CHashMap<ULONG, SCTEConsumerInfo, gpos::HashValue<ULONG>, gpos::Equals<ULONG>, CleanupDelete<ULONG>, CleanupDelete<SCTEConsumerInfo> > HMUlCTEConsumerInfo;

	// hash map of the cte identifiers and the cte consumers with the same cte identifier
	HMUlCTEConsumerInfo *m_cte_consumer_info;

	List *m_partitioned_tables_list;	// list of oids of partitioned tables
	ULongPtrArray *m_num_partition_selectors_array;	// number of partition selectors for each dynamic scan
	ULONG m_result_relation_index;	// index of the target relation in the rtable or 0 if not a DML statement
	IntoClause *m_into_clause;	// into clause
	GpPolicy *m_distribution_policy;	// CTAS distribution policy

public:
	// ctor
	CContextDXLToPlStmt(CMemoryPool *mp, CIdGenerator *plan_id_counter, CIdGenerator *motion_id_counter, CIdGenerator *param_id_counter, DistributionHashOpsKind distribution_hashops, List **rtable_entries_list, List **subplan_entries_list);

	// dtor
	~CContextDXLToPlStmt();

	// retrieve the next plan id
	ULONG GetNextPlanId() { return m_plan_id_counter->next_id(); }

	// retrieve the current motion id
	ULONG GetCurrentMotionId() { return m_motion_id_counter->current_id(); }

	// retrieve the next motion id
	ULONG GetNextMotionId() { return m_motion_id_counter->next_id(); }

	// retrieve the current parameter id (the "current" accessor must read
	// current_id(), consistent with GetCurrentMotionId above)
	ULONG GetCurrentParamId() { return m_param_id_counter->current_id(); }

	// retrieve the next parameter id (the "next" accessor must read next_id(),
	// consistent with GetNextMotionId above)
	ULONG GetNextParamId() { return m_param_id_counter->next_id(); }

	// add a newly found CTE consumer
	void AddCTEConsumerInfo(ULONG cte_id, ShareInputScan *share_input_scan);

	// return the list of shared input scan plans representing the CTE consumers
	List *GetCTEConsumerList(ULONG cte_id) const;

	// return list of range table entries
	List *GetRTableEntriesList() { return (*(m_rtable_entries_list)); }

	// add a range table entry
	void AddRTE(RangeTblEntry *rte, BOOL is_result_relation = false);

	// index of result relation in the rtable
	ULONG GetResultRelationIndex() const { return m_result_relation_index; }

	// add a subplan entry
	void AddSubplan(Plan *);

	// return list of subplan entries
	List *GetSubplanEntriesList();

	// return list of partitioned table indexes
	List *GetPartitionedTablesList() const { return m_partitioned_tables_list; }

	// return list containing number of partition selectors for every scan id
	List *GetNumPartitionSelectorsList() const;

	// add a partitioned table index
	void AddPartitionedTable(OID oid);

	// increment the number of partition selectors for the given scan id
	void IncrementPartitionSelectors(ULONG scan_id);

	// add CTAS information
	void AddCtasInfo(IntoClause *into_clause, GpPolicy *distribution_policy) {
		m_into_clause = into_clause;
		m_distribution_policy = distribution_policy;
	}

	// into clause
	IntoClause *GetIntoClause() const { return m_into_clause; }

	// CTAS distribution policy
	GpPolicy *GetDistributionPolicy() const { return m_distribution_policy; }

	// Get the hash opclass or hash function for given datatype, based on decision made by DetermineDistributionHashOpclasses()
	Oid GetDistributionHashOpclassForType(Oid typid);
	Oid GetDistributionHashFuncForType(Oid typid);
};
该类提供如下功能:
1 CIdGenerators的next和current id函数
2 返回RangeTableEntries和subplans generated so far列表,其实就是m_rtable_entries_list和m_subplan_entries_list。
AddRTE函数向m_rtable_entries_list添加RangeTblEntry,如果设置is_result_relation则需要将m_result_relation_index更新为刚加入的RangeTblEntry位置。
//---------------------------------------------------------------------------
// @function: CContextDXLToPlStmt::AddRTE
// @doc: Append a range table entry to the shared rtable list. For a result
//       relation the entry is flagged as not coming from the FROM clause and
//       its position (the list length after the append) is remembered.
//---------------------------------------------------------------------------
void CContextDXLToPlStmt::AddRTE(RangeTblEntry *rte, BOOL is_result_relation)
{
	List *&rtable = *m_rtable_entries_list;
	rtable = gpdb::LAppend(rtable, rte);

	if (!is_result_relation)
	{
		// ordinary entry: part of the FROM clause
		rte->inFromCl = true;
		return;
	}

	// result relation: not in the FROM clause, record where it sits
	rte->inFromCl = false;
	m_result_relation_index = gpdb::ListLength(rtable);
}
3 AddCTEConsumerInfo需要将share_input_scan包装为cte_plan构造成SCTEConsumerInfo结构体。m_cte_consumer_info是key为cte_id、value为SCTEConsumerInfo的map,因此插入时需要先查找映射关系是否存在。
//---------------------------------------------------------------------------
// @function: CContextDXLToPlStmt::AddCTEConsumerInfo
// @doc: Register a ShareInputScan as a consumer of the CTE with the given id.
//       An existing map entry gets the scan appended; otherwise a new entry
//       holding a one-element list is inserted.
//---------------------------------------------------------------------------
void CContextDXLToPlStmt::AddCTEConsumerInfo(ULONG cte_id, ShareInputScan *share_input_scan)
{
	SCTEConsumerInfo *existing_info = m_cte_consumer_info->Find(&cte_id);

	if (NULL == existing_info)
	{
		// first consumer seen for this cte id: build the list and the map key
		List *consumer_list = ListMake1(share_input_scan);
		ULONG *map_key = GPOS_NEW(m_mp) ULONG(cte_id);
		m_cte_consumer_info->Insert(map_key, GPOS_NEW(m_mp) SCTEConsumerInfo(consumer_list));
	}
	else
	{
		existing_info->AddCTEPlan(share_input_scan);
	}
}
//---------------------------------------------------------------------------
// @function: CContextDXLToPlStmt::GetCTEConsumerList
// @doc: Look up the consumers recorded for a CTE identifier; returns the
//       stored list, or NULL when no consumer was registered for that id.
//---------------------------------------------------------------------------
List *CContextDXLToPlStmt::GetCTEConsumerList(ULONG cte_id) const
{
	SCTEConsumerInfo *info = m_cte_consumer_info->Find(&cte_id);

	// unknown cte id
	if (NULL == info)
	{
		return NULL;
	}

	return info->m_cte_consumer_list;
}
4 m_partitioned_tables_list存放的是分区表indexes列表,其中存放的是表oid。
// accessor for the list of partitioned tables recorded so far
List *GetPartitionedTablesList() const
{
	return m_partitioned_tables_list;
}
// record a partitioned table oid, skipping oids that are already in the list
void AddPartitionedTable(OID oid)
{
	if (gpdb::ListMemberOid(m_partitioned_tables_list, oid))
	{
		// already recorded, nothing to do
		return;
	}

	m_partitioned_tables_list = gpdb::LAppendOid(m_partitioned_tables_list, oid);
}
partition_selectors_list存放的是每个scan id所包含的partition selectors的数量列表,而IncrementPartitionSelectors函数则针对给定scan id的partition selectors数量进行递增操作。
// build a fresh integer list holding, for each index of
// m_num_partition_selectors_array in order, the counter stored at that index
List *CContextDXLToPlStmt::GetNumPartitionSelectorsList() const
{
	List *result = NIL;
	const ULONG num_entries = m_num_partition_selectors_array->Size();

	ULONG idx = 0;
	while (idx < num_entries)
	{
		ULONG *counter = (*m_num_partition_selectors_array)[idx];
		result = gpdb::LAppendInt(result, *counter);
		idx++;
	}

	return result;
}
// bump the partition selector counter stored at index scan_id, growing the
// array with zero-initialized counters first when scan_id is out of range
void CContextDXLToPlStmt::IncrementPartitionSelectors(ULONG scan_id)
{
	// pad the array until index scan_id exists
	ULONG current_size = m_num_partition_selectors_array->Size();
	while (current_size <= scan_id)
	{
		m_num_partition_selectors_array->Append(GPOS_NEW(m_mp) ULONG(0));
		current_size++;
	}

	ULONG *counter = (*m_num_partition_selectors_array)[scan_id];
	++(*counter);
}
CTranslatorDXLToPlStmt
CTranslatorDXLToPlStmt类提供了DXLToPlStmt的转换函数。其构造函数主要是将元数据访问类md_accessor,dxl_to_plstmt_context设置到对应的成员中,同样需要初始化CTranslatorDXLToScalar类,和QueryToDXL流程相似。InitTranslators函数则是初始化对应DXLNode转换为PlStmt的函数。
// ctor: stores the memory pool, metadata accessor, translation context and
// segment count, creates the DXL -> scalar translator, and fills the
// operator -> translator function table via InitTranslators()
CTranslatorDXLToPlStmt::CTranslatorDXLToPlStmt(CMemoryPool *mp, CMDAccessor *md_accessor, CContextDXLToPlStmt *dxl_to_plstmt_context, ULONG num_of_segments)
	: m_mp(mp),
	  m_md_accessor(md_accessor),
	  m_dxl_to_plstmt_context(dxl_to_plstmt_context),
	  m_cmd_type(CMD_SELECT),
	  m_is_tgt_tbl_distributed(false),
	  m_result_rel_list(NULL),
	  m_num_of_segments(num_of_segments),
	  m_partition_selector_counter(0)
{
	// scalar expression translator shares the pool, accessor and segment count
	m_translator_dxl_to_scalar =
		GPOS_NEW(m_mp) CTranslatorDXLToScalar(m_mp, m_md_accessor, m_num_of_segments);

	InitTranslators();
}
void CTranslatorDXLToPlStmt::InitTranslators(){
for (ULONG idx = 0; idx < GPOS_ARRAY_SIZE(m_dxlop_translator_func_mapping_array); idx++)
m_dxlop_translator_func_mapping_array[idx] = NULL;
// array mapping operator type to translator function
static const STranslatorMapping dxlop_translator_func_mapping_array[] = {
{EdxlopPhysicalTableScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
{EdxlopPhysicalExternalScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
{EdxlopPhysicalMultiExternalScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLTblScan},
{EdxlopPhysicalIndexScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexScan},
{EdxlopPhysicalIndexOnlyScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLIndexOnlyScan},
{EdxlopPhysicalHashJoin,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLHashJoin},
{EdxlopPhysicalNLJoin,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLNLJoin},
{EdxlopPhysicalMergeJoin,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMergeJoin},
{EdxlopPhysicalMotionGather,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
{EdxlopPhysicalMotionBroadcast,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
{EdxlopPhysicalMotionRedistribute,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},
{EdxlopPhysicalMotionRandom,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDuplicateSensitiveMotion},
{EdxlopPhysicalMotionRoutedDistribute,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMotion},
{EdxlopPhysicalLimit,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLLimit},
{EdxlopPhysicalAgg, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLAgg},
{EdxlopPhysicalWindow,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLWindow},
{EdxlopPhysicalSort, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLSort},
{EdxlopPhysicalSubqueryScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSubQueryScan},
{EdxlopPhysicalResult,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLResult},
{EdxlopPhysicalAppend,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLAppend},
{EdxlopPhysicalMaterialize,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLMaterialize},
{EdxlopPhysicalSequence,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSequence},
{EdxlopPhysicalDynamicTableScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynTblScan},
{EdxlopPhysicalDynamicIndexScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLDynIdxScan},
{EdxlopPhysicalTVF, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLTvf},
{EdxlopPhysicalDML, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLDml},
{EdxlopPhysicalSplit,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLSplit},
{EdxlopPhysicalRowTrigger,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLRowTrigger},
{EdxlopPhysicalAssert,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLAssert},
{EdxlopPhysicalCTEProducer,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEProducerToSharedScan},
{EdxlopPhysicalCTEConsumer,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLCTEConsumerToSharedScan},
{EdxlopPhysicalBitmapTableScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},
{EdxlopPhysicalDynamicBitmapTableScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLBitmapTblScan},
{EdxlopPhysicalCTAS, &gpopt::CTranslatorDXLToPlStmt::TranslateDXLCtas},
{EdxlopPhysicalPartitionSelector,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLPartSelector},
{EdxlopPhysicalValuesScan,
&gpopt::CTranslatorDXLToPlStmt::TranslateDXLValueScan},
};
const ULONG num_of_translators =GPOS_ARRAY_SIZE(dxlop_translator_func_mapping_array);
for (ULONG idx = 0; idx < num_of_translators; idx++){
STranslatorMapping elem = dxlop_translator_func_mapping_array[idx];
m_dxlop_translator_func_mapping_array[elem.dxl_op_id] = elem.dxlnode_to_logical_funct;
}
}
GetPlannedStmtFromDXL
实现Translate DXL node into a PlannedStmt功能的入口函数:1 初始化CDXLTranslateContext dxl_translate_ctxt和CDXLTranslationContextArray *ctxt_translation_prev_siblings 2 调用TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt, ctxt_translation_prev_siblings)
进行转换【TranslateDXLOperatorToPlan函数根据dxlnode->GetOperator()->GetDXLOperator()
不同的操作符id获取对应的转换函数,并调用转换函数进行转换】3 将所有的RangeTblEntry中的RTE_RELATION的oid从RangeTblEntry提取出来放到oids_list中
4 组装planned stmt 5 如果是CMD_SELECT,且dxlnode中的m_direct_dispatch_info不为null,说明该执行计划可以direct dispatch给某个segment,因此需要设置plan中的directDispatch信息(比如dispatch的segment contentId列表),并将planned_stmt->planTree中的Motion节点都设置directDispatch信息。
// Translate a DXL plan tree into a PlannedStmt:
//   1. translate the root DXL operator into a Plan tree,
//   2. gather the relids of all RTE_RELATION range table entries,
//   3. assemble the PlannedStmt from the plan and the translation context,
//   4. for SELECT statements carrying direct dispatch info, propagate the
//      direct dispatch settings to the plan and to its Motion nodes.
PlannedStmt *CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, bool can_set_tag)
{
	CDXLTranslateContext dxl_translate_ctxt(m_mp, false);

	// the sibling-context array is only needed while translating the tree
	CDXLTranslationContextArray *prev_sibling_ctxts =
		GPOS_NEW(m_mp) CDXLTranslationContextArray(m_mp);
	Plan *plan = TranslateDXLOperatorToPlan(dxlnode, &dxl_translate_ctxt, prev_sibling_ctxts);
	prev_sibling_ctxts->Release();

	// collect the oids of plain relations referenced in the range table
	List *relation_oids = NIL;
	ListCell *rte_cell = NULL;
	ForEach(rte_cell, m_dxl_to_plstmt_context->GetRTableEntriesList())
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rte_cell);
		if (RTE_RELATION != rte->rtekind)
		{
			continue;
		}
		relation_oids = gpdb::LAppendOid(relation_oids, rte->relid);
	}

	// assemble planned stmt
	PlannedStmt *planned_stmt = MakeNode(PlannedStmt);
	planned_stmt->planGen = PLANGEN_OPTIMIZER;
	planned_stmt->rtable = m_dxl_to_plstmt_context->GetRTableEntriesList();
	planned_stmt->subplans = m_dxl_to_plstmt_context->GetSubplanEntriesList();
	planned_stmt->planTree = plan;

	// store partitioned table indexes in planned stmt
	planned_stmt->queryPartOids = m_dxl_to_plstmt_context->GetPartitionedTablesList();
	planned_stmt->canSetTag = can_set_tag;
	planned_stmt->relationOids = relation_oids;
	planned_stmt->numSelectorsPerScanId = m_dxl_to_plstmt_context->GetNumPartitionSelectorsList();

	// motion count is derived from the current motion id minus one
	plan->nMotionNodes = m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;
	planned_stmt->nMotionNodes = m_dxl_to_plstmt_context->GetCurrentMotionId() - 1;

	planned_stmt->commandType = m_cmd_type;

	// sequential dispatch only when there are no motion nodes and this is
	// not a DML on a distributed table; parallel otherwise
	plan->dispatch = (0 == plan->nMotionNodes && !m_is_tgt_tbl_distributed)
						 ? DISPATCH_SEQUENTIAL
						 : DISPATCH_PARALLEL;

	planned_stmt->resultRelations = m_result_rel_list;
	// GPDB_92_MERGE_FIXME: we really *should* be handling intoClause
	// but currently planner cheats (c.f. createas.c)
	// shift the intoClause handling into planner and re-enable this
	// pplstmt->intoClause = m_pctxdxltoplstmt->Pintocl();
	planned_stmt->intoPolicy = m_dxl_to_plstmt_context->GetDistributionPolicy();

	SetInitPlanVariables(planned_stmt);

	const bool has_direct_dispatch_info =
		(CMD_SELECT == m_cmd_type && NULL != dxlnode->GetDXLDirectDispatchInfo());
	if (has_direct_dispatch_info)
	{
		List *segment_ids = TranslateDXLDirectDispatchInfo(dxlnode->GetDXLDirectDispatchInfo());
		plan->directDispatch.contentIds = segment_ids;
		plan->directDispatch.isDirectDispatch = (NIL != segment_ids);

		if (plan->directDispatch.isDirectDispatch)
		{
			// mirror the direct dispatch settings onto every Motion node
			List *motion_nodes = gpdb::ExtractNodesPlan(planned_stmt->planTree, T_Motion, true /*descendIntoSubqueries*/);
			ListCell *motion_cell = NULL;
			ForEach(motion_cell, motion_nodes)
			{
				Motion *motion = (Motion *) lfirst(motion_cell);
				motion->plan.directDispatch.isDirectDispatch = true;
				motion->plan.directDispatch.contentIds = plan->directDispatch.contentIds;
			}
		}
	}

	return planned_stmt;
}