ORCA优化器浅析——metadata cache中的表对象_前端

首先看一下metadata cache中所有metadata相关对象(metadata-related objects)的Base接口IMDInterface,它是metadata中所有对象的父类,代码位于src/backend/gporca/libnaucrates/include/naucrates/md/IMDInterface.h文件中。

// Root interface of the metadata object hierarchy; it derives from CRefCount
// and only contributes a virtual destructor.
class IMDInterface : public CRefCount
{
public:
	// virtual so that derived objects can be destroyed through this interface
	virtual ~IMDInterface()
	{
	}
};

再看一下metadata cache中metadata cache objects的Base接口IMDCacheObject,它是metadata cache中所有对象的父类,代码位于src/backend/gporca/libnaucrates/include/naucrates/md/IMDCacheObject.h文件中。其包含了将metadata id elements列表序列化的函数SerializeMDIdList。枚举Emdtype列出了metadata cache objects所包含的对象种类,本文关注EmdtRel,即表对象。

// Interface implemented by the objects kept in the metadata cache.
class IMDCacheObject : public IMDInterface
{
public:
	// Serialize a list of metadata ids. strTokenList is used as the root XML
	// element of the list, and every metadata id is written as one
	// strTokenListItem element, so the output looks like:
	// <strTokenList>
	//		<strTokenListItem .../>...
	// </strTokenList>
	static void SerializeMDIdList(CXMLSerializer *xml_serializer,
								  const IMdIdArray *mdid_array,
								  const CWStringConst *strTokenList,
								  const CWStringConst *strTokenListItem);

	// kinds of metadata objects
	enum Emdtype
	{
		EmdtRel,
		EmdtInd,
		EmdtFunc,
		EmdtAgg,
		EmdtOp,
		EmdtType,
		EmdtTrigger,
		EmdtCheckConstraint,
		EmdtRelStats,
		EmdtColStats,
		EmdtCastFunc,
		EmdtScCmp
	};

	// metadata id of the cache object
	virtual IMDId *MDId() const = 0;

	// name of the cache object
	virtual CMDName Mdname() const = 0;

	// kind of the cache object
	virtual Emdtype MDType() const = 0;

	// write the object out in DXL format
	virtual void Serialize(gpdxl::CXMLSerializer *) const = 0;

	// DXL string representation of the cache object
	virtual const CWStringDynamic *GetStrRepr() const = 0;

	// write the metadata id information as attributes of an element that
	// carries the given name
	virtual void SerializeMDIdAsElem(gpdxl::CXMLSerializer *xml_serializer,
									 const CWStringConst *element_name,
									 const IMDId *mdid) const;
};

// Dynamic array of metadata cache objects; CleanupRelease is the element cleanup policy handed to CDynamicPtrArray.
typedef CDynamicPtrArray<IMDCacheObject, CleanupRelease> IMDCacheObjectArray;

IMDRelation接口

IMDRelation接口定义在src/backend/gporca/libnaucrates/include/naucrates/md/IMDRelation.h文件中,是metadata cache中relations相关的接口。从如下Erelstoragetype和Ereldistrpolicy可以看出,该接口和GPDB的表存储类型【RELSTORAGE_HEAP - stored on disk using heap storage. RELSTORAGE_AOROWS - stored on disk using append only storage. RELSTORAGE_AOCOLS - stored on disk using append only column storage. RELSTORAGE_VIRTUAL - has virtual storage, meaning, relation has no data directly stored for it (right now this relates to views and comp types). RELSTORAGE_EXTERNAL - stored externally using external tables. RELSTORAGE_FOREIGN - stored in another server.】和分布策略、分区策略是相关的,因此如果需要支持其他数据库,需要创建新的接口继承IMDCacheObject接口,并导入相关特性。

// Interface for relation (table) objects in the metadata cache.
class IMDRelation : public IMDCacheObject
{
public:
	// storage type of a relation
	enum Erelstoragetype
	{
		ErelstorageHeap,
		ErelstorageAppendOnlyCols,
		ErelstorageAppendOnlyRows,
		ErelstorageAppendOnlyParquet,
		ErelstorageExternal,
		ErelstorageVirtual,
		ErelstorageSentinel
	};

	// distribution policy of a relation
	enum Ereldistrpolicy
	{
		EreldistrMasterOnly,
		EreldistrHash,
		EreldistrRandom,
		EreldistrReplicated,
		EreldistrSentinel
	};

	// partition type of a partitioned relation
	enum Erelpartitiontype
	{
		ErelpartitionRange = 'r',
		ErelpartitionList = 'l'
	};

protected:
	// turn an array of column ids into a comma-separated string
	static CWStringDynamic *ColumnsToStr(CMemoryPool *mp,
										 ULongPtrArray *colid_array);

public:
	// object type: always a relation
	virtual Emdtype
	MDType() const
	{
		return EmdtRel;
	}

	// is this a temp relation
	virtual BOOL IsTemporary() const = 0;

	// storage type (heap, appendonly, ...)
	virtual Erelstoragetype RetrieveRelStorageType() const = 0;

	// distribution policy (none, hash, random)
	virtual Ereldistrpolicy GetRelDistribution() const = 0;

	// number of columns
	virtual ULONG ColumnCount() const = 0;

	// width of the column at the given position
	virtual DOUBLE ColWidth(ULONG pos) const = 0;

	// does the relation have dropped columns
	virtual BOOL HasDroppedColumns() const = 0;

	// number of non-dropped columns
	virtual ULONG NonDroppedColsCount() const = 0;

	// position of the given attribute position once dropped columns are
	// excluded
	virtual ULONG NonDroppedColAt(ULONG pos) const = 0;

	// position of a column in the metadata object, given its attribute
	// number in the system catalog
	virtual ULONG GetPosFromAttno(INT attno) const = 0;

	// original positions of all the non-dropped columns
	virtual ULongPtrArray *NonDroppedColsArray() const = 0;

	// number of system columns
	virtual ULONG SystemColumnsCount() const = 0;

	// column at the given position
	virtual const IMDColumn *GetMdCol(ULONG pos) const = 0;

	// number of key sets
	virtual ULONG KeySetCount() const = 0;

	// key set at the given position
	virtual const ULongPtrArray *KeySetAt(ULONG pos) const = 0;

	// number of distribution columns
	virtual ULONG DistrColumnCount() const = 0;

	// column at the given position in the relation's distribution key
	virtual const IMDColumn *GetDistrColAt(ULONG pos) const = 0;

	// NOTE(review): presumably the operator family id for the distribution
	// column at the given position -- confirm against the implementation
	virtual IMDId *GetDistrOpfamilyAt(ULONG pos) const = 0;

	// true if a hash distributed table needs to be considered as random
	virtual BOOL ConvertHashToRandom() const = 0;

	// does this table have oids
	virtual BOOL HasOids() const = 0;

	// is this a partitioned table
	virtual BOOL IsPartitioned() const = 0;

	// number of partition columns
	virtual ULONG PartColumnCount() const = 0;

	// number of partitions
	virtual ULONG PartitionCount() const = 0;

	// partition column at the given position
	virtual const IMDColumn *PartColAt(ULONG pos) const = 0;

	// list of partition types
	virtual CharPtrArray *GetPartitionTypes() const = 0;

	// partition type of the given partition level
	virtual CHAR PartTypeAtLevel(ULONG pos) const = 0;

	// number of indices
	virtual ULONG IndexCount() const = 0;

	// number of triggers
	virtual ULONG TriggerCount() const = 0;

	// id of the metadata cache index at the given position
	virtual IMDId *IndexMDidAt(ULONG pos) const = 0;

	// check whether the index with the given mdid is partial; this default
	// implementation always answers false
	virtual BOOL
	IsPartialIndex(IMDId *mdid) const
	{
		return false;
	}

	// id of the metadata cache trigger at the given position
	virtual IMDId *TriggerMDidAt(ULONG pos) const = 0;

	// number of check constraints
	virtual ULONG CheckConstraintCount() const = 0;

	// id of the check constraint cache at the given position
	virtual IMDId *CheckConstraintMDidAt(ULONG pos) const = 0;

	// part constraint
	virtual IMDPartConstraint *MDPartConstraint() const = 0;

	// external partitions (for partitioned tables); this default
	// implementation returns NULL.
	// NOTE (translated annotation): GetForeignPartitions is the
	// corresponding addition.
	virtual IMdIdArray *
	GetExternalPartitions() const
	{
		return NULL;
	}

	// does the relation contain any external partitions (for partitioned
	// tables only).
	// NOTE (translated annotation): HasForeignPartitions is the
	// corresponding addition.
	BOOL
	HasExternalPartitions() const
	{
		if (NULL == GetExternalPartitions())
		{
			return false;
		}
		return GetExternalPartitions()->Size() > 0;
	}

	// relation distribution policy as a string value; NULL for any policy
	// without a case below
	static const CWStringConst *
	GetDistrPolicyStr(Ereldistrpolicy rel_distr_policy)
	{
		const CWStringConst *policy_str = NULL;
		switch (rel_distr_policy)
		{
			case EreldistrMasterOnly:
				policy_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelDistrMasterOnly);
				break;
			case EreldistrHash:
				policy_str = CDXLTokens::GetDXLTokenStr(EdxltokenRelDistrHash);
				break;
			case EreldistrRandom:
				policy_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelDistrRandom);
				break;
			case EreldistrReplicated:
				policy_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelDistrReplicated);
				break;
			default:
				break;
		}
		return policy_str;
	}

	// name of the storage type; NULL for any storage type without a case
	// below.
	// NOTE (translated annotation): a Foreign branch is added to this
	// function.
	static const CWStringConst *
	GetStorageTypeStr(IMDRelation::Erelstoragetype rel_storage_type)
	{
		const CWStringConst *storage_str = NULL;
		switch (rel_storage_type)
		{
			case ErelstorageHeap:
				storage_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelStorageHeap);
				break;
			case ErelstorageAppendOnlyCols:
				storage_str = CDXLTokens::GetDXLTokenStr(
					EdxltokenRelStorageAppendOnlyCols);
				break;
			case ErelstorageAppendOnlyRows:
				storage_str = CDXLTokens::GetDXLTokenStr(
					EdxltokenRelStorageAppendOnlyRows);
				break;
			case ErelstorageAppendOnlyParquet:
				storage_str = CDXLTokens::GetDXLTokenStr(
					EdxltokenRelStorageAppendOnlyParquet);
				break;
			case ErelstorageExternal:
				storage_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelStorageExternal);
				break;
			case ErelstorageVirtual:
				storage_str =
					CDXLTokens::GetDXLTokenStr(EdxltokenRelStorageVirtual);
				break;
			default:
				break;
		}
		return storage_str;
	}

	// true when the storage type is append-only rows or append-only columns
	BOOL
	IsAORowOrColTable() const
	{
		switch (RetrieveRelStorageType())
		{
			case ErelstorageAppendOnlyCols:
			case ErelstorageAppendOnlyRows:
				return true;
			default:
				return false;
		}
	}
};

// Dynamic array of CMDIndexInfo entries -- the index-info structure common to
// relation and external relation metadata; CleanupRelease is the element
// cleanup policy handed to CDynamicPtrArray.
typedef CDynamicPtrArray<CMDIndexInfo, CleanupRelease> CMDIndexInfoArray;