parse_analyze函数分析原始语法树并将其转换为Query形式。该函数中还包含post_parse_analyze_hook钩子的调用点。

/*
 * parse_analyze
 *      Run parse analysis on one raw statement and return the resulting Query.
 *
 * parseTree:   raw statement to transform.
 * sourceText:  query string the raw tree came from; must not be NULL
 *              (checked by assertion).
 * paramTypes,
 * numParams:   fixed parameter types; only installed when numParams > 0.
 * queryEnv:    stored into the parse state's p_queryEnv.
 *
 * A fresh ParseState with no parent is created for the transformation.
 * If post_parse_analyze_hook is set, it is called with the ParseState and
 * the finished Query before the ParseState is freed.
 */
Query *
parse_analyze(RawStmt *parseTree, const char *sourceText,
              Oid *paramTypes, int numParams,
              QueryEnvironment *queryEnv)
{
    ParseState *pstate = make_parsestate(NULL);
    Query      *analyzed;

    Assert(sourceText != NULL); /* required as of 8.4 */

    /* Fill in the caller-supplied context. */
    pstate->p_sourcetext = sourceText;
    pstate->p_queryEnv = queryEnv;

    if (numParams > 0)
    {
        parse_fixed_parameters(pstate, paramTypes, numParams);
    }

    analyzed = transformTopLevelStmt(pstate, parseTree);

    /* Give an installed hook a look at the result while pstate is alive. */
    if (post_parse_analyze_hook != NULL)
    {
        post_parse_analyze_hook(pstate, analyzed);
    }

    free_parsestate(pstate);

    return analyzed;
}

/*
 * parse_fixed_parameters
 *      Prepare a ParseState for a query that references fixed parameters.
 *
 * Allocates a FixedParamState recording the caller's parameter type array
 * and its length, then installs it as the hook passthrough state together
 * with fixed_paramref_hook as the ParamRef hook.
 */
void
parse_fixed_parameters(ParseState *pstate, Oid *paramTypes, int numParams)
{
    FixedParamState *fixed = palloc(sizeof(*fixed));

    fixed->numParams = numParams;
    fixed->paramTypes = paramTypes;

    pstate->p_paramref_hook = fixed_paramref_hook;
    pstate->p_ref_hook_state = (void *) fixed;
    /* no need to use p_coerce_param_hook */
}

数据结构

/*
 * State information used during parse analysis
 *
 * parentParseState: NULL in a top-level ParseState.  When parsing a subquery,
 * links to current parse state of outer query.
 *
 * p_sourcetext: source string that generated the raw parsetree being
 * analyzed, or NULL if not available.  (The string is used only to
 * generate cursor positions in error messages: we need it to convert
 * byte-wise locations in parse structures to character-wise cursor
 * positions.)
 *
 * p_rtable: list of RTEs that will become the rangetable of the query.
 * Note that neither relname nor refname of these entries are necessarily
 * unique; searching the rtable by name is a bad idea.
 *
 * p_joinexprs: list of JoinExpr nodes associated with p_rtable entries.
 * This is one-for-one with p_rtable, but contains NULLs for non-join
 * RTEs, and may be shorter than p_rtable if the last RTE(s) aren't joins.
 *
 * p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that
 * will become the fromlist of the query's top-level FromExpr node.
 *
 * p_namespace: list of ParseNamespaceItems that represents the current
 * namespace for table and column lookup.  (The RTEs listed here may be just
 * a subset of the whole rtable.  See ParseNamespaceItem comments below.)
 *
 * p_lateral_active: true if we are currently parsing a LATERAL subexpression
 * of this parse level.  This makes p_lateral_only namespace items visible,
 * whereas they are not visible when p_lateral_active is FALSE.
 *
 * p_ctenamespace: list of CommonTableExprs (WITH items) that are visible
 * at the moment.  This is entirely different from p_namespace because a CTE
 * is not an RTE, rather "visibility" means you could make an RTE from it.
 *
 * p_future_ctes: list of CommonTableExprs (WITH items) that are not yet
 * visible due to scope rules.  This is used to help improve error messages.
 *
 * p_parent_cte: CommonTableExpr that immediately contains the current query,
 * if any.
 *
 * p_target_relation: target relation, if query is INSERT, UPDATE, or DELETE.
 *
 * p_target_rangetblentry: target relation's entry in the rtable list.
 *
 * p_is_insert: true to process assignment expressions like INSERT, false
 * to process them like UPDATE.  (Note this can change intra-statement, for
 * cases like INSERT ON CONFLICT UPDATE.)
 *
 * p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
 * We collect these while transforming expressions and then transform them
 * afterwards (so that any resjunk tlist items needed for the sort/group
 * clauses end up at the end of the query tlist).  A WindowDef's location in
 * this list, counting from 1, is the winref number to use to reference it.
 *
 * p_expr_kind: kind of expression we're currently parsing, as per enum above;
 * EXPR_KIND_NONE when not in an expression.
 *
 * p_next_resno: next TargetEntry.resno to assign, starting from 1.
 *
 * p_multiassign_exprs: partially-processed MultiAssignRef source expressions.
 *
 * p_locking_clause: query's FOR UPDATE/FOR SHARE clause, if any.
 *
 * p_locked_from_parent: true if parent query level applies FOR UPDATE/SHARE
 * to this subquery as a whole.
 *
 * p_resolve_unknowns: resolve unknown-type SELECT output columns as type TEXT
 * (this is true by default).
 *
 * p_queryEnv: current query environment, including references to any
 * enclosing environment.
 *
 * p_hasAggs, p_hasWindowFuncs, etc: true if we've found any of the indicated
 * constructs in the query.
 *
 * p_last_srf: the set-returning FuncExpr or OpExpr most recently found in
 * the query, or NULL if none.
 *
 * p_pre_columnref_hook, etc: optional parser hook functions for modifying the
 * interpretation of ColumnRefs and ParamRefs.
 *
 * p_ref_hook_state: passthrough state for the parser hook functions.
 */
struct ParseState
{
struct ParseState *parentParseState; /* stack link to outer query's state; NULL at top level */
const char *p_sourcetext; /* source text, or NULL if not available */
List *p_rtable; /* range table so far */
List *p_joinexprs; /* JoinExprs for RTE_JOIN p_rtable entries */
List *p_joinlist; /* join items so far (will become FromExpr node's fromlist) */
List *p_namespace; /* currently-referenceable RTEs (List of ParseNamespaceItem) */
bool p_lateral_active; /* p_lateral_only items visible? */
List *p_ctenamespace; /* current namespace for common table exprs */
List *p_future_ctes; /* common table exprs not yet in namespace */
CommonTableExpr *p_parent_cte; /* this query's containing CTE, if any */
Relation p_target_relation; /* INSERT/UPDATE/DELETE target rel */
RangeTblEntry *p_target_rangetblentry; /* target rel's RTE */
bool p_is_insert; /* process assignment like INSERT not UPDATE */
List *p_windowdefs; /* raw representations of window clauses */
ParseExprKind p_expr_kind; /* what kind of expression we're parsing */
int p_next_resno; /* next targetlist resno to assign (starts at 1) */
List *p_multiassign_exprs; /* junk tlist entries for multiassign */
List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */
bool p_locked_from_parent; /* parent has marked this subquery with FOR UPDATE/FOR SHARE */
bool p_resolve_unknowns; /* resolve unknown-type SELECT outputs as type text */

QueryEnvironment *p_queryEnv; /* curr env, incl refs to enclosing env */

/*
 * Flags telling about things found in the query.  Each one is true once
 * the construct indicated by its name has been found.
 */
bool p_hasAggs;
bool p_hasWindowFuncs;
bool p_hasTargetSRFs;
bool p_hasSubLinks;
bool p_hasModifyingCTE;

Node *p_last_srf; /* most recent set-returning func/op found, or NULL */

/*
 * Optional hook functions for parser callbacks.  These are null unless
 * set up by the caller of make_parsestate.
 */
PreParseColumnRefHook p_pre_columnref_hook;
PostParseColumnRefHook p_post_columnref_hook;
ParseParamRefHook p_paramref_hook;
CoerceParamHook p_coerce_param_hook;
void *p_ref_hook_state; /* common passthrough link for the hooks above */
};

transformTopLevelStmt

transformTopLevelStmt函数会调用transformOptionalSelectInto函数将原始语法树转换成查询树。

/*
 * transformTopLevelStmt
 *      Transform a top-level raw statement into a Query.
 *
 * The wrapped statement is handed to transformOptionalSelectInto (SELECT
 * INTO is permitted because we are at top level).  The statement location
 * and length recorded in the RawStmt are then copied onto the Query.
 */
Query *
transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
{
    Query      *query = transformOptionalSelectInto(pstate, parseTree->stmt);

    /* Carry over where this statement sits in the source text. */
    query->stmt_len = parseTree->stmt_len;
    query->stmt_location = parseTree->stmt_location;

    return query;
}

transformOptionalSelectInto函数先对含有INTO子句的SELECT语句做特殊处理,将其转换为CREATE TABLE AS语句,然后调用transformStmt函数完成转换。

/*
 * transformOptionalSelectInto -
 *    If SELECT has INTO, convert it to CREATE TABLE AS.
 *
 * This differs from transformStmt() only in rewriting SELECT ... INTO as
 * CREATE TABLE AS.  Utility statements aren't allowed within larger
 * statements, so the rewrite is valid only at the top of the parse tree;
 * hence it is attempted once, before entering the recursive transformStmt()
 * processing.
 */
static Query *
transformOptionalSelectInto(ParseState *pstate, Node *parseTree)
{
    SelectStmt *leftmost;
    CreateTableAsStmt *ctas;

    /* Anything other than a SELECT passes straight through. */
    if (!IsA(parseTree, SelectStmt))
    {
        return transformStmt(pstate, parseTree);
    }

    /* If it's a set-operation tree, drill down to leftmost SelectStmt */
    for (leftmost = (SelectStmt *) parseTree;
         leftmost && leftmost->op != SETOP_NONE;
         leftmost = leftmost->larg)
    {
        /* nothing to do but keep descending */
    }
    Assert(leftmost && IsA(leftmost, SelectStmt) && leftmost->larg == NULL);

    /* A SELECT without INTO needs no rewriting. */
    if (!leftmost->intoClause)
    {
        return transformStmt(pstate, parseTree);
    }

    /* Wrap the whole SELECT tree in a CREATE TABLE AS node. */
    ctas = makeNode(CreateTableAsStmt);
    ctas->query = parseTree;
    ctas->into = leftmost->intoClause;
    ctas->relkind = OBJECT_TABLE;
    ctas->is_select_into = true;

    /*
     * Remove the intoClause from the SelectStmt.  This makes it safe for
     * transformSelectStmt to complain if it finds intoClause set (implying
     * that the INTO appeared in a disallowed place).
     */
    leftmost->intoClause = NULL;

    return transformStmt(pstate, (Node *) ctas);
}

transformStmt

处理Insert语句(T_InsertStmt):transformInsertStmt(pstate, (InsertStmt *) parseTree);处理Delete语句(T_DeleteStmt):transformDeleteStmt(pstate, (DeleteStmt *) parseTree);处理Update语句(T_UpdateStmt):transformUpdateStmt(pstate, (UpdateStmt *) parseTree);处理Select语句(T_SelectStmt):含有VALUES列表时调用transformValuesClause(pstate, n),没有集合操作(op == SETOP_NONE)时调用transformSelectStmt(pstate, n),否则调用transformSetOperationStmt(pstate, n);
处理DeclareCursor语句(T_DeclareCursorStmt):transformDeclareCursorStmt(pstate, (DeclareCursorStmt *) parseTree);处理Explain语句(T_ExplainStmt):transformExplainStmt(pstate, (ExplainStmt *) parseTree);等等。

/*
 * transformStmt -
 *    recursively transform a Parse tree into a Query tree.
 *
 * Dispatches on the node tag of parseTree to the matching transform
 * routine.  Statement types with no dedicated case are wrapped unchanged
 * in a CMD_UTILITY Query.  Every result is marked as an original query
 * that can set the command tag.
 */
Query *
transformStmt(ParseState *pstate, Node *parseTree)
{
    Query      *query;

    switch (nodeTag(parseTree))
    {
            /* Optimizable statements */
        case T_InsertStmt:
            {
                query = transformInsertStmt(pstate, (InsertStmt *) parseTree);
                break;
            }

        case T_DeleteStmt:
            {
                query = transformDeleteStmt(pstate, (DeleteStmt *) parseTree);
                break;
            }

        case T_UpdateStmt:
            {
                query = transformUpdateStmt(pstate, (UpdateStmt *) parseTree);
                break;
            }

        case T_SelectStmt:
            {
                SelectStmt *sel = (SelectStmt *) parseTree;

                /* VALUES lists take precedence, then plain vs. set-op SELECT */
                if (sel->valuesLists)
                {
                    query = transformValuesClause(pstate, sel);
                }
                else if (sel->op == SETOP_NONE)
                {
                    query = transformSelectStmt(pstate, sel);
                }
                else
                {
                    query = transformSetOperationStmt(pstate, sel);
                }
                break;
            }

            /* Special cases */
        case T_DeclareCursorStmt:
            {
                query = transformDeclareCursorStmt(pstate,
                                                   (DeclareCursorStmt *) parseTree);
                break;
            }

        case T_ExplainStmt:
            {
                query = transformExplainStmt(pstate, (ExplainStmt *) parseTree);
                break;
            }

        case T_CreateTableAsStmt:
            {
                query = transformCreateTableAsStmt(pstate,
                                                   (CreateTableAsStmt *) parseTree);
                break;
            }

        case T_CallStmt:
            {
                query = transformCallStmt(pstate, (CallStmt *) parseTree);
                break;
            }

        default:
            {
                /*
                 * other statements don't require any transformation; just
                 * return the original parsetree with a Query node plastered
                 * on top.
                 */
                query = makeNode(Query);
                query->commandType = CMD_UTILITY;
                query->utilityStmt = (Node *) parseTree;
                break;
            }
    }

    /* Mark as original query until we learn differently */
    query->querySource = QSRC_ORIGINAL;
    query->canSetTag = true;

    return query;
}