《PG源码学习--3.查询语义分析》

一.背景说明

数据库的一条普通的查询SQL,首先要通过查询编译,生成数据库可识别的数据结构,然后数据库对生成的数据结构进行语义分析,最后返回内部的查询结构,供查询重写和查询优化使用。
类似 select * from table; 这样的查询语句,是外部系统和数据库交互的DSL。本文通过学习Postgres的代码,来加深对查询编译过程的理解。
Postgres的代码使用最新的master分支。

二.物理代码

2.1 postgres.c

postgres\src\backend\tcop\postgres.c ,exec_simple_query 中调用pg_analyze_and_rewrite

2.2 analyze.c

postgres\src\backend\parser\analyze.c 具体逻辑实现

三.数据结构

核心数据结构ParseState解析状态,Query查询体

3.1 解析状态

代码位置postgres\src\include\parser\parse_node.h

/*
 * ParseState
 *    Working state carried through parse analysis of one query.
 *
 * parse_analyze() obtains one from make_parsestate(NULL), sets p_sourcetext
 * and p_queryEnv, hands it to the transform routines, and releases it with
 * free_parsestate() once the Query has been built.  The transform routines
 * accumulate their results here (range table, join list, "found" flags) and
 * the finished values are then copied into the Query node.
 *
 * NOTE(review): parentParseState is described only as a "stack link";
 * presumably a nested state is pushed for each sub-query -- confirm against
 * make_parsestate().
 */
struct ParseState
{
   struct ParseState *parentParseState;   /* stack link */
   const char *p_sourcetext;  /* source text, or NULL if not available */
   List      *p_rtable;     /* range table so far */
   List      *p_joinexprs;   /* JoinExprs for RTE_JOIN p_rtable entries */
   List      *p_joinlist;       /* join items so far (will become FromExpr
                         * node's fromlist) */
   List      *p_namespace;   /* currently-referenceable RTEs (List of
                         * ParseNamespaceItem) */
   bool      p_lateral_active;  /* p_lateral_only items visible? */
   List      *p_ctenamespace; /* current namespace for common table exprs */
   List      *p_future_ctes; /* common table exprs not yet in namespace */
   CommonTableExpr *p_parent_cte; /* this query's containing CTE */
   Relation   p_target_relation; /* INSERT/UPDATE/DELETE target rel */
   RangeTblEntry *p_target_rangetblentry; /* target rel's RTE */
   bool      p_is_insert;   /* process assignment like INSERT not UPDATE */
   List      *p_windowdefs;  /* raw representations of window clauses */
   ParseExprKind p_expr_kind; /* what kind of expression we're parsing */
   int          p_next_resno;  /* next targetlist resno to assign */
   List      *p_multiassign_exprs;   /* junk tlist entries for multiassign */
   List      *p_locking_clause;  /* raw FOR UPDATE/FOR SHARE info */
   bool      p_locked_from_parent;  /* parent has marked this subquery
                               * with FOR UPDATE/FOR SHARE */
   bool      p_resolve_unknowns; /* resolve unknown-type SELECT outputs as
                            * type text */

   QueryEnvironment *p_queryEnv;  /* curr env, incl refs to enclosing env */

   /*
    * Flags telling about things found in the query.  transformSelectStmt
    * copies each of them into the Query field of the same name without the
    * "p_" prefix (p_hasAggs -> hasAggs, and so on).
    */
   bool      p_hasAggs;
   bool      p_hasWindowFuncs;
   bool      p_hasTargetSRFs;
   bool      p_hasSubLinks;
   bool      p_hasModifyingCTE;

   Node      *p_last_srf;       /* most recent set-returning func/op found */

   /*
    * Optional hook functions for parser callbacks.  These are null unless
    * set up by the caller of make_parsestate.
    */
   PreParseColumnRefHook p_pre_columnref_hook;
   PostParseColumnRefHook p_post_columnref_hook;
   ParseParamRefHook p_paramref_hook;
   CoerceParamHook p_coerce_param_hook;
   void      *p_ref_hook_state;  /* common passthrough link for above */
};

3.2 查询体

代码位置postgres\src\include\nodes\parsenodes.h

/*
 * Query
 *    The internal query structure produced by parse analysis.
 *
 * transformStmt() returns one of these for every statement.  For the
 * optimizable statements the per-clause fields below are filled in by the
 * matching transform routine; for any other statement transformStmt() sets
 * commandType to CMD_UTILITY and stores the untouched parse tree in
 * utilityStmt.  pg_analyze_and_rewrite() then passes the Query on to
 * pg_rewrite_query().
 */
typedef struct Query
{
   NodeTag       type;          /* NOTE(review): presumably the node tag set
                                 * by makeNode(Query) -- confirm */

   CmdType       commandType;   /* select|insert|update|delete|utility */

   QuerySource querySource;   /* where did I come from? */

   uint32    queryId;      /* query identifier (can be set by plugins) */

   bool      canSetTag;    /* do I set the command result tag? */

   Node      *utilityStmt;   /* non-null if commandType == CMD_UTILITY */

   int          resultRelation; /* rtable index of target relation for
                         * INSERT/UPDATE/DELETE; 0 for SELECT */

   bool      hasAggs;      /* has aggregates in tlist or havingQual */
   bool      hasWindowFuncs; /* has window functions in tlist */
   bool      hasTargetSRFs; /* has set-returning functions in tlist */
   bool      hasSubLinks;   /* has subquery SubLink */
   bool      hasDistinctOn; /* distinctClause is from DISTINCT ON */
   bool      hasRecursive;  /* WITH RECURSIVE was specified */
   bool      hasModifyingCTE;   /* has INSERT/UPDATE/DELETE in WITH */
   bool      hasForUpdate;  /* FOR [KEY] UPDATE/SHARE was specified */
   bool      hasRowSecurity; /* rewriter has applied some RLS policy */

   List      *cteList;      /* WITH list (of CommonTableExpr's) */

   List      *rtable;          /* list of range table entries */
   FromExpr   *jointree;     /* table join tree (FROM and WHERE clauses) */

   List      *targetList;       /* target list (of TargetEntry) */

   OverridingKind override;   /* OVERRIDING clause */

   OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */

   List      *returningList; /* return-values list (of TargetEntry) */

   List      *groupClause;   /* a list of SortGroupClause's */

   List      *groupingSets;  /* a list of GroupingSet's if present */

   Node      *havingQual;       /* qualifications applied to groups */

   List      *windowClause;  /* a list of WindowClause's */

   List      *distinctClause; /* a list of SortGroupClause's */

   List      *sortClause;       /* a list of SortGroupClause's */

   Node      *limitOffset;   /* # of result tuples to skip (int8 expr) */
   Node      *limitCount;       /* # of result tuples to return (int8 expr) */

   List      *rowMarks;     /* a list of RowMarkClause's */

   Node      *setOperations; /* set-operation tree if this is top level of
                         * a UNION/INTERSECT/EXCEPT query */

   List      *constraintDeps; /* a list of pg_constraint OIDs that the query
                         * depends on to be semantically valid */

   List      *withCheckOptions;  /* a list of WithCheckOption's, which are
                            * only added during rewrite and therefore
                            * are not written out as part of Query. */

   /*
    * The following two fields identify the portion of the source text string
    * containing this query.  They are typically only populated in top-level
    * Queries, not in sub-queries.  When not set, they might both be zero, or
    * both be -1 meaning "unknown".
    */
   int          stmt_location; /* start location, or -1 if unknown */
   int          stmt_len;     /* length in bytes; 0 means "rest of string" */
} Query;

四.解析流程

postgres\src\backend\tcop\postgres.c ,exec_simple_query 中进行解析

1.解析入口

static void
exec_simple_query(const char *query_string)
{
   CommandDest dest = whereToSendOutput;
   MemoryContext oldcontext;
   List      *parsetree_list;
   ListCell   *parsetree_item;
   ....

      /*
       * Switch to appropriate context for constructing querytrees (again,
       * these must outlive the execution context).
       */
      oldcontext = MemoryContextSwitchTo(MessageContext);

      querytree_list = pg_analyze_and_rewrite(parsetree, query_string,
                                    NULL, 0, NULL);

      ....

2. 查询语义分析和重写

分2部分,语义分析和查询重写

/*
 * pg_analyze_and_rewrite
 *    Run parse analysis on one raw statement, then rewrite the result.
 *
 * parsetree     raw statement to analyze
 * query_string  source text of the statement (also used for tracing)
 * paramTypes    parameter type OIDs, forwarded to parse_analyze
 * numParams     number of entries in paramTypes
 * queryEnv      query environment, forwarded to parse_analyze
 *
 * Returns the list produced by pg_rewrite_query().
 */
List *
pg_analyze_and_rewrite(RawStmt *parsetree, const char *query_string,
                       Oid *paramTypes, int numParams,
                       QueryEnvironment *queryEnv)
{
   Query *analyzed;
   List  *rewritten;

   TRACE_POSTGRESQL_QUERY_REWRITE_START(query_string);

   /* Phase 1: parse analysis, bracketed by optional resource statistics. */
   if (log_parser_stats)
   {
      ResetUsage();
   }

   analyzed = parse_analyze(parsetree,
                            query_string,
                            paramTypes,
                            numParams,
                            queryEnv);

   if (log_parser_stats)
   {
      ShowUsage("PARSE ANALYSIS STATISTICS");
   }

   /* Phase 2: let the rewriter expand the analyzed query as needed. */
   rewritten = pg_rewrite_query(analyzed);

   TRACE_POSTGRESQL_QUERY_REWRITE_DONE(query_string);

   return rewritten;
}

3. 语义分析

代码实现在analyze.c中

3.1 parse_analyze

输入parseTree,返回Query查询内部的数据结构

/*
 * parse_analyze
 *    Turn a raw parse tree into a Query using a freshly made ParseState.
 *
 * parseTree    raw statement to analyze
 * sourceText   original query text; must not be NULL
 * paramTypes   parameter type OIDs, only consulted when numParams > 0
 * numParams    number of entries in paramTypes
 * queryEnv     query environment stored into the parse state
 *
 * The parse state is private to this call: it is created here and freed
 * before returning.  post_parse_analyze_hook, when set, is invoked with the
 * parse state and the finished Query just before the state is freed.
 */
Query *
parse_analyze(RawStmt *parseTree, const char *sourceText,
              Oid *paramTypes, int numParams,
              QueryEnvironment *queryEnv)
{
   ParseState *state;
   Query      *analyzed;

   state = make_parsestate(NULL);

   Assert(sourceText != NULL); /* required as of 8.4 */

   /* Seed the state with everything the transform routines will consult. */
   state->p_sourcetext = sourceText;
   if (numParams > 0)
   {
      parse_fixed_parameters(state, paramTypes, numParams);
   }
   state->p_queryEnv = queryEnv;

   analyzed = transformTopLevelStmt(state, parseTree);

   /* Give an installed hook a look at the result while the state is alive. */
   if (post_parse_analyze_hook != NULL)
   {
      (*post_parse_analyze_hook) (state, analyzed);
   }

   free_parsestate(state);

   return analyzed;
}

3.2 查询转化

通过nodeTag(parseTree)得到语句的类型,这里为T_SelectStmt

/*
 * transformStmt
 *    Convert one raw parse tree into a Query by dispatching on its node tag.
 *
 * SELECT (plain, VALUES, or set operation), INSERT, UPDATE and DELETE, plus
 * DECLARE CURSOR, EXPLAIN and CREATE TABLE AS, are each handed to their own
 * transform routine.  Any other statement is wrapped unchanged in a
 * CMD_UTILITY Query.  Whatever the path, the returned Query is marked
 * QSRC_ORIGINAL with canSetTag set to true.
 */
Query *
transformStmt(ParseState *pstate, Node *parseTree)
{
   Query *qry;

#ifdef RAW_EXPRESSION_COVERAGE_TEST
   /*
    * Coverage testing is limited to the basic DML statements because
    * raw_expression_tree_walker() doesn't claim to handle utility
    * statements.
    */
   if (nodeTag(parseTree) == T_SelectStmt ||
       nodeTag(parseTree) == T_InsertStmt ||
       nodeTag(parseTree) == T_UpdateStmt ||
       nodeTag(parseTree) == T_DeleteStmt)
   {
      (void) test_raw_expression_coverage(parseTree, NULL);
   }
#endif                   /* RAW_EXPRESSION_COVERAGE_TEST */

   switch (nodeTag(parseTree))
   {
         /* ---- optimizable statements ---- */
      case T_SelectStmt:
         {
            SelectStmt *sel = (SelectStmt *) parseTree;

            /* VALUES wins; otherwise pick set-operation vs. plain SELECT */
            if (sel->valuesLists)
               qry = transformValuesClause(pstate, sel);
            else if (sel->op != SETOP_NONE)
               qry = transformSetOperationStmt(pstate, sel);
            else
               qry = transformSelectStmt(pstate, sel);
            break;
         }

      case T_InsertStmt:
         qry = transformInsertStmt(pstate, (InsertStmt *) parseTree);
         break;

      case T_UpdateStmt:
         qry = transformUpdateStmt(pstate, (UpdateStmt *) parseTree);
         break;

      case T_DeleteStmt:
         qry = transformDeleteStmt(pstate, (DeleteStmt *) parseTree);
         break;

         /* ---- special cases ---- */
      case T_ExplainStmt:
         qry = transformExplainStmt(pstate, (ExplainStmt *) parseTree);
         break;

      case T_DeclareCursorStmt:
         qry = transformDeclareCursorStmt(pstate,
                                          (DeclareCursorStmt *) parseTree);
         break;

      case T_CreateTableAsStmt:
         qry = transformCreateTableAsStmt(pstate,
                                          (CreateTableAsStmt *) parseTree);
         break;

         /*
          * Everything else needs no transformation: hand back the original
          * parse tree with a Query node on top of it.
          */
      default:
         qry = makeNode(Query);
         qry->commandType = CMD_UTILITY;
         qry->utilityStmt = (Node *) parseTree;
         break;
   }

   /* Treat the result as an original query until told otherwise. */
   qry->querySource = QSRC_ORIGINAL;
   qry->canSetTag = true;

   return qry;
}

3.3转化的具体实现

1)具体过程

序号 函数 说明
1 transformWithClause with子句
2 transformFromClause from子句
3 transformTargetList targetList子句
4 transformWhereClause where子句
5 transformGroupClause group子句
6 transformDistinctClause distinct子句
7 transformLimitClause limit子句
8 transformWindowDefinitions window子句
9 transformLockingClause lock子句

2)对应的函数

/*
 * transformSelectStmt
 *    Build a CMD_SELECT Query from a raw SelectStmt.
 *
 * pstate  parse state; the clause transforms below record their findings in
 *         it (range table, join list, "found" flags), and those are copied
 *         into the Query near the end
 * stmt    raw SELECT statement to transform
 *
 * Returns the newly made Query.  Raises ERRCODE_SYNTAX_ERROR via ereport()
 * when stmt carries an INTO clause.
 *
 * The clauses are processed in a fixed order: WITH, FROM, target list,
 * WHERE, HAVING, ORDER BY, GROUP BY, DISTINCT, LIMIT/OFFSET, WINDOW, and
 * finally the locking clauses.  Several later steps take the results of
 * earlier ones as arguments, so the order must not be changed.
 */
static Query *
transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
{
   Query     *qry = makeNode(Query);
   Node      *qual;          /* transformed WHERE; stored via makeFromExpr */
   ListCell   *l;

   qry->commandType = CMD_SELECT;

   /* process the WITH clause independently of all else */
   if (stmt->withClause)
   {
      qry->hasRecursive = stmt->withClause->recursive;
      qry->cteList = transformWithClause(pstate, stmt->withClause);
      qry->hasModifyingCTE = pstate->p_hasModifyingCTE;
   }

   /* Complain if we get called from someplace where INTO is not allowed */
   if (stmt->intoClause)
      ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("SELECT ... INTO is not allowed here"),
             parser_errposition(pstate,
                           exprLocation((Node *) stmt->intoClause))));

   /* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */
   pstate->p_locking_clause = stmt->lockingClause;

   /* make WINDOW info available for window functions, too */
   pstate->p_windowdefs = stmt->windowClause;

   /* process the FROM clause */
   transformFromClause(pstate, stmt->fromClause);

   /* transform targetlist */
   qry->targetList = transformTargetList(pstate, stmt->targetList,
                                EXPR_KIND_SELECT_TARGET);

   /* mark column origins */
   markTargetListOrigins(pstate, qry->targetList);

   /* transform WHERE */
   qual = transformWhereClause(pstate, stmt->whereClause,
                        EXPR_KIND_WHERE, "WHERE");

   /* initial processing of HAVING clause is much like WHERE clause */
   qry->havingQual = transformWhereClause(pstate, stmt->havingClause,
                                 EXPR_KIND_HAVING, "HAVING");

   /*
    * Transform sorting/grouping stuff.  Do ORDER BY first because both
    * transformGroupClause and transformDistinctClause need the results. Note
    * that these functions can also change the targetList, so it's passed to
    * them by reference.
    */
   qry->sortClause = transformSortClause(pstate,
                                stmt->sortClause,
                                &qry->targetList,
                                EXPR_KIND_ORDER_BY,
                                false /* allow SQL92 rules */ );

   qry->groupClause = transformGroupClause(pstate,
                                 stmt->groupClause,
                                 &qry->groupingSets,
                                 &qry->targetList,
                                 qry->sortClause,
                                 EXPR_KIND_GROUP_BY,
                                 false /* allow SQL92 rules */ );

   /*
    * Three DISTINCT cases, told apart by the raw distinctClause list: empty
    * means no DISTINCT, a list whose first element is NULL means plain
    * DISTINCT, anything else is DISTINCT ON.
    */
   if (stmt->distinctClause == NIL)
   {
      qry->distinctClause = NIL;
      qry->hasDistinctOn = false;
   }
   else if (linitial(stmt->distinctClause) == NULL)
   {
      /* We had SELECT DISTINCT */
      qry->distinctClause = transformDistinctClause(pstate,
                                         &qry->targetList,
                                         qry->sortClause,
                                         false);
      qry->hasDistinctOn = false;
   }
   else
   {
      /* We had SELECT DISTINCT ON */
      qry->distinctClause = transformDistinctOnClause(pstate,
                                          stmt->distinctClause,
                                          &qry->targetList,
                                          qry->sortClause);
      qry->hasDistinctOn = true;
   }

   /* transform LIMIT */
   qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,
                                 EXPR_KIND_OFFSET, "OFFSET");
   qry->limitCount = transformLimitClause(pstate, stmt->limitCount,
                                 EXPR_KIND_LIMIT, "LIMIT");

   /* transform window clauses after we have seen all window functions */
   qry->windowClause = transformWindowDefinitions(pstate,
                                       pstate->p_windowdefs,
                                       &qry->targetList);

   /* resolve any still-unresolved output columns as being type text */
   if (pstate->p_resolve_unknowns)
      resolveTargetListUnknowns(pstate, qry->targetList);

   /*
    * Move what the parse state accumulated into the Query: the range table
    * as-is, and the join list combined with the WHERE qual into a FromExpr.
    */
   qry->rtable = pstate->p_rtable;
   qry->jointree = makeFromExpr(pstate->p_joinlist, qual);

   /* copy the "found in query" flags collected in the parse state */
   qry->hasSubLinks = pstate->p_hasSubLinks;
   qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
   qry->hasTargetSRFs = pstate->p_hasTargetSRFs;
   qry->hasAggs = pstate->p_hasAggs;

   /* process each item of the raw locking clause list against the Query */
   foreach(l, stmt->lockingClause)
   {
      transformLockingClause(pstate, qry,
                        (LockingClause *) lfirst(l), false);
   }

   assign_query_collations(pstate, qry);

   /* this must be done after collations, for reliable comparison of exprs */
   if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual)
      parseCheckAggregates(pstate, qry);

   return qry;
}

五.其他

继续加油…

发布了6 篇原创文章 · 获赞 1 · 访问量 219

猜你喜欢

转载自blog.csdn.net/weixin_39939108/article/details/104455500