[Python] Extracting table names from an SQL statement

Foreword

I have recently been learning Python. While writing a small tool, I needed to extract the table names from an SQL statement; after some searching I found a very good article on the subject, so I am bookmarking it here.
PS. That article was itself a repost, and it did not say where it was reposted from.

text

import ply.lex as lex, re


def extract_table_name_from_sql(sql_str: str) -> list:
    """Return the tokens that directly follow FROM or JOIN in *sql_str*.

    This is a lightweight token scan, not a SQL parser. Comments are
    stripped, the remaining text is split on whitespace, parentheses and
    semicolons, and every token that comes right after a ``FROM`` or
    ``JOIN`` keyword (matched case-insensitively) is collected.

    Args:
        sql_str: The SQL text to inspect.

    Returns:
        The matching tokens in order of appearance, with their original
        spelling. Duplicates are kept. A ``SELECT`` that follows ``FROM``
        or ``JOIN`` (a subquery) is skipped rather than reported.

    Known limitations, which follow from the simple tokenisation:
        * Commas are not separators, so ``FROM a, b`` yields ``"a,"``.
        * ``--`` and ``#`` are treated as comment starts even when they
          occur inside a quoted string literal.
    """
    # Remove the /* ... */ block comments.
    q = re.sub(r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/", "", sql_str)

    # Remove lines that consist solely of a -- or # comment.
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    lines = [line for line in q.splitlines() if not re.match(r"^\s*(--|#)", line)]

    # Remove trailing -- and # comments, then flatten to a single line.
    q = " ".join(re.split("--|#", line)[0] for line in lines)

    # Split on blanks, parens and semicolons.
    tokens = re.split(r"[\s)(;]+", q)

    # Scan the tokens: whenever the previous token was FROM or JOIN, keep
    # the current one, unless it is empty or the start of a subquery.
    result = []
    get_next = False
    for token in tokens:
        lowered = token.lower()
        if get_next and lowered not in ("", "select"):
            result.append(token)
        get_next = lowered in ("from", "join")

    return result

# Demo: a multi-join query with a subquery; the adjacent string literals
# below are concatenated by Python into one SQL statement.
sql2 = (
    "SELECT a.time_updated_server/1000,content,nick,name FROM      "
    "table1 a JOIN   "
    "table2 b ON a.sender_id = b.user_id "
    "JOIN table3 c ON a.channel_id = c.channel_id "
    "JOIN table4 d ON c.store_id = d.store_id "
    "WHERE sender_id NOT IN(SELECT user_id FROM table5 WHERE store_id IN ('agent_store:1', 'ask:1')) "
    "AND to_timestamp(a.time_updated_server/1000)::date >= '2014-05-01' "
    "GROUP BY 1,2,3,4 HAVING sum(1) > 500 ORDER BY 1 ASC"
)
tables_found = extract_table_name_from_sql(sql2)
print(tables_found)

Guess you like

Origin www.cnblogs.com/NBDWDYS2214143926/p/11911595.html