Foreword
I recently started learning Python. While writing a small tool, I needed to extract the table names from an SQL statement; after some searching I found a very good article on the subject, so I am noting it here.
P.S. That article was itself a repost, and it did not say where it was reposted from.
text
import ply.lex as lex, re
def extract_table_name_from_sql(sql_str):
    """Return the tokens that follow ``FROM`` or ``JOIN`` in an SQL string.

    The statement is stripped of ``/* */``, ``--`` and ``#`` comments, split
    on whitespace, parentheses and semicolons, and every token that comes
    directly after a ``FROM`` or ``JOIN`` keyword (case-insensitive) is
    collected.

    Args:
        sql_str: The SQL text to scan.

    Returns:
        A list of the collected tokens in order of appearance. Duplicates
        are kept; aliases are not returned.

    Limitations:
        This is a token scan, not an SQL parser. Comment markers are not
        quote-aware, so ``--`` or ``#`` inside a string literal truncates
        the line. Commas are not separators, so in ``FROM a, b`` only the
        first token is collected, with its trailing comma attached.
    """
    # Remove the /* */ comments.
    q = re.sub(r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/", "", sql_str)
    # Remove whole-line -- and # comments. The pattern is a raw string so
    # that ``\s`` is not treated as an (invalid) string escape.
    lines = [
        line for line in q.splitlines()
        if not re.match(r"^\s*(--|#)", line)
    ]
    # Remove trailing -- and # comments, then rejoin into a single line.
    q = " ".join(re.split("--|#", line)[0] for line in lines)
    # Split on blanks, parens and semicolons.
    tokens = re.split(r"[\s)(;]+", q)

    # Scan the tokens: the token right after FROM or JOIN is taken as a
    # table name, unless it is empty (end of input) or SELECT (a sub-query).
    result = []
    get_next = False
    for token in tokens:
        lowered = token.lower()
        if get_next and lowered not in ("", "select"):
            result.append(token)
        get_next = lowered in ("from", "join")
    return result
# Demo query: four joined tables plus a sub-select on a fifth.
sql2 = (
    "SELECT a.time_updated_server/1000,content,nick,name FROM "
    "table1 a JOIN "
    "table2 b ON a.sender_id = b.user_id "
    "JOIN table3 c ON a.channel_id = c.channel_id "
    "JOIN table4 d ON c.store_id = d.store_id "
    "WHERE sender_id NOT IN(SELECT user_id FROM table5 "
    "WHERE store_id IN ('agent_store:1', 'ask:1')) "
    "AND to_timestamp(a.time_updated_server/1000)::date >= '2014-05-01' "
    "GROUP BY 1,2,3,4 HAVING sum(1) > 500 ORDER BY 1 ASC"
)
# Print the table names found in the demo query.
print(extract_table_name_from_sql(sql2))