from selenium import webdriver
# Yahoo Finance key-statistics page for the AAPL ticker.
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
# Start Chrome through the chromedriver binary at this local path.
browserDriver = webdriver.Chrome(executable_path='D:/chromedriver/chromedriver')
# Load the page in the browser.
browserDriver.get(url)
# Dump the page source as reported by the driver.
print(browserDriver.page_source)
# Suppose I want to look for "Trailing P/E 17.10".
# 3 matches
# The first snapshot looks like JSON (yes, it is JSON format) or a dictionary.
# The second snapshot looks like static HTML code (yes, it is HTML code).
# Find a specific element: here the root <html> element.
element = browserDriver.find_element_by_xpath("html")
# Print the element's .text (presumably the rendered text only -- verify against the Selenium docs).
print(element.text)
# pycharm
# Find the same root element again, this time reading its textContent attribute.
element = browserDriver.find_element_by_xpath("html")
print(element.get_attribute("textContent")) # text content without the HTML tags
# 3 matches
# Find child elements: "/*" selects all children directly under the given element.
# First the children of <html> ...
elements = browserDriver.find_elements_by_xpath("html/*")
for childElement in elements:
    print(childElement.tag_name)
# ... then the children of <head>.
elements = browserDriver.find_elements_by_xpath("html/head/*")
for childElement in elements:
    print(childElement.tag_name)
# web structure (tags)
# To look for "trailingPE": print the 1-based position of every matched
# element whose textContent contains it.
elements = browserDriver.find_elements_by_xpath("html")
counter = 1
for element in elements:
    if "trailingPE" in element.get_attribute("textContent"):
        print(counter)
    # Advance for every element (not only matches) so the printed number is
    # the element's position in the result list.
    counter += 1
# However, we know the data is in <script> data </script> then
def findXPath(element, target, path):
    """Return the tag path of the first <script> element whose text contains *target*.

    Searches depth-first from *element*, printing the path of every element
    it visits.

    Args:
        element: Element to start from. It must provide ``tag_name``,
            ``get_attribute("textContent")`` and ``find_elements_by_xpath``.
        target: Substring to look for in a script element's ``textContent``.
        path: Path string for *element* itself; each level appends
            ``"/" + tag_name``.

    Returns:
        The path of the first matching script element, or ``""`` if none
        matches. The path is built from tag names only, with no positional
        index.
    """
    # Check the tag first so textContent is only fetched for <script> elements.
    if element.tag_name == "script":
        textContent = element.get_attribute("textContent") or ""
        if target in textContent:
            return path
    # No match here, so go deeper. "./*" selects the direct children of the
    # current element.
    for childElement in element.find_elements_by_xpath("./*"):
        childPath = path + "/" + childElement.tag_name
        print(childPath)  # trace every visited path
        final = findXPath(childElement, target, childPath)
        if final != "":
            return final
    return ""
# Start the search at the root <html> element, using "html" as the starting path.
element = browserDriver.find_element_by_xpath("html")
print("The final path is: ", findXPath(element, "trailingPE","html"))
# Several <script> elements sit directly under <body>; print the 1-based
# position of each one whose textContent contains "trailingPE".
elements = browserDriver.find_elements_by_xpath("html/body/script")
counter = 1
for element in elements:
    if "trailingPE" in element.get_attribute("textContent"):
        print(counter)
    # Advance for every script so the printed number can be used as the
    # XPath position, as in script[1] below.
    counter += 1
# the first one
element = browserDriver.find_element_by_xpath("html/body/script[1]")
print(element.get_attribute("textContent"))
#only one match
#"trailingPE":{"raw":17.102716,"fmt":"17.10"} #json format
from selenium import webdriver
# Yahoo Finance key-statistics page for the AAPL ticker.
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
# Start Chrome through the chromedriver binary at this local path.
browserDriver = webdriver.Chrome(executable_path='D:/chromedriver/chromedriver')
# Load the page in the browser.
browserDriver.get(url)
# Bare string expression: it has no effect at runtime and only names the key
# that the code below searches for.
"trailingPE"
def findXPath(element, target, path):
    """Return the tag path of the first <script> element whose text contains *target*.

    Searches depth-first from *element*, printing the path of every element
    it visits.

    Args:
        element: Element to start from. It must provide ``tag_name``,
            ``get_attribute("textContent")`` and ``find_elements_by_xpath``.
        target: Substring to look for in a script element's ``textContent``.
        path: Path string for *element* itself; each level appends
            ``"/" + tag_name``.

    Returns:
        The path of the first matching script element, or ``""`` if none
        matches. The path is built from tag names only, with no positional
        index.
    """
    # Check the tag first so textContent is only fetched for <script> elements.
    if element.tag_name == "script":
        textContent = element.get_attribute("textContent") or ""
        if target in textContent:
            return path
    # "./*" selects the direct children of the current element: go deeper.
    for childElement in element.find_elements_by_xpath("./*"):
        childPath = path + "/" + childElement.tag_name
        print(childPath)  # trace every visited path
        final = findXPath(childElement, target, childPath)
        if final != "":
            return final
    return ""
import json


def findJsonPath(jsonObject, target, path, matchType):
    """Return the comma-joined key path to the first container holding *target*.

    Defined here because the snippet below calls it and no earlier
    definition exists in this part of the file.

    Args:
        jsonObject: Decoded JSON value to search.
        target: Key to look for.
        path: Path accumulated so far; each level appends ``"," + key``.
        matchType: Container type to descend into (the caller passes
            ``type(jsonData)``).

    Returns:
        The path of the first container in which *target* is found, or ``""``
        if it is not found. Values that are not of *matchType* are skipped.
    """
    if isinstance(jsonObject, matchType):
        if target in jsonObject:
            return path
        for newKey in jsonObject.keys():
            final = findJsonPath(jsonObject[newKey], target, path + "," + newKey, matchType)
            if final != "":
                return final
    return ""


# The data lives in the first <script> directly under <body>.
element = browserDriver.find_element_by_xpath("html/body/script[1]")
# strip() removes any of the characters "(this));\n" from both ends of the text.
tempData = element.get_attribute("textContent").strip("(this));\n")
# The data is after "root.App.main = ". [:-3] drops the last three characters,
# presumably the script's closing punctuation left after the JSON -- verify
# against the page.
tempData = tempData.split("root.App.main = ")[1][:-3]
jsonData = json.loads(tempData)
print(jsonData.keys())  # dict_keys(['context', 'plugins'])
matchType = type(jsonData)
print("Final Path is: ", findJsonPath(jsonData, "trailingPE", "", matchType))
# Final Path is: ,context,dispatcher,stores,QuoteSummaryStore,summaryDetail
print(jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["summaryDetail"])
# whole dictionary
finalData = jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["summaryDetail"]
import pandas as pd
df = pd.DataFrame(data=finalData)
print(df)
browserDriver.quit()
####################################all codes###############################################
#!/usr/bin/python
#encoding:utf-8
"""
@author: LlQ
@contact:[email protected]
@file:appleSta.py
@time: 7/14/2019 3:18 PM
"""
from selenium import webdriver
import json
# Yahoo Finance key-statistics page for the AAPL ticker.
url = "https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL"
# Start Chrome through the chromedriver binary at this local path.
browserDriver = webdriver.Chrome(executable_path='D:/chromedriver/chromedriver')
# Load the page in the browser.
browserDriver.get(url)
# Bare string expression: it has no effect at runtime and only notes the key of interest.
"trailingPE"
def findXPath(element, target, path):
    """Return the tag path of the first <script> element whose text contains *target*.

    Searches depth-first from *element*, printing the path of every element
    it visits.

    Args:
        element: Element to start from. It must provide ``tag_name``,
            ``get_attribute("textContent")`` and ``find_elements_by_xpath``.
        target: Substring to look for in a script element's ``textContent``.
        path: Path string for *element* itself; each level appends
            ``"/" + tag_name``.

    Returns:
        The path of the first matching script element, or ``""`` if none
        matches. The path is built from tag names only, with no positional
        index.
    """
    # Check the tag first so textContent is only fetched for <script> elements.
    if element.tag_name == "script":
        textContent = element.get_attribute("textContent") or ""
        if target in textContent:
            return path
    # "./*" selects the direct children of the current element: go deeper.
    for childElement in element.find_elements_by_xpath("./*"):
        childPath = path + "/" + childElement.tag_name
        print(childPath)  # trace every visited path
        final = findXPath(childElement, target, childPath)
        if final != "":
            return final
    return ""
def findJsonPath(jsonObject, target, path, matchType):
    """Return the comma-joined key path to the first container holding *target*.

    Searches depth-first through nested containers of *matchType*.

    Args:
        jsonObject: Decoded JSON value to search.
        target: Key to look for.
        path: Path accumulated so far; each level appends ``"," + key``, so a
            search started with ``""`` yields a path with a leading comma.
        matchType: Container type to descend into (callers pass
            ``type(jsonData)``). Values of any other type, including lists,
            are not searched.

    Returns:
        The path of the first container in which *target* is found, or ``""``
        if it is not found.
    """
    # Only containers of the requested type can hold the key or be descended into.
    if isinstance(jsonObject, matchType):
        if target in jsonObject:
            return path
        for newKey in jsonObject.keys():
            final = findJsonPath(jsonObject[newKey], target, path + "," + newKey, matchType)
            if final != "":
                return final
    return ""
# print(browserDriver.page_source)
# find certain element
# element = browserDriver.find_element_by_xpath("html")
# print(element.text)
#find certain element
# element = browserDriver.find_element_by_xpath("html")
# print(element.get_attribute("textContent"))
# find childElements # "/*":all children under the "html:
# elements = browserDriver.find_elements_by_xpath("html/*")
# for childElement in elements:
# print(childElement.tag_name)
# elements = browserDriver.find_elements_by_xpath("html/head/*")
# for childElement in elements:
# print(childElement.tag_name)
#17.10: "trailingPE"
# elements = browserDriver.find_elements_by_xpath("html")
# counter = 1
# for element in elements:
# if "trailingPE" in element.get_attribute("textContent"):
# print(counter)
# counter +=1
# element = browserDriver.find_element_by_xpath("html")
# print("The final path is: ", findXPath(element, "trailingPE","html"))
#The final path is: html/body/script
# elements = browserDriver.find_elements_by_xpath("html/body/script")
# counter = 1
# for element in elements:
# if "trailingPE" in element.get_attribute("textContent"):
# print(counter) #1
# counter +=1
#the first one
# element = browserDriver.find_element_by_xpath("html/body/script[1]")
# print(element.get_attribute("textContent"))
#only one match
#"trailingPE":{"raw":17.102716,"fmt":"17.10"} #json format
# Read the raw text of the first <script> directly under <body>.
element = browserDriver.find_element_by_xpath("html/body/script[1]")
tempData = element.get_attribute("textContent")
# Trim any of the characters "(this));\n" from both ends.
tempData = tempData.strip("(this));\n")
# Keep what follows "root.App.main = ", then drop the last three characters.
tempData = tempData.split("root.App.main = ")[1]
tempData = tempData[:-3]
jsonData = json.loads(tempData)
# Top-level keys seen earlier: dict_keys(['context', 'plugins'])
matchType = type(jsonData)
# Path found earlier with findJsonPath:
# ,context,dispatcher,stores,QuoteSummaryStore,summaryDetail
finalData = jsonData["context"]["dispatcher"]["stores"]
finalData = finalData["QuoteSummaryStore"]["summaryDetail"]
import pandas as pd
# Tabulate the summaryDetail dictionary and show it.
df = pd.DataFrame(finalData)
print(df)
# Close the browser and end the driver session.
browserDriver.quit()
########### api key
# API documentation: https://openweathermap.org/current
# Example response:
# {"coord":{"lon":145.77,"lat":-16.92},"weather":[{"id":802,"main":"Clouds","description":"scattered clouds","icon":"03n"}],"base":"stations","main":{"temp":300.15,"pressure":1007,"humidity":74,"temp_min":300.15,"temp_max":300.15},"visibility":10000,"wind":{"speed":3.6,"deg":160},"clouds":{"all":40},"dt":1485790200,"sys":{"type":1,"id":8166,"message":0.2064,"country":"AU","sunrise":1485720272,"sunset":1485766550},"id":2172797,"name":"Cairns","cod":200}