Awais :
I tried to automate a website but when it goes to a page that has no elements, it's doesn't run anymore. For example this page: https://www.marks4sure.com/9A0-127-exam.html What I want to do is if it doesn't exist any details, it should return back and then proceed with the next one.
Thanks for the help. Here is my code:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
driver.implicitly_wait(100)
url = "https://www.marks4sure.com/allexams.html"
driver.get(url)
links = []
exam_code = []
exam_name = []
total_q = []
for x in range(70):
for i in range(1, 57):
more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
links.append(more_details.get_attribute('href'))
more_details.click()
try:
code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
exam_code.append(code.text)
except:
print('N/A')
try:
name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
exam_name.append(name.text)
except:
print('N/A')
try:
question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
total_q.append(question.text)
except:
print('N/A')
driver.back()
next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
next_page.click()
all_info = list(zip(links, exam_name, exam_name, total_q))
print(all_info)
df = pd.DataFrame(all_info, columns = ["Links", "Exam Code", "Exam Name", "Total Question"])
df.to_csv("data.csv", encoding = 'utf-8')
driver.close()
4rigener :
You not check for more details elements
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
driver.implicitly_wait(0.5)
url = "https://www.marks4sure.com/allexams.html"
driver.get(url)
links = []
exam_code = []
exam_name = []
total_q = []
for x in range(70):
for i in range(1, 57):
try:
more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
links.append(more_details.get_attribute('href'))
more_details.click()
except NoSuchElementException:
continue
try:
if driver.find_element_by_xpath('/html/body/div[4]/div').get_attribute('class') == 'alert alert-danger':
drier.back()
continue
except NoSuchElementException:
pass
try:
code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
exam_code.append(code.text)
except:
print('N/A')
try:
name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
exam_name.append(name.text)
except:
print('N/A')
try:
question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
total_q.append(question.text)
except:
print('N/A')
driver.back()
try:
next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
next_page.click()
except NoSuchElementException:
driver.refresh()
all_info = list(zip(links, exam_name, exam_name, total_q))
print(all_info)
df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
driver.close()