How to upload scraped data from a website into Excel and a database at once?

stack :

Hi, I am scraping teacher jobs from this website: https://www.indeed.co.in/?r=us. Now I want to upload them to Excel and a database at once. How is this possible? Can anyone help me? I want to get the headers (title, company, salary) and the data. My scrap.py code for scraping is:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

url = 'https://www.indeed.co.in/?r=us'
driver = webdriver.Chrome(r"mypython/bin/chromedriver_linux64/chromedriver")


driver.get(url)

driver.find_element_by_xpath('//*[@id="text-input-what"]').send_keys("teacher")
driver.find_element_by_xpath('//*[@id="whatWhereFormId"]/div[3]/button').click()

# scrape data
data = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "resultsCol")))
result_set = WebDriverWait(data, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))

# the outer loop doubles as a crude page counter: each pass re-reads the
# current results page (fresh elements, no stale references) and the inner
# loop walks the job cards on that page
for result in result_set:
    data = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "resultsCol")))
    result_set = WebDriverWait(data, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))
    for result in result_set:

        title = result.find_element_by_class_name("title").text
        print(title)

        school = result.find_element_by_class_name("company").text
        print(school)

        try:
            salary = result.find_element_by_class_name("salary").text
            print(salary)
        except:
            # not every job card shows a salary
            print("--------")
    # move to next page
    next_page = result.find_elements_by_xpath("//span[@class='pn']")[-1]
    driver.execute_script("arguments[0].click();", next_page)
Prakhar Jhudele :

Hi, I have made a few modifications to your code; the version below generates a CSV with 200+ records from all the pages and then loads the same data into MySQL.

# -*- coding: utf-8 -*-
"""
Created on Thu Mar  5 15:12:04 2020

@author: prakh
"""

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import pandas as pd
from sqlalchemy import create_engine

url = 'https://www.indeed.co.in/?r=us'
driver = webdriver.Chrome(executable_path='C:/Users/prakh/Documents/PythonScripts/chromedriver.exe') 
driver.get(url)

driver.find_element_by_xpath('//*[@id="text-input-what"]').send_keys("teacher")
driver.find_element_by_xpath('//*[@id="whatWhereFormId"]/div[3]/button').click()

# scrape data
data = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "resultsCol")))
result_set = WebDriverWait(data, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))

# open the CSV once, write the header, and keep the handle open for the data rows
Resultfile = open("teacherr_jobs.csv", 'a', encoding='utf-8', newline='')
wr = csv.writer(Resultfile)
wr.writerow(["Title", "School", "Salary"])

titles = []
company = []
salaries = []
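# the outer loop doubles as a crude page counter: each pass re-reads the
# current results page and the inner loop collects that page's job cards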
for result in result_set:
    data = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "resultsCol")))
    result_set = WebDriverWait(data, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))
    for result in result_set:

        titles.append(result.find_element_by_class_name("title").text)
        company.append(result.find_element_by_class_name("company").text)

        # not every job card shows a salary, so append 'NA' to keep the lists aligned
        try:
            salaries.append(result.find_element_by_class_name("salary").text)
        except:
            salaries.append('NA')
    # move to next page
    next_page = result.find_elements_by_xpath("//span[@class='pn']")[-1]
    driver.execute_script("arguments[0].click();", next_page)
# align on the shortest list in case a card failed mid-scrape
val = min(len(titles), len(company), len(salaries))
print(val)

# write one CSV row per scraped job
for i in range(val):
    wr.writerow([titles[i], company[i], salaries[i]])
Resultfile.close()
# build a DataFrame from the same (length-aligned) lists for the database load
final_df = pd.DataFrame(
    {'Title': titles[:val],
     'School': company[:val],
     'Salary': salaries[:val]
    })

# fill in the [user]:[pass]@[host]:[port]/[schema] placeholders for your MySQL server
engine = create_engine('mysql+mysqldb://[user]:[pass]@[host]:[port]/[schema]', echo=False)
final_df.to_sql(name='my_table', con=engine, if_exists='append', index=False)
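# Note: the 'mysql+mysqldb' URL above needs the mysqlclient (MySQLdb) driver.
# If you have mysql-connector-python installed instead (an alternative, not
# part of the original answer), the equivalent URL would be:
# engine = create_engine('mysql+mysqlconnector://[user]:[pass]@[host]:[port]/[schema]', echo=False)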

driver.quit()
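If you need a real Excel workbook rather than a CSV, pandas can write one straight from the same DataFrame. A minimal sketch, assuming the openpyxl package is installed (the filename is just an example):

final_df.to_excel("teacher_jobs.xlsx", index=False)

That way the DataFrame is the single source of truth: the CSV, the Excel file, and the MySQL table are all produced from one in-memory structure, which is as close to "at once" as it gets.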
