スタックオーバーフロー(質問):
こんにちは。私はこのウェブサイト https://www.indeed.co.in/?r=us から教師の求人情報をスクレイピングしています。取得したタイトル・会社名・給料を、ヘッダー付きで Excel やデータベースにアップロードしたいのですが、どうすれば可能でしょうか。どなたか助けてください。私の scrap.py のコードは次のとおりです。
# Scrape teacher job listings (title, company, salary) from Indeed and
# print them to stdout, paging through the result list.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

url = 'https://www.indeed.co.in/?r=us'
driver = webdriver.Chrome(r"mypython/bin/chromedriver_linux64/chromedriver")
driver.get(url)

# Fill the "what" box with the search term and submit the form.
driver.find_element_by_xpath('//*[@id="text-input-what"]').send_keys("teacher")
driver.find_element_by_xpath('//*[@id="whatWhereFormId"]/div[3]/button').click()

# The first page's card count doubles as the page budget: the original
# code iterated the first result set once per page, so we keep that.
results_col = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "resultsCol")))
first_page_cards = WebDriverWait(results_col, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))

for _ in first_page_cards:  # one iteration per result page
    # Re-locate the results column and its job cards on the current page.
    results_col = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "resultsCol")))
    cards = WebDriverWait(results_col, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))
    for card in cards:
        print(card.find_element_by_class_name("title").text)
        print(card.find_element_by_class_name("company").text)
        try:
            print(card.find_element_by_class_name("salary").text)
        except NoSuchElementException:
            pass  # not every posting lists a salary
        print("--------")
    # Move to the next page: the last 'pn' span is the "Next" control.
    # Search from the driver (not a card, which may be stale by now).
    next_page = driver.find_elements_by_xpath("//span[@class='pn']")[-1]
    driver.execute_script("arguments[0].click();", next_page)
Prakhar Jhudele(回答):
こんにちは。あなたのコードにいくつか変更を加え、すべてのページから200件以上のレコードを含むCSVファイルを生成できるようにしました。コードは以下のとおりです。
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 5 15:12:04 2020
@author: prakh
"""
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine
def smallest(a, y, z):
    """Return the smallest of the three values *a*, *y*, *z*.

    Bug fix: the original ended with ``if y < z: M = y``, which
    overwrote the running minimum whenever y < z regardless of *a*
    (e.g. smallest(1, 2, 3) wrongly returned 2). The builtin ``min``
    gives the correct answer directly.
    """
    return min(a, y, z)
# Scrape teacher job listings from Indeed across all pages, write them to
# a CSV file, and append them to a MySQL table via SQLAlchemy/pandas.
from selenium.common.exceptions import NoSuchElementException

url = 'https://www.indeed.co.in/?r=us'
driver = webdriver.Chrome(executable_path='C:/Users/prakh/Documents/PythonScripts/chromedriver.exe')
driver.get(url)

# Search for "teacher" and submit the form.
driver.find_element_by_xpath('//*[@id="text-input-what"]').send_keys("teacher")
driver.find_element_by_xpath('//*[@id="whatWhereFormId"]/div[3]/button').click()

# Wait for the first page of results; its card count doubles as the
# page budget (the original iterated the first result set once per page).
data = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "resultsCol")))
first_page_cards = WebDriverWait(data, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))

# Write the CSV header once; a context manager replaces the original
# open/close pair so the handle cannot leak.
with open("teacherr_jobs.csv", 'a', encoding='utf-8', newline='') as header_file:
    csv.writer(header_file).writerow(["Title", "School", "Salary"])

titles = []
company = []
salaries = []

for _ in first_page_cards:  # one iteration per result page
    # Re-locate the results column and job cards on the current page.
    data = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "resultsCol")))
    page_cards = WebDriverWait(data, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "jobsearch-SerpJobCard")))
    for result in page_cards:
        titles.append(result.find_element_by_class_name("title").text)
        company.append(result.find_element_by_class_name("company").text)
        try:
            salaries.append(result.find_element_by_class_name("salary").text)
        except NoSuchElementException:
            # Not every posting lists a salary; keep the columns aligned.
            salaries.append('NA')
        print("--------")
    # Move to the next page: the last 'pn' span is the "Next" control.
    next_page = driver.find_elements_by_xpath("//span[@class='pn']")[-1]
    driver.execute_script("arguments[0].click();", next_page)

# The three lists grow in lockstep, so this is belt-and-braces.
val = smallest(len(titles), len(company), len(salaries))
print(val)

# Write one CSV row per record. Off-by-one fix: the original used
# range(0, val - 1), which silently dropped the last scraped record.
with open("teacherr_jobs.csv", 'a', encoding='utf-8', newline='') as result_file:
    wr = csv.writer(result_file)
    for i in range(val):
        wr.writerow([titles[i], company[i], salaries[i]])

# Mirror the same data into MySQL via pandas/SQLAlchemy.
final_df = pd.DataFrame(
    {'Title': titles,
     'School': company,
     'Salary': salaries
     })
engine = create_engine('mysql+mysqldb://[user]:[pass]@[host]:[port]/[schema]', echo=False)
final_df.to_sql(name='my_table', con=engine, if_exists='append', index=False)
driver.quit()