# Selenium.py
#!/usr/bin/env python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from lxml import html
#import requests
from lxml.cssselect import CSSSelector
from ftfy import fix_encoding
#from lxml import etree
#import sys
#from time import sleep
#from splinter.driver.webdriver.chrome.Options import Options
#import splinter
# --- Browser start-up and login ---------------------------------------------
# Location of the chromedriver binary that drives the Chrome instance.
chrome_path = r"C:\Users\user\Desktop\scrape\chromedriver.exe"
options = webdriver.ChromeOptions()
# NOTE(review): `executable_path` is the Selenium 3 keyword; recent Selenium 4
# releases expect a Service object instead -- confirm the installed version.
browser = webdriver.Chrome(executable_path=chrome_path, options=options)

# Open the login page (the address is an empty string here and must be set).
url = ""
browser.get(url)

# Locate both form fields before typing, then submit the form.
# find_element(By.X, ...) is the locator API available in Selenium 3 and 4;
# the find_element_by_* shortcuts are deprecated.
username = browser.find_element(By.NAME, "username")  # username form field
password = browser.find_element(By.NAME, "password")  # password form field
username.send_keys("")
password.send_keys("")
submitButton = browser.find_element(By.ID, "login")
submitButton.click()

# Navigate to the next page (address also empty here) and print the text of
# the first <span> element found on it.
url = ""
browser.get(url)
NumberOfApplicants = browser.find_element(By.TAG_NAME, 'span').text
print(NumberOfApplicants)
# --- Output CSV files --------------------------------------------------------
# Six output files are opened in text write mode and given a header line.
# The handles stay open for the rest of the script and are closed at the end.
# NOTE(review): the handle named `csv` would shadow the standard-library
# `csv` module if that module were ever imported in this file.

# table csvFileName1
csvFile1 = "csvFileName1.csv"  # output file name
csv = open(csvFile1, "w")  # create or truncate the file for writing
colNames = "col1 , col2 , col3 , col4 , col5 \n"  # column titles
csv.write(colNames)

# table csvFileName2
csvFileEmail = "csvFileName2.csv"
csv0 = open(csvFileEmail, "w")
colNames = "col1 , col2 \n"
csv0.write(colNames)

# table csvFileName3
# NOTE(review): this header names 11 columns, but the rows written to csv1
# later in the script carry 13 fields -- confirm the intended titles.
csvFilename1 = "csvFileName3.csv"
csv1 = open(csvFilename1, "w")
colNames1 = "col1 , col2 , col3 , col4 , col5, col6 , col7 , col8 , col9 , col10 , col11 \n"  # column titles
csv1.write(colNames1)

# table csvFileName4
# NOTE(review): this header names 12 columns, but the rows written to csv2
# later in the script carry 7 fields -- confirm the intended titles.
csvFilename2 = "csvFileName4.csv"
csv2 = open(csvFilename2, "w")
colNames2 = "col1 , col2 , col3 , col4 , col5, col6 , col7 , col8 , col9 , col10 , col11, col12 \n"  # column titles
csv2.write(colNames2)

# table csvFileName5
# NOTE(review): this header names 10 columns, but the rows written to csv3
# later in the script carry 11 fields -- confirm the intended titles.
csvFilename3 = "csvFileName5.csv"
csv3 = open(csvFilename3, "w")
colNames3 = "col1 , col2 , col3 , col4 , col5 , col6 , col7 , col8 , col9 , col10 \n"  # column titles
csv3.write(colNames3)

# table csvFileName6
# NOTE(review): this header names 5 columns, but the rows written to csv4
# later in the script carry 11 fields -- confirm the intended titles.
csvFilename4 = "csvFileName6.csv"  # output file name
csv4 = open(csvFilename4, "w")  # create or truncate the file for writing
colNames4 = "col1 , col2 , col3 , col4 , col5\n"
csv4.write(colNames4)
# --- Per-page scraping loop --------------------------------------------------
NULL = "null"  # placeholder written for every cell that has no value

# Substrings removed from each page dump before it is parsed (empty here).
liste = []

# Prefix for the per-page text dumps.  The value is "Data_files2", a
# backslash and a space, exactly as before, so each dump is written inside
# Data_files2 under a name that starts with a space.
# NOTE(review): the trailing space looks accidental -- confirm before changing
# it, since existing dumps on disk use this naming.
files_path = "Data_files2\\ "


def _value(line):
    """Return the text between the first and second ':' of *line*.

    Raises IndexError when *line* contains no ':'.
    """
    return line.split(":")[1]


def _csv_row(*cells):
    """Join *cells* with commas and end the row with a newline.

    Cells are written verbatim: a cell that itself contains a comma is not
    quoted or escaped.
    """
    return ",".join(cells) + "\n"


def _section_lines(text, start, end):
    """Return the lines found after the first *start* and before *end*.

    The section is the text that follows the first occurrence of *start*
    (up to the next *start*), cut at the first occurrence of *end*.
    Raises IndexError when *start* does not occur in *text*.
    """
    return text.split(start)[1].split(end)[0].split("\n")


def _table2_record(lines, base):
    """Return the six csv1 cells of the record that starts at lines[base]."""
    head = _value(lines[base]).split("-")[0]
    tail = [_value(lines[base + step]) for step in (2, 3, 5, 6, 7)]
    return [fix_encoding(cell) for cell in [head] + tail]


def _table3_record(lines, base):
    """Return the three csv2 cells of the record that starts at lines[base].

    Each source line is passed through fix_encoding before it is split, and
    each resulting cell is passed through fix_encoding again.
    """
    first, second, third = (fix_encoding(lines[base + step]) for step in (0, 2, 3))
    cells = (
        _value(first).split("(")[0],
        _value(second).split("(")[0],
        _value(third),
    )
    return [fix_encoding(cell) for cell in cells]


for j in range(16, 1517):
    url = "" + str(j)
    browser.get(url)

    # Dump the text of the modal body to disk and read it back, so the text
    # that gets parsed is exactly what a text-mode file round trip yields.
    dump_path = files_path + str(j) + ".txt"
    with open(dump_path, "w") as file:
        file.write(browser.find_element(By.CSS_SELECTOR, "div.modal-body").text)
    with open(dump_path, "r") as file:
        filedata = file.read()

    # Strip the unwanted substrings, collapse every "\nScore : <digit> ("
    # prefix into a single ':', then strip the substrings once more in case
    # the rewrite exposed new occurrences.
    for marker in liste:
        filedata = filedata.replace(marker, "")
    for digit in range(10):
        filedata = filedata.replace("\nScore : " + str(digit) + " (", ":")
    for marker in liste:
        filedata = filedata.replace(marker, "")
    with open(dump_path, "w") as file:
        file.write(filedata)

    # The page text is cut into sections by single-character markers.
    lines1 = _section_lines(filedata, "&", "ù")  # 1st list
    lines2 = _section_lines(filedata, "ù", "µ")  # 2nd list
    lines3 = _section_lines(filedata, "µ", "~")  # 3rd list
    lines4 = _section_lines(filedata, "%", "$")  # "linguistic" list

    # Table 1 (csv) and its companion file (csv0).
    if len(lines1) < 12:
        # Short section: only two values are kept and the id column is null.
        csv.write(_csv_row(NULL, _value(lines1[1]), _value(lines1[2]), NULL, NULL))
        csv0.write(_csv_row(NULL, NULL))
    else:
        csv.write(_csv_row(
            str(j),
            _value(lines1[1]),
            _value(lines1[2]),
            _value(lines1[3]),
            _value(lines1[4]),
        ))
        csv0.write(_csv_row(str(j), _value(lines1[9]).split(" ")[1]))

    # Table 2 (csv1): up to two six-cell records per page.
    # NOTE(review): sections of 13 to 19 lines match no branch and write no
    # row -- confirm whether that gap is intended.
    if len(lines2) >= 20:
        csv1.write(_csv_row(
            str(j), *(_table2_record(lines2, 1) + _table2_record(lines2, 9))
        ))
    elif len(lines2) == 12:
        csv1.write(_csv_row(str(j), *(_table2_record(lines2, 1) + [NULL] * 6)))
    elif len(lines2) < 12:
        csv1.write(_csv_row(str(j), *([NULL] * 12)))

    # Table 3 (csv2): up to two three-cell records per page.
    # NOTE(review): sections of 9 to 11 lines match no branch and write no
    # row -- confirm whether that gap is intended.
    if len(lines3) >= 12:
        csv2.write(_csv_row(
            str(j), *(_table3_record(lines3, 1) + _table3_record(lines3, 5))
        ))
    elif len(lines3) == 8:
        csv2.write(_csv_row(str(j), *(_table3_record(lines3, 1) + [NULL] * 3)))
    elif len(lines3) < 8:
        csv2.write(_csv_row(str(j), *([NULL] * 6)))

    # Table 4 (csv3): ten values taken from every second line, 2 through 20.
    # NOTE(review): sections of 9 to 22 lines match no branch and write no
    # row -- confirm whether that gap is intended.
    if len(lines4) >= 23:
        cells = [fix_encoding(_value(lines4[k])) for k in range(2, 21, 2)]
        csv3.write(_csv_row(str(j), *cells))
    elif len(lines4) <= 8:
        csv3.write(_csv_row(str(j), *([NULL] * 10)))

    # Table 5 (csv4): parsed only when a line equal to "5 :" is present in
    # the fourth section.
    # NOTE(review): when "5 :" is absent and the section contains an empty
    # line, no row is written -- confirm whether that is intended.
    if "5 :" in lines4:
        lines5 = _section_lines(filedata, "$", "Seuil")
        if len(lines5) >= 21:
            cells = [
                _value(lines5[1]),
                _value(lines5[3]),
                _value(lines5[4]),
                _value(lines5[6]),
                _value(lines5[7]),
                _value(lines5[8])[13:15],  # two characters at a fixed offset
                _value(lines5[10]),
                _value(lines5[12]),
                _value(lines5[14]),
            ]
            csv4.write(_csv_row(
                str(j), "Oui", *(fix_encoding(cell) for cell in cells)
            ))
        else:
            csv4.write(_csv_row(str(j), "Non", *([NULL] * 9)))
    elif "" not in lines4:
        csv4.write(_csv_row(str(j), "Non", *([NULL] * 9)))
# --- Cleanup -----------------------------------------------------------------
# Close every CSV output so buffered rows are flushed to disk.
for handle in (csv, csv0, csv1, csv2, csv3, csv4):
    handle.close()

# quit() ends the whole WebDriver session and stops the chromedriver process;
# close() would only close the current window and leave the driver running.
browser.quit()