Python Bytes - Web Scraping Flight Prices to Excel with BeautifulSoup (Code in Description)

Published: 26 June 2023
on channel: AC
821
15

#Coded by Andrew C
from subprocess import call, sys
def install(package):
    """Install *package* by running pip under the current interpreter.

    pip is started as ``<interpreter> -m pip`` with its version check
    disabled and quiet output. The exit status of pip is not inspected and
    nothing is returned.
    """
    pip_command = [sys.executable, "-m", "pip"]
    pip_command += ["--disable-pip-version-check", "-q"]
    pip_command += ["install", package]
    call(pip_command)
# Third-party packages the script imports. `datetime` is part of the standard
# library, so it is not requested from PyPI; `pandas` is imported further down
# and therefore has to be installed alongside the others.
install('selenium')
install('beautifulsoup4')
install('lxml')
install('openpyxl')
install('pandas')

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from bs4 import BeautifulSoup
import re
import time
import pandas as pd
import re
import datetime
from openpyxl import load_workbook
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Expected text of one flight leg: "<dep> am|pm – <arr> am|pm<airline>".
# NOTE(review): this shape is inferred from the fixed offsets and en-dash
# patterns the scraper used previously — confirm against the live markup.
_LEG_TEXT = re.compile(
    r'^\s*(\d{1,2}:\d{2})\s*([ap])m\s*–\s*(\d{1,2}:\d{2})\s*([ap])m(.*)$',
    re.DOTALL,
)


def _start_driver(requests):
    """Start Chrome with the user agent selected by ``requests``."""
    agents = ["Firefox/66.0.3", "Chrome/73.0.3683.68", "Edge/16.16299"]
    agent = agents[requests % len(agents)]
    print("User agent: " + agent)

    chrome_options = webdriver.ChromeOptions()
    # No trailing quote here: the argument is passed as one item, so any quote
    # character would become part of the user-agent value itself.
    chrome_options.add_argument('--user-agent=' + agent)
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # NOTE(review): the positional driver path and `desired_capabilities` look
    # like deprecated Selenium arguments (the file silences
    # DeprecationWarning) — confirm the installed Selenium still accepts them.
    return webdriver.Chrome(
        "chromedriver.exe",
        options=chrome_options,
        desired_capabilities=chrome_options.to_capabilities(),
    )


def _fetch_results_html(driver, url):
    """Load ``url`` and return the page HTML, or None if the bot check shows."""
    driver.implicitly_wait(20)
    driver.get(url)

    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    paragraphs = soup.find_all('p')
    # A page without any <p> element cannot be the challenge page, so guard
    # the index instead of letting it raise IndexError.
    if paragraphs and paragraphs[0].getText() == "Please confirm that you are a real user.":
        return None

    # Presumably gives the result list time to finish loading before the
    # final snapshot is taken.
    time.sleep(20)
    return driver.page_source


def _legacy_time(fragment):
    """Normalise a fixed-width time fragment (fallback for unexpected text)."""
    cleaned = re.sub(r'^(–?)(\w\w?)(:)(\w\w)(\W)(a\w?)', r'\2\3\4 am', fragment)
    cleaned = re.sub(r'^(–?)(\w\w?)(:)(\w\w)(\W?)(p)(\w?)', r'\2\3\4 pm', cleaned)
    # Only strips a leading dash from 12:xx; adding " pm" here would double
    # the suffix on values the previous pass already completed.
    return re.sub(r'^(–?)(12)(:)(\w\w)', r'\2\3\4', cleaned)


def _split_leg(text):
    """Split one leg's text into (departure, arrival, airline)."""
    match = _LEG_TEXT.match(text)
    if match:
        dep, dep_half, arr, arr_half, carrier = match.groups()
        return dep + " " + dep_half + "m", arr + " " + arr_half + "m", carrier

    # Text did not have the expected shape: fall back to fixed offsets so the
    # row is still produced and the columns keep the same length.
    carrier = re.sub(r'^([a-z]+)(\w*)', r'\2', text[15:])
    return _legacy_time(text[0:7]), _legacy_time(text[8:15]), carrier


def airline(origin="MIA", destination="SAV", startdate="2023-07-04",
            enddate="2023-07-11", requests=2, output_path=r'E:\test.csv'):
    """Scrape round-trip flight results for one route and write them to CSV.

    Args:
        origin: Departure airport code placed in the search URL.
        destination: Arrival airport code placed in the search URL.
        startdate: Outbound date string placed in the search URL.
        enddate: Return date string placed in the search URL.
        requests: Attempt counter; selects the user agent by modulo.
        output_path: Where the CSV is written (overwritten on each run).

    Returns:
        "failure" if the site shows its real-user confirmation page,
        otherwise "success" after the CSV has been written.

    Raises:
        ValueError: raised by pandas when the scraped columns end up with
            different lengths (for example an odd number of leg elements).
    """
    # NOTE(review): "=stops=0" is appended directly to the end date with no
    # "?" — kept as-is because the target site is a placeholder; confirm.
    url = "https://www.TravelSite.com/flights/" + origin + "-" + destination + "/" + startdate + "/" + enddate + "=stops=0"

    driver = _start_driver(requests)
    try:
        html = _fetch_results_html(driver, url)
    finally:
        # quit() ends the session even when loading raised, so no browser or
        # driver process is left behind.
        driver.quit()

    if html is None:
        print("No Access.Try Again.")
        time.sleep(20)
        return "failure"

    soup = BeautifulSoup(html, 'lxml')
    leg_texts = [div.getText() for div in soup.find_all('div', attrs={'class': 'VY2U'})]
    price_pattern = re.compile('f8F1-price-text')
    prices = [div.getText() for div in soup.find_all('div', attrs={'class': price_pattern})]

    # Leg elements alternate: even positions feed the Flyout columns, odd
    # positions the Flyback columns.
    outbound = [_split_leg(text) for text in leg_texts[::2]]
    inbound = [_split_leg(text) for text in leg_texts[1::2]]

    df = pd.DataFrame({
        "Timestamp": datetime.datetime.now(),
        "Origin": origin,
        "Destination": destination,
        "Flyout_Date": startdate,
        "Flyback_Date": enddate,
        # Every second price element is kept — presumably each result's price
        # appears twice in the markup; verify.
        "Price": prices[::2],
        "Flyout_D": [leg[0] for leg in outbound],
        "Flyout_A": [leg[1] for leg in outbound],
        "Flyback_D": [leg[0] for leg in inbound],
        "Flyback_A": [leg[1] for leg in inbound],
        "Flyout_Airline": [leg[2] for leg in outbound],
        "Flyback_Airline": [leg[2] for leg in inbound],
    })

    df.to_csv(output_path, index=False, header=True)
    time.sleep(15)
    return "success"
# Run one scrape when this file is executed; the returned status string is discarded.
airline()


Watch the video "Python Bytes - Web Scraping Flight Prices to Excel with BeautifulSoup (Code in Description)" online without registration, in high quality (duration not specified). This video was added by user AC on 26 June 2023; don't forget to share it with your friends and acquaintances. It has been viewed on our site 821 times and liked by 15 people.