Мне нужно выполнить последовательность действий при парсинге веб-страниц, но это занимает много времени. Поэтому вместо того, чтобы последовательно вызывать одну и ту же функцию раз за разом, я хочу запускать её в нескольких отдельных процессах. Однако в этом случае несколько драйверов начинают конфликтовать друг с другом — как этого избежать?
Я хочу, чтобы несколько процессов выполняли задачу без каких-либо конфликтов, каждый в собственном драйвере, в который уже загружены предыдущие шаги.
<strong>Вот код:</strong>
#importing important libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from test_sheet import connection_sheet
# Browser session setup.
# NOTE(review): `Options` is imported from selenium.webdriver.chrome.options, but
# the driver created below is Firefox and `options` is not passed to it here --
# confirm which browser is intended.
options = Options()
# Chrome-specific settings, currently disabled:
# options.add_experimental_option("detach", True)
# options.add_argument('headless')
# Single module-level browser instance; the functions in this file all use it.
driver = webdriver.Firefox()
# Record for the offer currently being scraped; '-' marks a field that has
# not been filled in (or has been reset) yet.
data = dict.fromkeys(
    (
        'Device',
        'Processor',
        'Memory Capacity',
        'Storage Capacity',
        'Condition',
        'Battery Health',
        'Include Charger',
        'Fully Functional',
        'Price',
    ),
    '-',
)
def _answer_labels():
    """Return the ``aria-label`` of every answer tile on the current page."""
    container = driver.find_element(By.XPATH, "//div[@class='answers']")
    # NOTE(review): an XPath that starts with "//" is evaluated against the
    # whole document even when called on an element; ".//" would limit the
    # search to `container`. Kept as written -- confirm before tightening.
    tiles = container.find_elements(
        By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']"
    )
    return [tile.get_attribute('aria-label') for tile in tiles]


def _explore_question(field, caption, error_message, *, confirm=False, settle=0):
    """Try every answer of the current question, then step back one page.

    field          -- key of ``data`` that receives the chosen answer
    caption        -- text used in the "=> ... :" progress line
    error_message  -- printed if anything in the walk raises
    confirm        -- press the "next" button after selecting an answer
    settle         -- extra seconds to wait before pressing "back"
    """
    try:
        for option in _answer_labels():
            time.sleep(1)
            data[field] = option
            print(f"=> {caption} : ", option)
            find_and_click(option)
            if confirm:
                next_button()
            find_and_fetch()
        if settle:
            time.sleep(settle)
        small_back_button()
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C still stops the crawl.
        print(error_message)
    # The field is cleared once this question has been fully explored.
    data[field] = '-'


def find_and_fetch():
    """Walk the pricing questionnaire depth-first from the current page.

    Reads the question heading, and for the recognised questions selects each
    available answer in turn, recursing after every selection. When no known
    question matches, the page is checked for the final offer, which is
    recorded through ``fetch_info()``.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened listing; in particular the ``data[...] = '-'`` resets are
    assumed to run after each try/except rather than only on failure.
    """
    try:
        # Use JavaScript to hide the banner
        driver.execute_script("""
var banner = document.querySelector('a.message-banner');
if (banner) {
banner.style.display = 'none';
}""")
    except Exception:
        print("banner disabled")
    time.sleep(1)

    text = driver.find_element(By.XPATH, "//h5[@class='ng-binding']").text
    if not text:
        # Nothing recognisable on this page: best-effort step back.
        try:
            small_back_button()
        except Exception as exc:
            print("could not step back:", exc)
        return

    # Processor is handled separately: no "back" press after the loop, and
    # the caught exception itself is printed.
    if 'Processor' in text:
        try:
            for prces in _answer_labels():
                time.sleep(1)
                data['Processor'] = prces
                print("=> processor : ", prces)
                find_and_click(prces)
                find_and_fetch()
        except Exception as e:
            print(e)
        data['Processor'] = '-'
        return

    if "memory capacity" in text:
        _explore_question(
            'Memory Capacity',
            'memory capacity',
            "Error While passing to memory capacity!",
        )
        return

    if "storage capacity" in text:
        try:
            for store in _answer_labels():
                time.sleep(1)
                start_time = time.time()
                data['Storage Capacity'] = store
                print("=> storage capacity : ", store)
                find_and_click(store)
                find_and_fetch()
                time_taken = time.time() - start_time
                print(f"The code block took {time_taken:.4f} seconds to execute.")
                # NOTE(review): this quits the shared driver after the first
                # storage option and leaves the loop, so every later driver
                # call fails (and is reported by the handlers). Looks like
                # timing instrumentation left in place -- confirm and remove.
                driver.quit()
                break
            small_back_button()
        except Exception:
            print("Error While passing to storage selection!")
        data['Storage Capacity'] = '-'
        return

    if "condition" in text:
        # The condition page needs an explicit "next" after selecting.
        _explore_question(
            'Condition',
            'condition',
            "Error While passing to condition selection!",
            confirm=True,
        )
        return

    if "battery health" in text:
        _explore_question(
            'Battery Health',
            'bettery health',  # spelling kept: it is part of the printed output
            "Error While passing to battery health selection!",
            settle=1,
        )
        return

    if "charger" in text:
        _explore_question(
            'Include Charger',
            'include charger',
            "Error While passing to including charger selection!",
            settle=2,
        )
        return

    if "fully functional" in text:
        # Only the first answer is followed for this question.
        try:
            functional = _answer_labels()
            time.sleep(1)
            data['Fully Functional'] = functional[0]
            find_and_click(functional[0])
            find_and_fetch()
            small_back_button()
        except Exception:
            print("Error While passing to fully functional selection!")
        data['Fully Functional'] = '-'
        return

    # No known question matched: check for the final price page.
    time.sleep(1)
    offer = driver.find_element(By.XPATH, "//h3[@class='your-offer']").text
    if "Your device is valued at" in offer:
        print('in the final page')
        fetch_info()
        time.sleep(1)
        large_back_button()
    return
def xpath_literal(value):
    """Return *value* as a quoted XPath 1.0 string literal.

    XPath has no escape character, so a value holding a single quote is
    wrapped in double quotes, and a value holding both kinds of quote is
    assembled with ``concat()``.
    """
    text = str(value)
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    pieces = (f"'{piece}'" for piece in text.split("'"))
    return "concat(" + ", \"'\", ".join(pieces) + ")"


def find_and_click(elem):
    """Click the answer tile whose ``aria-label`` equals *elem*.

    Waits up to 5 seconds for the tile to become clickable, scrolls it into
    view and clicks it. Any failure is reported on stdout and swallowed, so
    the function always returns ``None``.
    """
    time.sleep(1)
    try:
        target = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(
                # Quote the label safely: labels come from the page and may
                # contain quote characters.
                (By.XPATH, f"//div[@aria-label={xpath_literal(elem)}]")
            )
        )
        driver.execute_script("arguments[0].scrollIntoView(true);", target)
        ActionChains(driver).move_to_element(target).click().perform()
    except Exception:
        # `Exception` rather than a bare except so Ctrl-C is not swallowed.
        print("could not find any clickable element")
    return
def _scroll_and_click(xpath):
    """Wait up to 5 s for the element at *xpath* to be clickable, then click it.

    The element is scrolled into view first and clicked through an action
    chain. The wait's exception propagates if the element never becomes
    clickable.
    """
    time.sleep(1)
    target = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )
    # Scroll the page to the element before moving the pointer onto it.
    driver.execute_script("arguments[0].scrollIntoView(true);", target)
    ActionChains(driver).move_to_element(target).click().perform()


def next_button():
    """Click the button whose class is exactly ``button success right``."""
    _scroll_and_click("//button[@class='button success right']")


def small_back_button():
    """Click the button whose class is exactly ``button secondary left``."""
    _scroll_and_click("//button[@class='button secondary left']")


def large_back_button():
    """Click the button whose class is ``button secondary large left no-margin``."""
    _scroll_and_click("//button[@class='button secondary large left no-margin']")
def fetch_info():
    """Read the final-offer panel, store device and price, and export the record.

    Fills ``data['Device']`` and ``data['Price']`` from the panel text, prints
    the record and hands it to ``connection_sheet``.
    """
    time.sleep(1)
    offer_panel = driver.find_element(
        By.XPATH, "//div[@class='pricing-form-final-offer']"
    )
    lines = offer_panel.text.split('\n')
    # The fourth line of the panel text is taken as the price.
    price = lines[3]
    # Device name: the first line, cut at the first comma and then at the
    # first colon.
    device = lines[0].partition(',')[0].partition(':')[0]
    data['Device'] = device
    data['Price'] = price
    print(data)
    connection_sheet(
        spreadsheet_id='1Ze7Uam6GhNGYPXvXYF3TLZPydkZQ6u5l4rmdc7CxLOU',
        data=data,
        user_sheet_name='MacInfo',
    )
    return
def load_page(url):
    """Open *url* in the shared driver and start walking the questionnaire."""
    driver.get(url)
    time.sleep(2)  # fixed pause before the first page is inspected
    find_and_fetch()


# Guarded so that merely importing this module does not start a crawl.
# multiprocessing's "spawn" start method imports the main module in every
# worker process, so an unguarded call would run once per worker.
if __name__ == "__main__":
    load_page(url='https://www.itsworthmore.com/sell/macbook-pro-m1/macbook-pro-16-m4')
Поэтому я хочу разделить память между отдельными драйверами так, чтобы каждый выполнял собственный парсинг (скрейпинг), не конфликтуя с остальными.
Мне нужно выполнить последовательность действий при парсинге веб-страниц, но это занимает много времени. Поэтому вместо того, чтобы последовательно вызывать одну и ту же функцию раз за разом, я хочу запускать её в нескольких отдельных процессах. Однако в этом случае несколько драйверов начинают конфликтовать друг с другом — как этого избежать? Я хочу, чтобы несколько процессов выполняли задачу без каких-либо конфликтов, каждый в собственном драйвере, в который уже загружены предыдущие шаги. <strong>Вот код:</strong> [code]#importing important libraries from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.action_chains import ActionChains import time from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.chrome.options import Options from test_sheet import connection_sheet
def _answer_labels():
    """Return the ``aria-label`` of every answer tile on the current page."""
    container = driver.find_element(By.XPATH, "//div[@class='answers']")
    # NOTE(review): an XPath that starts with "//" is evaluated against the
    # whole document even when called on an element; ".//" would limit the
    # search to `container`. Kept as written -- confirm before tightening.
    tiles = container.find_elements(
        By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']"
    )
    return [tile.get_attribute('aria-label') for tile in tiles]


def _explore_question(field, caption, error_message, *, confirm=False, settle=0):
    """Try every answer of the current question, then step back one page.

    field          -- key of ``data`` that receives the chosen answer
    caption        -- text used in the "=> ... :" progress line
    error_message  -- printed if anything in the walk raises
    confirm        -- press the "next" button after selecting an answer
    settle         -- extra seconds to wait before pressing "back"
    """
    try:
        for option in _answer_labels():
            time.sleep(1)
            data[field] = option
            print(f"=> {caption} : ", option)
            find_and_click(option)
            if confirm:
                next_button()
            find_and_fetch()
        if settle:
            time.sleep(settle)
        small_back_button()
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C still stops the crawl.
        print(error_message)
    # The field is cleared once this question has been fully explored.
    data[field] = '-'


def find_and_fetch():
    """Walk the pricing questionnaire depth-first from the current page.

    Reads the question heading, and for the recognised questions selects each
    available answer in turn, recursing after every selection. When no known
    question matches, the page is checked for the final offer, which is
    recorded through ``fetch_info()``.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened listing; in particular the ``data[...] = '-'`` resets are
    assumed to run after each try/except rather than only on failure.
    """
    try:
        # Use JavaScript to hide the banner
        driver.execute_script("""
var banner = document.querySelector('a.message-banner');
if (banner) {
banner.style.display = 'none';
}""")
    except Exception:
        print("banner disabled")
    time.sleep(1)

    text = driver.find_element(By.XPATH, "//h5[@class='ng-binding']").text
    if not text:
        # Nothing recognisable on this page: best-effort step back.
        try:
            small_back_button()
        except Exception as exc:
            print("could not step back:", exc)
        return

    # Processor is handled separately: no "back" press after the loop, and
    # the caught exception itself is printed.
    if 'Processor' in text:
        try:
            for prces in _answer_labels():
                time.sleep(1)
                data['Processor'] = prces
                print("=> processor : ", prces)
                find_and_click(prces)
                find_and_fetch()
        except Exception as e:
            print(e)
        data['Processor'] = '-'
        return

    if "memory capacity" in text:
        _explore_question(
            'Memory Capacity',
            'memory capacity',
            "Error While passing to memory capacity!",
        )
        return

    if "storage capacity" in text:
        try:
            for store in _answer_labels():
                time.sleep(1)
                start_time = time.time()
                data['Storage Capacity'] = store
                print("=> storage capacity : ", store)
                find_and_click(store)
                find_and_fetch()
                time_taken = time.time() - start_time
                print(f"The code block took {time_taken:.4f} seconds to execute.")
                # NOTE(review): this quits the shared driver after the first
                # storage option and leaves the loop, so every later driver
                # call fails (and is reported by the handlers). Looks like
                # timing instrumentation left in place -- confirm and remove.
                driver.quit()
                break
            small_back_button()
        except Exception:
            print("Error While passing to storage selection!")
        data['Storage Capacity'] = '-'
        return

    if "condition" in text:
        # The condition page needs an explicit "next" after selecting.
        _explore_question(
            'Condition',
            'condition',
            "Error While passing to condition selection!",
            confirm=True,
        )
        return

    if "battery health" in text:
        _explore_question(
            'Battery Health',
            'bettery health',  # spelling kept: it is part of the printed output
            "Error While passing to battery health selection!",
            settle=1,
        )
        return

    if "charger" in text:
        _explore_question(
            'Include Charger',
            'include charger',
            "Error While passing to including charger selection!",
            settle=2,
        )
        return

    if "fully functional" in text:
        # Only the first answer is followed for this question.
        try:
            functional = _answer_labels()
            time.sleep(1)
            data['Fully Functional'] = functional[0]
            find_and_click(functional[0])
            find_and_fetch()
            small_back_button()
        except Exception:
            print("Error While passing to fully functional selection!")
        data['Fully Functional'] = '-'
        return

    # No known question matched: check for the final price page.
    time.sleep(1)
    offer = driver.find_element(By.XPATH, "//h3[@class='your-offer']").text
    if "Your device is valued at" in offer:
        print('in the final page')
        fetch_info()
        time.sleep(1)
        large_back_button()
    return
def _scroll_and_click(xpath):
    """Wait up to 5 s for the element at *xpath* to be clickable, then click it.

    The element is scrolled into view first and clicked through an action
    chain. The wait's exception propagates if the element never becomes
    clickable.
    """
    time.sleep(1)
    target = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )
    # Scroll the page to the element before moving the pointer onto it.
    driver.execute_script("arguments[0].scrollIntoView(true);", target)
    ActionChains(driver).move_to_element(target).click().perform()


def small_back_button():
    """Click the button whose class is exactly ``button secondary left``."""
    _scroll_and_click("//button[@class='button secondary left']")


def large_back_button():
    """Click the button whose class is ``button secondary large left no-margin``."""
    _scroll_and_click("//button[@class='button secondary large left no-margin']")
def fetch_info():
    """Read the final-offer panel, store device and price, and export the record.

    Fills ``data['Device']`` and ``data['Price']`` from the panel text, prints
    the record and hands it to ``connection_sheet``.
    """
    time.sleep(1)
    offer_panel = driver.find_element(
        By.XPATH, "//div[@class='pricing-form-final-offer']"
    )
    lines = offer_panel.text.split('\n')
    # The fourth line of the panel text is taken as the price.
    price = lines[3]
    # Device name: the first line, cut at the first comma and then at the
    # first colon.
    device = lines[0].partition(',')[0].partition(':')[0]
    data['Device'] = device
    data['Price'] = price
    print(data)
    connection_sheet(
        spreadsheet_id='1Ze7Uam6GhNGYPXvXYF3TLZPydkZQ6u5l4rmdc7CxLOU',
        data=data,
        user_sheet_name='MacInfo',
    )
    return
load_page(url='https://www.itsworthmore.com/sell/macbook-pro-m1/macbook-pro-16-m4') [/code] Поэтому я хочу разделить память между отдельными драйверами так, чтобы каждый выполнял собственный парсинг (скрейпинг), не конфликтуя с остальными.