Anonymous
Вход в Exa.ai через Selenium завершается ошибкой «Неверный доступ к странице входа» после отправки кода
Сообщение
Anonymous » 23 янв 2026, 16:34
Код: Выделить всё
import os
import random
import re
import string
import time
from datetime import datetime

import requests
from docx import Document
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# --- SETTINGS ---
# Folder (under the current working directory) that receives downloaded files.
DOWNLOAD_DIR = os.path.join(os.getcwd(), "downloads")
# Word document whose paragraphs describe the searches to run.
SPLITS_PATH = r"D:\Python Automation Scripts\Scraping Script\splits.docx"
# Text file holding the resume index between runs.
PROGRESS_FILE = "progress.txt"
# NOTE(review): presumably seconds to wait for the verification e-mail - confirm.
AUTH_TIMEOUT = 45

# exist_ok=True removes the check-then-create race of testing
# os.path.exists() first, and is a no-op when the folder is already there.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
def get_last_progress():
    """Return the index stored in PROGRESS_FILE, or 0 if none is usable.

    Returns:
        The non-negative integer read from the file. 0 is returned when the
        file does not exist or its stripped content is not a plain run of
        ASCII digits.
    """
    try:
        # errors="replace" keeps a corrupted file from raising while decoding;
        # the replacement character then fails the digit check below.
        with open(PROGRESS_FILE, "r", encoding="utf-8", errors="replace") as f:
            content = f.read().strip()
    except FileNotFoundError:
        return 0
    # str.isdigit() alone also accepts characters such as "²" that int()
    # rejects, so require ASCII to keep int() from raising ValueError.
    if content.isascii() and content.isdigit():
        return int(content)
    return 0
def save_progress(index):
    """Persist ``index`` to PROGRESS_FILE as its decimal string.

    The value is written to a sibling temporary file that then replaces
    PROGRESS_FILE. Opening PROGRESS_FILE with mode "w" directly truncates it
    first, so an interruption before the write completes would leave an empty
    file and the next run would restart from 0.

    Args:
        index: Value to store; converted with ``str()``.
    """
    tmp_path = f"{PROGRESS_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(str(index))
    os.replace(tmp_path, PROGRESS_FILE)
class MailTM:
    """Throw-away mailbox on api.mail.tm used to receive a verification code.

    Constructing an instance performs network I/O: it fetches a domain,
    registers a random address and obtains a bearer token.
    """

    # Seconds before a single HTTP request is abandoned. requests waits
    # indefinitely when no timeout is given, which would stall the whole run.
    REQUEST_TIMEOUT = 15

    # A standalone run of exactly six digits.
    _CODE_RE = re.compile(r'\b\d{6}\b')
    # <style>/<script> blocks and any remaining tags (attributes included).
    _HTML_NOISE_RE = re.compile(
        r'<(style|script)\b.*?</\1\s*>|<[^>]*>', re.IGNORECASE | re.DOTALL
    )

    def __init__(self, auth_timeout=60, retries=5):
        """Create the mailbox and fetch its token.

        Args:
            auth_timeout: Seconds get_verification_code() keeps polling.
            retries: Number of attempts made to obtain the token.

        Raises:
            Exception: Propagated from create_account_and_token().
        """
        self.api_url = "https://api.mail.tm"
        self.auth_timeout = auth_timeout
        self.retries = retries
        self.address = None
        self.password = "ExaScraper2026!"
        self.token = None
        self.domain = None
        self.create_account_and_token()

    def create_account_and_token(self):
        """Register a random address and store its bearer token on self.

        Raises:
            Exception: If account creation is rejected, no token is returned
                within ``self.retries`` attempts, or a request fails. The
                error is printed before being re-raised.
        """
        try:
            # Step 1: Get a domain
            res = requests.get(
                f"{self.api_url}/domains", timeout=self.REQUEST_TIMEOUT
            ).json()
            self.domain = res['hydra:member'][0]['domain']
            # Step 2: Generate random email address
            local_part = ''.join(
                random.choice(string.ascii_lowercase + string.digits)
                for _ in range(12)
            )
            self.address = f"{local_part}@{self.domain}"
            credentials = {"address": self.address, "password": self.password}
            # Step 3: Create account
            resp = requests.post(
                f"{self.api_url}/accounts",
                json=credentials,
                timeout=self.REQUEST_TIMEOUT,
            )
            if resp.status_code not in [200, 201]:
                raise Exception(f"Account creation failed: {resp.text}")
            # Step 4: Get token with retries
            for _ in range(self.retries):
                time.sleep(2)
                token_res = requests.post(
                    f"{self.api_url}/token",
                    json=credentials,
                    timeout=self.REQUEST_TIMEOUT,
                ).json()
                if 'token' in token_res:
                    self.token = token_res['token']
                    print(f"✅ MailTM token obtained for {self.address}")
                    return
            raise Exception("Failed to retrieve token after retries")
        except Exception as e:
            print(f"❌ MailTM Initialization Error: {e}")
            raise

    @staticmethod
    def _extract_code(text, html):
        """Return the first standalone 6-digit number of a message, or None.

        The plain-text part is searched first. The HTML part is used only as
        a fallback, after style/script blocks and tags are removed, so that
        values such as the colour ``#333333`` inside markup are not mistaken
        for the code.

        Args:
            text: Plain-text body (any object; converted with ``str()``).
            html: HTML body as a string or a list/tuple of string fragments.
        """
        if isinstance(html, (list, tuple)):
            html = " ".join(str(part) for part in html)
        text = "" if text is None else str(text)
        html = "" if html is None else str(html)
        match = MailTM._CODE_RE.search(text)
        if match is None:
            visible = MailTM._HTML_NOISE_RE.sub(" ", html)
            match = MailTM._CODE_RE.search(visible)
        return match.group(0) if match else None

    def get_verification_code(self):
        """
        Polls the mailbox for a 6-digit verification code.
        Returns the code as soon as found, or None if timeout.

        Raises:
            Exception: If no token is available on this instance.
        """
        if not self.token:
            raise Exception("No MailTM token available")
        headers = {"Authorization": f"Bearer {self.token}"}
        # Messages already inspected without finding a code; skipping them
        # avoids re-downloading the same bodies on every poll.
        checked_ids = set()
        deadline = time.monotonic() + self.auth_timeout
        while time.monotonic() < deadline:
            try:
                time.sleep(2)  # Poll every 2 seconds
                listing = requests.get(
                    f"{self.api_url}/messages",
                    headers=headers,
                    timeout=self.REQUEST_TIMEOUT,
                )
                listing.raise_for_status()
                for msg in listing.json().get('hydra:member', []):
                    msg_id = msg['id']
                    if msg_id in checked_ids:
                        continue
                    detail = requests.get(
                        f"{self.api_url}/messages/{msg_id}",
                        headers=headers,
                        timeout=self.REQUEST_TIMEOUT,
                    )
                    # An error response must not be recorded as "checked",
                    # otherwise the message would never be looked at again.
                    detail.raise_for_status()
                    msg_content = detail.json()
                    checked_ids.add(msg_id)
                    text = msg_content.get('text', '')
                    html = msg_content.get('html', '')
                    # Debug: show the first 100 chars of the email content
                    preview = (str(text) + str(html))[:100]
                    print(f"📩 Polling MailTM: {preview}...")
                    code = self._extract_code(text, html)
                    if code:
                        print(f"✅ Verification code found: {code}")
                        return code
            except Exception as e:
                # Best-effort polling: report and try again until the deadline.
                print(f"⚠ MailTM Polling Error: {e}")
        print("❌ Verification code not received within timeout")
        return None
def get_driver():
    """Create a Chrome WebDriver configured to download into DOWNLOAD_DIR.

    Returns:
        A ``webdriver.Chrome`` instance whose driver binary is resolved by
        ``ChromeDriverManager().install()``.
    """
    chrome_options = Options()
    # Headless mode ("--headless=new") is currently not enabled; the fixed
    # window size below was marked as critical for headless interactability.
    for flag in (
        "--window-size=1920,1080",
        "--start-maximized",
        "--disable-blink-features=AutomationControlled",
    ):
        chrome_options.add_argument(flag)
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": DOWNLOAD_DIR,
            "download.prompt_for_download": False,
        },
    )
    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
def start_search_sequence(driver, split):
    """Open the Websets search page in the current tab and start one search.

    Args:
        driver: Selenium WebDriver, already signed in.
        split: Mapping with the keys 'title', 'company' and 'geography'.

    Raises:
        selenium.common.exceptions.WebDriverException: If an expected element
            does not become available (after one retry for the 'Email' button).
    """
    wait = WebDriverWait(driver, 45)
    driver.get("https://websets.exa.ai/websets/search")
    search_query = f"{split['title']} at {split['company']} in {split['geography']}"
    box = wait.until(EC.presence_of_element_located(
        (By.XPATH, "//textarea | //input[contains(@placeholder, 'Describe')]")
    ))
    box.send_keys(search_query)
    box.send_keys(Keys.ENTER)
    time.sleep(10)

    email_locator = (By.XPATH, "//button[contains(., 'Email')]")
    # First attempt, then one retry after a 5 second pause.
    for pause_before in (0, 5):
        if pause_before:
            time.sleep(pause_before)
        try:
            email_btn = wait.until(EC.element_to_be_clickable(email_locator))
            driver.execute_script("arguments[0].click();", email_btn)
            break
        except WebDriverException:
            # Only Selenium failures are retried; a bare except here would
            # also swallow KeyboardInterrupt / SystemExit.
            if pause_before:
                raise

    start_btn = wait.until(EC.element_to_be_clickable(
        (By.XPATH, "//button[contains(., 'Start search')]")
    ))
    driver.execute_script("arguments[0].click();", start_btn)
def monitor_and_export(driver, split_data, index):
    """Wait for the search in the current tab to finish, then download it.

    Polls every 10 seconds until no displayed 'Stop' button is found, clicks
    the export and download controls, waits up to 45 seconds for a new file
    to appear in DOWNLOAD_DIR and renames it to
    ``{index}_{sanitised title}_{HHMMSS}.csv``.

    Args:
        driver: Selenium WebDriver focused on the tab running the search.
        split_data: Mapping with at least the key 'title'.
        index: Number used as the file-name prefix.

    Raises:
        TimeoutError: If no completed download appears within 45 seconds.
            Returning silently here would let the caller record progress for
            a file that was never exported.
        selenium.common.exceptions.WebDriverException: If the export or
            download control does not become clickable.
    """
    wait = WebDriverWait(driver, 30)
    while True:
        try:
            stops = driver.find_elements(By.XPATH, "//button[contains(., 'Stop')]")
            if not stops or not stops[0].is_displayed():
                break
        except WebDriverException:
            # e.g. the button went stale between lookup and visibility check;
            # treated as "search finished", as a failed lookup always was.
            break
        time.sleep(10)

    initial_files = set(os.listdir(DOWNLOAD_DIR))
    export_btn = wait.until(EC.element_to_be_clickable(
        (By.XPATH, "//button[contains(., 'Export')] | //button[contains(., 'Everything')]")
    ))
    driver.execute_script("arguments[0].click();", export_btn)
    download_btn = wait.until(EC.element_to_be_clickable(
        (By.XPATH, "//button[contains(., 'Download')] | //div[contains(text(), 'Download CSV')]")
    ))
    driver.execute_script("arguments[0].click();", download_btn)

    timestamp = datetime.now().strftime("%H%M%S")
    deadline = time.monotonic() + 45
    while time.monotonic() < deadline:
        new_files = set(os.listdir(DOWNLOAD_DIR)) - initial_files
        # Ignore in-progress artefacts. '.crdownload' is Chrome's partial
        # download suffix; '.tmp' is assumed to be a temporary file as well
        # (NOTE(review): confirm for the Chrome version in use).
        finished = sorted(
            name for name in new_files
            if not name.endswith(('.crdownload', '.tmp'))
        )
        if finished:
            safe_role = "".join(
                c for c in split_data['title'] if c.isalnum() or c == ' '
            ).strip()
            new_path = os.path.join(DOWNLOAD_DIR, f"{index}_{safe_role}_{timestamp}.csv")
            os.rename(os.path.join(DOWNLOAD_DIR, finished[0]), new_path)
            return
        time.sleep(2)
    raise TimeoutError(
        f"Download for split {index} did not complete within 45 seconds"
    )
def _load_splits(path):
    """Parse 'Title: ... Company: ... Geography: ...' paragraphs of a .docx file.

    Paragraphs lacking 'Title:' or 'Company:' are skipped; a missing
    'Geography:' defaults to "USA".
    """
    splits = []
    for para in Document(path).paragraphs:
        text = para.text.strip()
        if "Title:" not in text or "Company:" not in text:
            continue
        # Both markers are present, so index [1] below always exists.
        title = text.split("Title:")[1].split("Company:")[0].strip()
        company = text.split("Company:")[1].split("Geography:")[0].strip()
        geography = text.split("Geography:")[1].strip() if "Geography:" in text else "USA"
        splits.append({'title': title, 'company': company, 'geography': geography})
    return splits


def _sign_in(driver, mail):
    """Submit the mailbox address and verification code on the sign-in page.

    Returns True once the code has been submitted, False when no code arrived.
    """
    # NOTE(review): the question this script comes from reports that the site
    # answers with a "login page was accessed incorrectly - navigate to the
    # app you want to sign in to" page after the code is verified. Opening the
    # auth URL directly, instead of being redirected to it from
    # websets.exa.ai, may be the cause - unverified, so the URL is unchanged.
    driver.get("https://auth.exa.ai/?callbackUrl=https%3A%2F%2Fwebsets.exa.ai%2F")
    wait = WebDriverWait(driver, 30)
    # Step 1: Submit Email
    email_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='email']")))
    email_field.send_keys(mail.address)
    driver.find_element(By.XPATH, "//form//button[@type='submit']").click()
    # Step 2: Get Code
    v_code = mail.get_verification_code()
    if not v_code:
        return False
    # Step 3: Enter the code
    print(f"✅ Code {v_code} found. Entering into UI...")
    code_input = wait.until(EC.visibility_of_element_located(
        (By.XPATH, "//input[contains(@placeholder, 'Enter verification code')]")
    ))
    # Use ActionChains to ensure focus before typing
    ActionChains(driver).move_to_element(code_input).click().send_keys(v_code).perform()
    time.sleep(1)
    verify_btn = driver.find_element(By.XPATH, "//button[contains(., 'VERIFY CODE')]")
    driver.execute_script("arguments[0].click();", verify_btn)
    time.sleep(10)  # Allow dashboard to load
    return True


def main():
    """Run every pending split, two per browser session, resuming from disk.

    Each loop iteration starts a fresh browser and mailbox, signs in, starts
    up to two searches in separate tabs and exports them in order. Progress
    is saved after each export. Any error is printed and the remaining work
    is retried with a new session after 5 seconds.
    """
    all_splits = _load_splits(SPLITS_PATH)
    i = get_last_progress()
    while i < len(all_splits):
        driver = None
        try:
            driver = get_driver()
            # AUTH_TIMEOUT was defined in the settings but never passed on.
            mail = MailTM(auth_timeout=AUTH_TIMEOUT)
            if not _sign_in(driver, mail):
                # No code arrived: retry with a new session. The finally
                # clause below closes this browser.
                continue

            # Search logic
            first = i
            h1 = driver.current_window_handle
            start_search_sequence(driver, all_splits[first])
            if first + 1 < len(all_splits):
                driver.execute_script("window.open('https://websets.exa.ai/websets/search', 'tab2');")
                driver.switch_to.window(driver.window_handles[1])
                h2 = driver.current_window_handle
                start_search_sequence(driver, all_splits[first + 1])

                driver.switch_to.window(h1)
                monitor_and_export(driver, all_splits[first], first + 1)
                save_progress(first + 1)
                # Advance in memory together with the file, so a failure on
                # the second export does not re-run the first one.
                i = first + 1

                driver.switch_to.window(h2)
                monitor_and_export(driver, all_splits[first + 1], first + 2)
                save_progress(first + 2)
                i = first + 2
            else:
                monitor_and_export(driver, all_splits[first], first + 1)
                save_progress(first + 1)
                i = first + 1
        except Exception as e:
            # Top-level boundary: report and retry with a new session.
            print(f"⚠ Batch Error: {e}")
            time.sleep(5)
        finally:
            if driver is not None:
                driver.quit()


if __name__ == "__main__":
    main()
Я пытаюсь автоматизировать процесс входа в систему для auth.exa.ai с помощью Selenium и API временной почты (Mail.TM).
Сценарий успешно:
Создает временный адрес электронной почты
Запрашивает вход в Exa
Получает код подтверждения по электронной почте
Вводит код в поле ввода
Нажимает ПОДТВЕРДИТЬ КОД
Однако в этот момент exa.ai возвращает страницу с ошибкой:
Начало работы с Exa
Неверный доступ к странице входа. Перейдите к приложению, в которое хотите войти, и повторите попытку.
Страница ошибки
Подробнее здесь:
https://stackoverflow.com/questions/79874454/selenium-login-flow-for-exa-ai-fails-with-the-login-page-was-accessed-incorrect
1769175283
Anonymous
[code]import os import time import requests import random import string import re from datetime import datetime from docx import Document from selenium import webdriver from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.action_chains import ActionChains from webdriver_manager.chrome import ChromeDriverManager # --- SETTINGS --- DOWNLOAD_DIR = os.path.join(os.getcwd(), "downloads") SPLITS_PATH = r"D:\Python Automation Scripts\Scraping Script\splits.docx" PROGRESS_FILE = "progress.txt" AUTH_TIMEOUT = 45 if not os.path.exists(DOWNLOAD_DIR): os.makedirs(DOWNLOAD_DIR) def get_last_progress(): if os.path.exists(PROGRESS_FILE): with open(PROGRESS_FILE, "r") as f: content = f.read().strip() if content.isdigit(): return int(content) return 0 def save_progress(index): with open(PROGRESS_FILE, "w") as f: f.write(str(index)) class MailTM: def __init__(self, auth_timeout=60, retries=5): self.api_url = "https://api.mail.tm" self.auth_timeout = auth_timeout self.retries = retries self.address = None self.password = "ExaScraper2026!" 
self.token = None self.domain = None self.create_account_and_token() def create_account_and_token(self): try: # Step 1: Get a domain res = requests.get(f"{self.api_url}/domains").json() self.domain = res['hydra:member'][0]['domain'] # Step 2: Generate random email address self.address = f"{''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(12))}@{self.domain}" # Step 3: Create account resp = requests.post(f"{self.api_url}/accounts", json={"address": self.address, "password": self.password}) if resp.status_code not in [200, 201]: raise Exception(f"Account creation failed: {resp.text}") # Step 4: Get token with retries for attempt in range(self.retries): time.sleep(2) token_res = requests.post(f"{self.api_url}/token", json={"address": self.address, "password": self.password}).json() if 'token' in token_res: self.token = token_res['token'] print(f"✅ MailTM token obtained for {self.address}") return raise Exception("Failed to retrieve token after retries") except Exception as e: print(f"❌ MailTM Initialization Error: {e}") raise def get_verification_code(self): """ Polls the mailbox for a 6-digit verification code. Returns the code as soon as found, or None if timeout. 
""" if not self.token: raise Exception("No MailTM token available") headers = {"Authorization": f"Bearer {self.token}"} start_time = time.time() while time.time() - start_time < self.auth_timeout: try: time.sleep(2) # Poll every 2 seconds resp = requests.get(f"{self.api_url}/messages", headers=headers).json() messages = resp.get('hydra:member', []) if not messages: continue for msg in messages: msg_content = requests.get(f"{self.api_url}/messages/{msg['id']}", headers=headers).json() full_text = str(msg_content.get('text', '')) + str(msg_content.get('html', '')) # Debug: show email content print(f"📩 Polling MailTM: {full_text[:100]}...") # first 100 chars match = re.search(r'\b\d{6}\b', full_text) if match: code = match.group(0) print(f"✅ Verification code found: {code}") return code except Exception as e: print(f"⚠ MailTM Polling Error: {e}") print("❌ Verification code not received within timeout") return None def get_driver(): options = Options() # CRITICAL: For Headless interactability # options.add_argument("--headless=new") options.add_argument("--window-size=1920,1080") options.add_argument("--start-maximized") options.add_argument("--disable-blink-features=AutomationControlled") options.add_experimental_option("excludeSwitches", ["enable-automation"]) prefs = {"download.default_directory": DOWNLOAD_DIR, "download.prompt_for_download": False} options.add_experimental_option("prefs", prefs) service = Service(ChromeDriverManager().install()) return webdriver.Chrome(service=service, options=options) def start_search_sequence(driver, split): wait = WebDriverWait(driver, 45) driver.get("https://websets.exa.ai/websets/search") search_query = f"{split['title']} at {split['company']} in {split['geography']}" box = wait.until(EC.presence_of_element_located((By.XPATH, "//textarea | //input[contains(@placeholder, 'Describe')]"))) box.send_keys(search_query) box.send_keys(Keys.ENTER) time.sleep(10) try: email_btn = wait.until(EC.element_to_be_clickable((By.XPATH, 
"//button[contains(., 'Email')]"))) driver.execute_script("arguments[0].click();", email_btn) except: time.sleep(5) email_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Email')]"))) driver.execute_script("arguments[0].click();", email_btn) start_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Start search')]"))) driver.execute_script("arguments[0].click();", start_btn) def monitor_and_export(driver, split_data, index): wait = WebDriverWait(driver, 30) while True: try: stops = driver.find_elements(By.XPATH, "//button[contains(., 'Stop')]") if not stops or not stops[0].is_displayed(): break except: break time.sleep(10) initial_files = set(os.listdir(DOWNLOAD_DIR)) export_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Export')] | //button[contains(., 'Everything')]"))) driver.execute_script("arguments[0].click();", export_btn) download_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(., 'Download')] | //div[contains(text(), 'Download CSV')]"))) driver.execute_script("arguments[0].click();", download_btn) timestamp = datetime.now().strftime("%H%M%S") start_wait = time.time() while time.time() - start_wait < 45: new_files = set(os.listdir(DOWNLOAD_DIR)) - initial_files if new_files: file_name = list(new_files)[0] if not file_name.endswith('.crdownload'): safe_role = "".join([c for c in split_data['title'] if c.isalnum() or c == ' ']).strip() new_path = os.path.join(DOWNLOAD_DIR, f"{index}_{safe_role}_{timestamp}.csv") os.rename(os.path.join(DOWNLOAD_DIR, file_name), new_path) return time.sleep(2) def main(): doc = Document(SPLITS_PATH) all_splits = [] for para in doc.paragraphs: text = para.text.strip() if "Title:" in text and "Company:" in text: try: title = text.split("Title:")[1].split("Company:")[0].strip() company = text.split("Company:")[1].split("Geography:")[0].strip() geography = text.split("Geography:")[1].strip() if "Geography:" in text else 
"USA" all_splits.append({'title': title, 'company': company, 'geography': geography}) except: continue i = get_last_progress() while i < len(all_splits): driver = None try: driver = get_driver() mail = MailTM() driver.get("https://auth.exa.ai/?callbackUrl=https%3A%2F%2Fwebsets.exa.ai%2F") wait = WebDriverWait(driver, 30) # Step 1: Submit Email email_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='email']"))) email_field.send_keys(mail.address) driver.find_element(By.XPATH, "//form//button[@type='submit']").click() # Step 2: Get Code v_code = mail.get_verification_code() if not v_code: driver.quit() continue # Step 3: Interaction Fix (Based on image) print(f"✅ Code {v_code} found. Entering into UI...") # Target the specific placeholder shown in the image code_input = wait.until(EC.visibility_of_element_located((By.XPATH, "//input[contains(@placeholder, 'Enter verification code')]"))) # Use ActionChains to ensure focus before typing actions = ActionChains(driver) actions.move_to_element(code_input).click().send_keys(v_code).perform() time.sleep(1) # Target "VERIFY CODE" button shown in image verify_btn = driver.find_element(By.XPATH, "//button[contains(., 'VERIFY CODE')]") driver.execute_script("arguments[0].click();", verify_btn) time.sleep(10) # Allow dashboard to load # Search logic h1 = driver.current_window_handle start_search_sequence(driver, all_splits[i]) if i + 1 < len(all_splits): driver.execute_script("window.open('https://websets.exa.ai/websets/search', 'tab2');") driver.switch_to.window(driver.window_handles[1]) h2 = driver.current_window_handle start_search_sequence(driver, all_splits[i+1]) driver.switch_to.window(h1) monitor_and_export(driver, all_splits[i], i+1) save_progress(i + 1) driver.switch_to.window(h2) monitor_and_export(driver, all_splits[i+1], i+2) save_progress(i + 2) i += 2 else: monitor_and_export(driver, all_splits[i], i+1) save_progress(i + 1) i += 1 except Exception as e: print(f"⚠ Batch Error: {e}") 
time.sleep(5) finally: if driver: driver.quit() if __name__ == "__main__": main() [/code] Я пытаюсь автоматизировать процесс входа в систему для auth.exa.ai с помощью Selenium и API временной почты (Mail.TM). Сценарий успешно: [list] [*]Создает временное электронное письмо [*]Запрашивает вход в Exa [*]Получает код подтверждения по электронной почте [*]Заполняет код во входных данных [*]Нажимает ПОДТВЕРДИТЬ КОД [/list] Однако в этот момент exa.ai возвращает страницу с ошибкой: [b]Начало работы с Exa[/b] Неверный доступ к странице входа. Перейдите к приложению, в которое хотите войти, и повторите попытку. Страница ошибки Подробнее здесь: [url]https://stackoverflow.com/questions/79874454/selenium-login-flow-for-exa-ai-fails-with-the-login-page-was-accessed-incorrect[/url]