Мне удалось написать скрипт, который делает именно то, что я хочу. Для этого нужно:
- Открыть сайт
- Перейти к каждой строке таблицы
- Навести указатель мыши на правую сторону, чтобы сделать видимой кнопку «Открыть».
- Нажать кнопку «Открыть», чтобы открыть страницу с информацией.
- Получить содержимое страницы Notion.
- Перейти к следующей ячейке ниже и повторить действия, начиная с шага 3.
Код: Выделить всё
def extract_content_from_cell(
    driver: webdriver.Chrome,
    row_number: int,
    *,
    timeout: float = 15,
    scroll_after_row: int = 16,
    max_scroll_attempts: int = 10,
    scroll_step_px: int = 40,
    settle_seconds: float = 4,
) -> str:
    """Open one table row in the side peek and return the opened page's text.

    The steps are: locate the row's cell, scroll the container for rows past
    ``scroll_after_row``, hover over the cell, click the 'Open in side peek'
    button, then read the text of the ``notion-page-content`` element.
    Every failing step is logged with ``print`` and makes the function
    return an empty string instead of raising.

    Args:
        driver: Selenium Chrome driver with the table page already loaded.
        row_number: Index substituted into the row position of the cell XPath.
        timeout: Seconds each explicit wait may take (cell, button, content).
        scroll_after_row: Rows with a number greater than this trigger the
            scrolling loop before hovering.
        max_scroll_attempts: Maximum number of calls to
            ``scroll_notion_container`` for one row.
        scroll_step_px: Amount passed to ``scroll_notion_container`` on each
            attempt.
        settle_seconds: Fixed pause between clicking the button and reading
            the page content.

    Returns:
        The text of the opened page, or ``""`` if any step failed.
    """
    print(f"Processing cell {row_number}...")
    cell_xpath = f"//*[@id='notion-app']/div/div[1]/div/div[1]/main/div/div/div[3]/div[2]/div/div/div/div[3]/div[2]/div[{row_number}]/div/div[1]/div/div[2]/div/div"

    print(f"Locating cell {row_number}...")
    try:
        cell_element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, cell_xpath))
        )
        print(f"Cell {row_number} located successfully.")
    except Exception as e:
        print(f"Error locating cell {row_number}: {e}")
        return ""

    # Rows beyond the threshold need the container scrolled first.
    # NOTE(review): scroll_notion_container is defined elsewhere; this loop
    # assumes it raises while the cell is still out of reach - confirm.
    if row_number > scroll_after_row:
        for _ in range(max_scroll_attempts):
            try:
                scroll_notion_container(driver, cell_element, scroll_step_px)
                print(f"Hovered over cell {row_number}.")
                break  # stop scrolling once the helper succeeds
            except Exception as e:
                print(
                    f"Scrolling down to bring cell {row_number} into view: {e}")
        else:
            # Every attempt raised; carry on so the hover/click steps below
            # report the concrete failure.
            print(
                f"Cell {row_number} still not reachable after "
                f"{max_scroll_attempts} scroll attempts.")

    # Hover on every row: the 'Open in side peek' button is only shown while
    # the pointer is over the cell. A hover failure is handled like the other
    # steps (log and return "") rather than aborting the caller's loop.
    try:
        hover_over_element(driver, cell_element)
    except Exception as e:
        print(f"Error hovering over cell {row_number}: {e}")
        return ""

    # Locate and click the 'Open in side peek' button.
    print(f"Locating the 'Open in side peek' button for cell {row_number}...")
    try:
        open_button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//div[@aria-label='Open in side peek']"))
        )
        print(
            f"Clicking the 'Open in side peek' button for cell {row_number}...")
        open_button.click()
    except Exception as e:
        print(f"Button not visible for cell {row_number}, error: {e}")
        return ""

    # Fixed pause before reading the content.
    # NOTE(review): presumably gives the side peek time to render its own
    # content element before the presence check below - confirm.
    time.sleep(settle_seconds)

    # Extract the text content from the opened page.
    print(f"Extracting text from the side page for cell {row_number}...")
    try:
        content_element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "notion-page-content"))
        )
        page_text = content_element.text
        print(f"Extracted text content for cell {row_number}.")
        return page_text
    except Exception as e:
        print(f"Error extracting content from cell {row_number}: {e}")
        return ""
Код: Выделить всё
def get_total_rows(driver: webdriver.Chrome,
                   table_xpath: str) -> int:
    """Count the elements that currently match ``table_xpath``.

    Args:
        driver: Selenium Chrome driver used to query the page.
        table_xpath: XPath expression; every matching element counts as a row.

    Returns:
        The number of matching elements found at the time of the call.
    """
    print("Determining the total number of rows in the table...")
    # find_elements returns a list, so its length is the row count.
    row_count = len(driver.find_elements(By.XPATH, table_xpath))
    print(f"Total rows in the table: {row_count}")
    return row_count
Подробнее здесь: https://stackoverflow.com/questions/790 ... -with-sele