Веб-скрейпинг панели мониторинга Power BI с помощью Python — Python

Программы на Python
Ответить Пред. темаСлед. тема
Гость
 Веб-скрейпинг панели мониторинга Power BI с помощью Python

Сообщение Гость »

Я пытаюсь выполнить веб-скрейпинг этой панели мониторинга Power BI. Мне нужна вся таблица внизу, содержащая всю информацию о требованиях к лицензированию.
Чтобы получить содержимое веб-страницы, я использовал Scrapingdog. Код ниже успешно извлекает столбцы «Job Name» и «State». Однако все остальные фрагменты кода извлекают заголовки столбцов, а не фактические значения, хотя они написаны точно так же. Ошибка также приведена ниже.
import time
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver

# Start Chrome maximized and pair the driver with an explicit wait (timeout 10).
option = ChromeOptions()
option.add_argument("--start-maximized")
driver = Chrome(options=option)
wait = WebDriverWait(driver, timeout=10)

# Open the page through the Scrapingdog endpoint
driver.get("https://api.scrapingdog.com/scrape?api_ ... amic=false")

def _locate_column(label):
    """Return the element for one dashboard column once it is visible.

    Uses the module-level ``wait`` to block until a ``div`` whose
    ``aria-label`` attribute equals *label* is visible, then returns that
    element. ``wait.until`` raises
    ``selenium.common.exceptions.TimeoutException`` when no such element
    becomes visible before the wait's timeout expires.
    """
    selector = f'div[aria-label="{label}"]'
    return wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, selector)))


# NOTE(review): in the transcript further down, the "Job Name" lookup returned
# newline-separated cell values, while "Education Requirement" returned only
# the header label. The selector may be matching a column header rather than
# the data cells for some columns -- verify against the live DOM.

## job name
job_name_data_element = _locate_column("Job Name")
# Extract the text from the Job Name data element
job_name_data = job_name_data_element.text

## state
state_column_element = _locate_column("State")
state_data = state_column_element.text

## license
licensed_column_element = _locate_column("Licensed")
licensed_data = licensed_column_element.text

## education
education_column_element = _locate_column("Education Requirement")
education_data = education_column_element.text

## training
training_column_element = _locate_column("Amount of Training Required [In Hours]")
training_data = training_column_element.text

## experience
experience_column_element = _locate_column("Amount of Experience Required")
experience_data = experience_column_element.text

## pro exam
exam_column_element = _locate_column("Professional Exam")
exam_data = exam_column_element.text

## renewal time
renewal_time_column_element = _locate_column("Required Time of License Renewal (In Years)")
renewal_time_data = renewal_time_column_element.text

## continuing education
# (variable names keep the original "continious" spelling so existing
# references to them keep working)
continious_education_column_element = _locate_column("Continuing Education Requirement")
continious_education_column_element_data = continious_education_column_element.text

## additional exams
additional_exams_column_element = _locate_column("Additional Required Exams")
additional_exams_column_element_data = additional_exams_column_element.text

## cost of initial licensure
cost_of_licensure_column_element = _locate_column("Cost of Initial Licensure (In Dollars)")
cost_of_licensure_column_element_data = cost_of_licensure_column_element.text

## license renewal
license_renewal_column_element = _locate_column("Cost of License Renewal (In Dollars)")
license_renewal_column_element_data = license_renewal_column_element.text

## reciprocity
reciprocity_column_element = _locate_column("Reciprocity or Endorsement")
reciprocity_column_element_data = reciprocity_column_element.text

## character
character_column_element = _locate_column("Good Moral Character Requirement")
character_column_element_data = character_column_element.text

## blanket ban
ban_column_element = _locate_column("Blanket Ban for Ex-Offenders")
ban_column_element_data = ban_column_element.text

## rehab (looked up once; the original repeated this lookup verbatim)
rehab_column_element = _locate_column("Rehabilitation Requirement")
rehab_column_element_data = rehab_column_element.text

## relationship
relationship_column_element = _locate_column("Relationship between Offense and Occupation")
relationship_column_element_data = relationship_column_element.text

## limitations
limitations_column_element = _locate_column("Limitations on Scope of Inquiry")
limitations_column_element_data = limitations_column_element.text

## age
age_column_element = _locate_column("Minimum Age (In Years)")
age_column_element_data = age_column_element.text

# Labels of the dashboard table columns to scrape. Each label is also the
# value of the ``aria-label`` attribute used to find that column's element.
columns = [
    "Job Name",
    "State",
    "Licensed",
    "Education Requirement",
    "Amount of Training Required [In Hours]",
    "Amount of Experience Required",
    "Professional Exam",
    "Required Time of License Renewal (In Years)",
    "Continuing Education Requirement",
    "Additional Required Exams",
    "Cost of Initial Licensure (In Dollars)",
    "Cost of License Renewal (In Dollars)",
    "Reciprocity or Endorsement",
    "Good Moral Character Requirement",
    "Blanket Ban for Ex-Offenders",
    "Rehabilitation Requirement",
    "Relationship between Offense and Occupation",
    "Limitations on Scope of Inquiry",
    "Minimum Age (In Years)",
]

# CSS selector for each column, keyed by column label. Built from ``columns``
# so the two can never drift apart; the original literal was closed after the
# "State" entry, which left the remaining entries outside the dict and made
# the script a syntax error.
css_selectors = {label: f'div[aria-label="{label}"]' for label in columns}

# Scraped text of each column, keyed by column label.
data = {}

# Look up every configured column in turn and store its element's text.
# ``wait.until`` raises TimeoutException if a column's element does not
# become visible in time, which aborts the loop at that column.
for column_name, selector in css_selectors.items():
    element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, selector)))
    data[column_name] = element.text

import pandas as pd
# Wrap the scraped mapping in a one-element list so pandas builds a
# single-row frame whose columns are the mapping's keys.
df = pd.DataFrame(data=[data])


job_name_data
'Athletic Trainer\nAuctioneer\nBarber\nBuilding Inspector\nCertified Nursing Assistant\nChild, Family, and School Social Workers\nCommercial Fisherman\nCosmetologist\nDental Hygentist\nDietician\nDrinking Water Treatment Plant Operator (Grade 1)\nElectrician\nEmergency Medical Technicians (EMT)\nFuneral Service Director\nGeneral Contractor\nHeavy Tractor Trailer Truck Drivers'
>>> education_column_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[aria-label="Education Requirement"]')))
education_data = education_column_element.text
education_data
Traceback (most recent call last):
File "", line 1, in
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/support/wait.py", line 105, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Stacktrace:
0 chromedriver 0x00000001050683e8 chromedriver + 4326376
1 chromedriver 0x00000001050608b0 chromedriver + 4294832
2 chromedriver 0x0000000104c8c088 chromedriver + 278664
3 chromedriver 0x0000000104ccea80 chromedriver + 551552
4 chromedriver 0x0000000104d074f8 chromedriver + 783608
5 chromedriver 0x0000000104cc34e4 chromedriver + 505060
6 chromedriver 0x0000000104cc3f5c chromedriver + 507740
7 chromedriver 0x000000010502b984 chromedriver + 4077956
8 chromedriver 0x000000010503073c chromedriver + 4097852
9 chromedriver 0x0000000105012528 chromedriver + 3974440
10 chromedriver 0x0000000105031054 chromedriver + 4100180
11 chromedriver 0x0000000105003b18 chromedriver + 3914520
12 chromedriver 0x000000010505165c chromedriver + 4232796
13 chromedriver 0x00000001050517d8 chromedriver + 4233176
14 chromedriver 0x0000000105060524 chromedriver + 4293924
15 libsystem_pthread.dylib 0x000000019e78826c _pthread_start + 148
16 libsystem_pthread.dylib 0x000000019e78308c thread_start + 8

>>> education_data = education_column_element.text
Traceback (most recent call last):
File "", line 1, in
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/remote/webelement.py", line 90, in text
return self._execute(Command.GET_ELEMENT_TEXT)["value"]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/remote/webelement.py", line 395, in _execute
return self._parent.execute(command, params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/remote/webdriver.py", line 347, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/selenium/webdriver/remote/errorhandler.py", line 229, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: disconnected: not connected to DevTools
(failed to check if window was closed: disconnected: not connected to DevTools)
(Session info: chrome=123.0.6312.59)
Stacktrace:
0 chromedriver 0x0000000100f543e8 chromedriver + 4326376
1 chromedriver 0x0000000100f4c8b0 chromedriver + 4294832
2 chromedriver 0x0000000100b78088 chromedriver + 278664
3 chromedriver 0x0000000100b60da4 chromedriver + 183716
4 chromedriver 0x0000000100b60ce0 chromedriver + 183520
5 chromedriver 0x0000000100bf2c28 chromedriver + 781352
6 chromedriver 0x0000000100baf4e4 chromedriver + 505060
7 chromedriver 0x0000000100baff5c chromedriver + 507740
8 chromedriver 0x0000000100f17984 chromedriver + 4077956
9 chromedriver 0x0000000100f1c73c chromedriver + 4097852
10 chromedriver 0x0000000100efe528 chromedriver + 3974440
11 chromedriver 0x0000000100f1d054 chromedriver + 4100180
12 chromedriver 0x0000000100eefb18 chromedriver + 3914520
13 chromedriver 0x0000000100f3d65c chromedriver + 4232796
14 chromedriver 0x0000000100f3d7d8 chromedriver + 4233176
15 chromedriver 0x0000000100f4c524 chromedriver + 4293924
16 libsystem_pthread.dylib 0x000000019e78826c _pthread_start + 148
17 libsystem_pthread.dylib 0x000000019e78308c thread_start + 8

>>> education_data
'Education Requirement'
>>>


Подробнее здесь: https://stackoverflow.com/questions/782 ... ith-python
Реклама
Ответить Пред. темаСлед. тема

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

  • Похожие темы
    Ответы
    Просмотры
    Последнее сообщение
  • Очистка отчета PowerBI, встроенного в веб-сайт, с помощью Python
    Anonymous » » в форуме Python
    0 Ответы
    9 Просмотры
    Последнее сообщение Anonymous
  • Изменение панели управления PowerBI с помощью Python
    Anonymous » » в форуме Python
    0 Ответы
    15 Просмотры
    Последнее сообщение Anonymous
  • Как использовать itables с Quarto при создании панели мониторинга в Python?
    Anonymous » » в форуме Python
    0 Ответы
    8 Просмотры
    Последнее сообщение Anonymous
  • Как программно скопировать макет панели мониторинга в другую в Grafana?
    Anonymous » » в форуме Python
    0 Ответы
    26 Просмотры
    Последнее сообщение Anonymous
  • Как обновить метки осей на панели мониторинга Plotly?
    Anonymous » » в форуме Python
    0 Ответы
    22 Просмотры
    Последнее сообщение Anonymous

Вернуться в «Python»