Проблемы с сохранением данных в действиях GitHub

Проблемы с сохранением данных в действиях GitHub ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Проблемы с сохранением данных в действиях GitHub

Цитата

Сообщение Anonymous » 15 янв 2026, 16:02

Пытаюсь развернуть свой код Python в GitHub Actions для удаленного выполнения, но файлы не создаются. После успешного запуска рабочего процесса я получаю сообщение об ошибке, в котором говорится, что фиксировать нечего. Я выполнил необходимую настройку рабочего процесса для фиксации и отправки целевой папки.
Я доказал, что рабочий процесс работает так, как задумано, создав фиктивный код и запустив его в отдельном репозитории, который успешно работает, создает и сохраняет файлы по назначению. Полагая, что проблемы есть в моем коде - он отлично работает локально, - я подключил фиктивный код к основной функции и получаю тот же результат: ничего не создается.
Чего мне не хватает?
Вот код Python:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin
import time
import random
import datetime
import os

def scrap_page(url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
items=soup.find_all('article', class_='prd _fb _p col c-prd')

products = []
for soup_item in items:
stock = float(soup_item.find("div", class_ = "stk").text.split()[0]) if soup_item.find("div", class_="stk") else 0
#stock = float(stock.split()[0])
for item in soup_item.find_all('div', class_='info'):

# Extract Name (h3 tag)
name = item.find('h3', class_='name').text.strip() if item.find('h3', class_='name') else "None"

# Extract Current Price
price_raw = item.find('div', class_='prc')
# Clean price: remove 'KSh' and ','
price = int(price_raw.text.replace('KSh', '').replace(',', '').split('-')[0].strip()) if item.find('div', class_='prc') else 0

# Extract Old Price (may not exist)
old_price_tag = item.find('div', class_='old')
old_price = int(old_price_tag.text.replace('KSh', '').replace(',', '').split('-')[0].strip()) if old_price_tag else 0

# Extract Discount %
discount_tag = item.find('div', class_='bdg _dsct _sm')
discount = int(discount_tag.text.replace('%', '').strip()) if discount_tag else 0

# Extract Rating & Review Count
rev_div = item.find('div', class_='rev')
if rev_div:
# The text before the "(" is the rating (e.g., "3.9 out of 5")
rating = list(rev_div.stripped_strings)[0]
rating = float(rating.split()[0])
# The text inside the "()" is the review count
# We strip the parentheses and convert to int
reviews = rev_div.text.split('(')[-1].replace(')', '').strip()
else:
rating, reviews = 0,0

products.append({
'datetime':datetime.datetime.now(),
"name": name,
"price": price,
"old_price": old_price,
"discount": discount,
"rating": rating,
"reviews": int(reviews),
"stock_left": stock
})
page_links = soup.find("div", class_="pg-w -ptm -pbxl")
next_page_url = None
if page_links:
next_tag = page_links.find('a', attrs={'aria-label': 'Next Page'})
if next_tag:
next_page_url = urljoin("https://www.jumia.co.ke", next_tag.get('href'))

return products, next_page_url

def main():
base_url = "https://www.jumia.co.ke/flash-sales/"
current_url = base_url

while current_url:
wait_time = random.uniform(1, 5)
print(f"Waiting for {wait_time:.2f} seconds...")
time.sleep(wait_time)
print(f"Scraping: {current_url}")
products, next_page_url = scrap_page(current_url)

df = pd.DataFrame(products)
df.to_csv('scripts/data/Jumiaproducts.csv', index=False)

#dummy code
my_data = {
'name': None,
'age': None,
'city': None,
}

# with open('scripts/data/data.pkl', 'wb') as f:
# pickle.dump(my_data, f)
df2 = pd.DataFrame(my_data,index=[0])
df2.to_csv("scripts/data/data.csv")

current_url = next_page_url

try:
main()
print("Scraping completed successfully!")
except KeyboardInterrupt:
print("\nScraper stopped by user.")
except Exception as e:
print(f"An error occurred: {e}")

Вот рабочий процесс YAML:
name: scrapper
on:
workflow_dispatch:
permissions:
contents: write
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
- name: Install dependencies
run: |
pip install -r ./scripts/requirements.txt
- name: run script
run: python ./scripts/scraper.py
- name: save doc
uses: actions/upload-artifact@v4
with:
name: save-data
path: |
scripts/data
- name: Commit and Push changes
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git add scripts/data
git commit -m "Add generated artifact"
git push

Подробнее здесь: https://stackoverflow.com/questions/798 ... ub-actions

1768482154

Anonymous

Пытаюсь развернуть свой код Python в GitHub Actions для удаленного выполнения, но файлы не создаются. После успешного запуска рабочего процесса я получаю сообщение об ошибке, в котором говорится, что фиксировать нечего. Я выполнил необходимую настройку рабочего процесса для фиксации и отправки целевой папки.
Я доказал, что рабочий процесс работает так, как задумано, создав фиктивный код и запустив его в отдельном репозитории, который успешно работает, создает и сохраняет файлы по назначению.  Полагая, что проблемы есть в моем коде - он отлично работает локально, - я подключил фиктивный код к основной функции и получаю тот же результат: ничего не создается.
Чего мне не хватает?
Вот код Python:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin
import time
import random
import datetime
import os

def scrap_page(url):
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
items=soup.find_all('article', class_='prd _fb _p col c-prd')

products = []
for soup_item in  items:
stock = float(soup_item.find("div", class_ = "stk").text.split()[0]) if soup_item.find("div", class_="stk") else 0
#stock = float(stock.split()[0])
for item in soup_item.find_all('div', class_='info'):

# Extract Name (h3 tag)
name = item.find('h3', class_='name').text.strip() if item.find('h3', class_='name') else "None"

# Extract Current Price
price_raw = item.find('div', class_='prc')
# Clean price: remove 'KSh' and ','
price = int(price_raw.text.replace('KSh', '').replace(',', '').split('-')[0].strip()) if item.find('div', class_='prc') else 0

# Extract Old Price (may not exist)
old_price_tag = item.find('div', class_='old')
old_price = int(old_price_tag.text.replace('KSh', '').replace(',', '').split('-')[0].strip()) if old_price_tag else 0

# Extract Discount %
discount_tag = item.find('div', class_='bdg _dsct _sm')
discount = int(discount_tag.text.replace('%', '').strip()) if discount_tag else 0

# Extract Rating & Review Count
rev_div = item.find('div', class_='rev')
if rev_div:
# The text before the "(" is the rating (e.g., "3.9 out of 5")
rating = list(rev_div.stripped_strings)[0]
rating = float(rating.split()[0])
# The text inside the "()"  is the review count
# We strip the parentheses and convert to int
reviews = rev_div.text.split('(')[-1].replace(')', '').strip()
else:
rating, reviews = 0,0

products.append({
'datetime':datetime.datetime.now(),
"name": name,
"price": price,
"old_price": old_price,
"discount": discount,
"rating": rating,
"reviews": int(reviews),
"stock_left": stock
})
page_links = soup.find("div", class_="pg-w -ptm -pbxl")
next_page_url = None
if page_links:
next_tag = page_links.find('a', attrs={'aria-label': 'Next Page'})
if next_tag:
next_page_url = urljoin("https://www.jumia.co.ke", next_tag.get('href'))

return products, next_page_url

def main():
base_url = "https://www.jumia.co.ke/flash-sales/"
current_url = base_url

while current_url:
wait_time = random.uniform(1, 5)
print(f"Waiting for {wait_time:.2f} seconds...")
time.sleep(wait_time)
print(f"Scraping: {current_url}")
products, next_page_url = scrap_page(current_url)

df = pd.DataFrame(products)
df.to_csv('scripts/data/Jumiaproducts.csv', index=False)

#dummy code
my_data = {
'name': None,
'age': None,
'city': None,
}

# with open('scripts/data/data.pkl', 'wb') as f:
#     pickle.dump(my_data, f)
df2 = pd.DataFrame(my_data,index=[0])
df2.to_csv("scripts/data/data.csv")

current_url = next_page_url

try:
main()
print("Scraping completed successfully!")
except KeyboardInterrupt:
print("\nScraper stopped by user.")
except Exception as e:
print(f"An error occurred: {e}")

Вот рабочий процесс YAML:
name: scrapper
on:
workflow_dispatch:
permissions:
contents: write
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11.4"
cache: "pip"
- name: Install dependencies
run: |
pip install -r ./scripts/requirements.txt
- name: run script
run: python ./scripts/scraper.py
- name: save doc
uses: actions/upload-artifact@v4
with:
name: save-data
path: |
scripts/data
- name: Commit and Push changes
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git add scripts/data
git commit -m "Add generated artifact"
git push
 

Подробнее здесь: [url]https://stackoverflow.com/questions/79868581/trouble-saving-data-in-github-actions[/url]