Как использовать библиотеку win32com для изменения документа Word и сохранения его в новом файле Word

Как использовать библиотеку win32com для изменения документа Word и сохранения его в новом файле Word ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Как использовать библиотеку win32com для изменения документа Word и сохранения его в новом файле Word

Цитата

Сообщение Anonymous » 12 дек 2025, 05:41

Я использую Python с pywin32 для поиска и замены текста в документе Word (a.docx).
Код находит целевую строку «123», но когда я сохраняю документ с замененными строками в новый файл (b.docx), содержимое остается неизменным — вместо замененного текста появляется исходный текст.
Мой текущий код перебирает все диапазоны содержимого документа (StoryRanges), а также проверяет фигуры в верхних и нижних колонтитулах:
Проблемное поведение:

Входной файл (a.docx) содержит: "Dahio1213dnuaidhaoi123455566-090-3211231sdasdo" + символы новой строки + "123" + новые строки + "123"
После запуска сценария выходной файл (b.docx) имеет идентичное содержимое (без замен)
Напечатанный счетчик замен показывает ненулевое число, что указывает на то, что совпадения были найдены.
Права доступа к файлам в порядке; диалоговые окна Word работу не блокируют

Что я пробовал:

Использование doc.SaveAs2() с FileFormat=16 (wdFormatDocx)
Итерация по StoryRange с помощью NextStoryRange
Замена текста в Shape.TextFrame в верхних и нижних колонтитулах
Закрытие документа без сохранения перед SaveAs2

Вызов Find.Execute() с replace=2 (wdReplaceAll) кажется успешным, но фактические замены не сохраняются в сохраненном файле.
Как гарантировать, что изменения, внесённые операцией «Найти и заменить», действительно сохраняются?
Вот мой код
import win32com.client
from pathlib import Path

def replace_text(input_file: str, output_file: str, search_text: str, replace_text: str) -> int:
    """Replace text in a Word document through COM automation and save a copy.

    Opens ``input_file`` in a hidden Word instance, runs a case-insensitive
    "replace all" over every story range of the document, and saves the
    result to ``output_file`` in .docx format.

    Args:
        input_file: Path of the document to open.
        output_file: Path the modified document is saved to.
        search_text: Text to look for. An empty string replaces nothing.
        replace_text: Text substituted for every match.

    Returns:
        The number of occurrences counted (by a case-insensitive substring
        scan of each story's text) in the stories where ``Find.Execute``
        reported success. Returns 0 if any error occurred; the error is
        printed rather than raised.
    """
    word = None
    doc = None
    try:
        word = win32com.client.Dispatch("Word.Application")
        word.Visible = False
        word.DisplayAlerts = False

        # Word runs as a separate process, so a relative path may not be
        # resolved against this script's working directory; pass absolute
        # paths to Open/SaveAs2.
        doc = word.Documents.Open(str(Path(input_file).absolute()))

        wd_find_continue = 1
        wd_replace_all = 2
        wd_format_docx = 16

        total_replaced = 0
        needle = search_text.lower()

        # StoryRanges(1) is only the main text story, and NextStoryRange only
        # walks the chain of linked stories that starts there. Iterating the
        # whole collection visits the first story of every type present in
        # the document; the inner loop then follows each chain.
        for first_story in doc.StoryRanges:
            rng = first_story
            while rng is not None:
                occurrences = rng.Text.lower().count(needle) if needle else 0
                if occurrences:
                    finder = rng.Find
                    finder.ClearFormatting()
                    finder.Text = search_text
                    finder.Replacement.Text = replace_text
                    finder.MatchCase = False
                    finder.MatchWholeWord = False
                    finder.MatchWildcards = False
                    finder.MatchSoundsLike = False
                    finder.MatchAllWordForms = False
                    finder.Forward = True
                    finder.Wrap = wd_find_continue

                    replaced = finder.Execute(
                        FindText=search_text,
                        ReplaceWith=replace_text,
                        Replace=wd_replace_all,
                    )
                    # Only count this story when Word itself reports a
                    # successful find; otherwise the returned number would
                    # claim replacements that never happened.
                    if replaced:
                        total_replaced += occurrences

                rng = rng.NextStoryRange

        doc.SaveAs2(str(Path(output_file).absolute()), FileFormat=wd_format_docx)
        return total_replaced
    except Exception as e:
        print(f"Error: Failed to replace text in document. Details: {str(e)}")
        return 0
    finally:
        # Best-effort cleanup: the document has already been saved under the
        # new name (or the run failed), so discard any pending changes and
        # never let a cleanup error mask the real outcome.
        if doc is not None:
            try:
                doc.Close(SaveChanges=False)
            except Exception:
                pass
        if word is not None:
            try:
                word.Quit()
            except Exception:
                pass

def main():
    """Run the replacement on the sample document and print the reported count."""
    # (input path, output path, text to find, replacement text)
    job = (r"E:\Document\\a.docx", r"E:\Document\\b.docx", "123", "abc")
    print(f"count: {replace_text(*job)}")


if __name__ == "__main__":
    main()

Я попробовал обрабатывать документы Word с помощью python-docx, и производительность хорошая.
Вот мой код.
from pathlib import Path
import re
from typing import Iterable

from docx import Document
from docx.text.paragraph import Paragraph
from docx.text.run import Run

def replace_text(input_file: str, output_file: str, search_text: str, replace_with: str) -> int:
    """Case-insensitive find/replace for paragraphs, tables, headers, and footers using python-docx.

    The search is applied to each run's text separately, so a match that is
    split across two runs is not detected. Tables nested inside table cells
    are visited as well.

    Args:
        input_file: Path of the .docx file to read.
        output_file: Path the modified document is written to.
        search_text: Literal text to look for (not a regular expression).
            An empty string replaces nothing; the document is still saved.
        replace_with: Literal replacement text; backslashes and group
            references in it are inserted verbatim.

    Returns:
        The total number of substitutions made, or 0 if any error occurred
        (the error is printed rather than raised).
    """
    pattern = re.compile(re.escape(search_text), re.IGNORECASE)
    total_replaced = 0

    def literal_replacement(_match) -> str:
        # A callable keeps ``re`` from treating ``replace_with`` as a
        # template, where "\1", "\g<name>" or a lone backslash would be
        # expanded or rejected instead of being inserted as written.
        return replace_with

    def replace_in_runs(runs: Iterable[Run]):
        nonlocal total_replaced
        for run in runs:
            new_text, found = pattern.subn(literal_replacement, run.text)
            if found:
                run.text = new_text
                total_replaced += found

    def replace_in_paragraphs(paragraphs: Iterable[Paragraph]):
        for para in paragraphs:
            replace_in_runs(para.runs)

    def replace_in_container(container):
        # ``container`` is anything exposing ``paragraphs`` and ``tables``:
        # the document body, a header, a footer, or a table cell. Recursing
        # into cells also reaches tables nested inside other tables.
        replace_in_paragraphs(container.paragraphs)
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    replace_in_container(cell)

    try:
        doc = Document(str(Path(input_file)))

        # An empty pattern matches at every position, which would insert the
        # replacement between all characters; treat it as "nothing to do".
        if search_text:
            replace_in_container(doc)
            for section in doc.sections:
                replace_in_container(section.header)
                replace_in_container(section.footer)

        doc.save(str(Path(output_file)))
        return total_replaced
    except Exception as e:
        print(f"Error: Failed to replace text in document. Details: {str(e)}")
        return 0

def main():
    """Run the replacement on the sample document and print the reported count."""
    # (input path, output path, text to find, replacement text)
    job = (r"E:\Document\\a.docx", r"E:\Document\\b.docx", "123", "abc")
    print(f"count: {replace_text(*job)}")


if __name__ == "__main__":
    main()

Подробнее здесь: https://stackoverflow.com/questions/798 ... to-a-new-w

1765507282

Anonymous

Я использую Python с pywin32 для поиска и замены текста в документе Word (a.docx).
Код находит целевую строку «123», но когда я сохраняю документ с замененными строками в новый файл (b.docx), содержимое остается неизменным — вместо замененного текста появляется исходный текст.
Мой текущий код перебирает все диапазоны содержимого документа (StoryRanges), а также проверяет фигуры в верхних и нижних колонтитулах:
Проблемное поведение:
[list]
[*]Входной файл (a.docx) содержит: "Dahio1213dnuaidhaoi123455566-090-3211231sdasdo" + символы новой строки + "123" + новые строки + "123"

[*]После запуска сценария выходной файл (b.docx) имеет идентичное содержимое (без замен)

[*]Напечатанный счетчик замен показывает ненулевое число, что указывает на то, что совпадения были найдены.

[*]Права доступа к файлам в порядке; диалоговые окна Word работу не блокируют

[/list]
Что я пробовал:
[list]
[*]Использование doc.SaveAs2() с FileFormat=16 (wdFormatDocx)

[*]Итерация по StoryRange с помощью NextStoryRange

[*]Замена текста в Shape.TextFrame в верхних и нижних колонтитулах

[*]Закрытие документа без сохранения перед SaveAs2

[/list]
Вызов Find.Execute() с replace=2 (wdReplaceAll) кажется успешным, но фактические замены не сохраняются в сохраненном файле.
Как гарантировать, что изменения, внесённые операцией «Найти и заменить», действительно сохраняются?
Вот мой код
import win32com.client
from pathlib import Path

def replace_text(input_file: str, output_file: str, search_text: str, replace_text: str) -> int:
    """Replace text in a Word document through COM automation and save a copy.

    Opens ``input_file`` in a hidden Word instance, runs a case-insensitive
    "replace all" over every story range of the document, and saves the
    result to ``output_file`` in .docx format.

    Args:
        input_file: Path of the document to open.
        output_file: Path the modified document is saved to.
        search_text: Text to look for. An empty string replaces nothing.
        replace_text: Text substituted for every match.

    Returns:
        The number of occurrences counted (by a case-insensitive substring
        scan of each story's text) in the stories where ``Find.Execute``
        reported success. Returns 0 if any error occurred; the error is
        printed rather than raised.
    """
    word = None
    doc = None
    try:
        word = win32com.client.Dispatch("Word.Application")
        word.Visible = False
        word.DisplayAlerts = False

        # Word runs as a separate process, so a relative path may not be
        # resolved against this script's working directory; pass absolute
        # paths to Open/SaveAs2.
        doc = word.Documents.Open(str(Path(input_file).absolute()))

        wd_find_continue = 1
        wd_replace_all = 2
        wd_format_docx = 16

        total_replaced = 0
        needle = search_text.lower()

        # StoryRanges(1) is only the main text story, and NextStoryRange only
        # walks the chain of linked stories that starts there. Iterating the
        # whole collection visits the first story of every type present in
        # the document; the inner loop then follows each chain.
        for first_story in doc.StoryRanges:
            rng = first_story
            while rng is not None:
                occurrences = rng.Text.lower().count(needle) if needle else 0
                if occurrences:
                    finder = rng.Find
                    finder.ClearFormatting()
                    finder.Text = search_text
                    finder.Replacement.Text = replace_text
                    finder.MatchCase = False
                    finder.MatchWholeWord = False
                    finder.MatchWildcards = False
                    finder.MatchSoundsLike = False
                    finder.MatchAllWordForms = False
                    finder.Forward = True
                    finder.Wrap = wd_find_continue

                    replaced = finder.Execute(
                        FindText=search_text,
                        ReplaceWith=replace_text,
                        Replace=wd_replace_all,
                    )
                    # Only count this story when Word itself reports a
                    # successful find; otherwise the returned number would
                    # claim replacements that never happened.
                    if replaced:
                        total_replaced += occurrences

                rng = rng.NextStoryRange

        doc.SaveAs2(str(Path(output_file).absolute()), FileFormat=wd_format_docx)
        return total_replaced
    except Exception as e:
        print(f"Error: Failed to replace text in document.  Details: {str(e)}")
        return 0
    finally:
        # Best-effort cleanup: the document has already been saved under the
        # new name (or the run failed), so discard any pending changes and
        # never let a cleanup error mask the real outcome.
        if doc is not None:
            try:
                doc.Close(SaveChanges=False)
            except Exception:
                pass
        if word is not None:
            try:
                word.Quit()
            except Exception:
                pass

def main():
    """Run the replacement on the sample document and print the reported count."""
    # (input path, output path, text to find, replacement text)
    job = (r"E:\Document\\a.docx", r"E:\Document\\b.docx", "123", "abc")
    print(f"count: {replace_text(*job)}")


if __name__ == "__main__":
    main()

Я попробовал обрабатывать документы Word с помощью python-docx, и производительность хорошая.
Вот мой код.
from pathlib import Path
import re
from typing import Iterable

from docx import Document
from docx.text.paragraph import Paragraph
from docx.text.run import Run

def replace_text(input_file: str, output_file: str, search_text: str, replace_with: str) -> int:
    """Case-insensitive find/replace for paragraphs, tables, headers, and footers using python-docx.

    The search is applied to each run's text separately, so a match that is
    split across two runs is not detected. Tables nested inside table cells
    are visited as well.

    Args:
        input_file: Path of the .docx file to read.
        output_file: Path the modified document is written to.
        search_text: Literal text to look for (not a regular expression).
            An empty string replaces nothing; the document is still saved.
        replace_with: Literal replacement text; backslashes and group
            references in it are inserted verbatim.

    Returns:
        The total number of substitutions made, or 0 if any error occurred
        (the error is printed rather than raised).
    """
    pattern = re.compile(re.escape(search_text), re.IGNORECASE)
    total_replaced = 0

    def literal_replacement(_match) -> str:
        # A callable keeps ``re`` from treating ``replace_with`` as a
        # template, where "\1", "\g<name>" or a lone backslash would be
        # expanded or rejected instead of being inserted as written.
        return replace_with

    def replace_in_runs(runs: Iterable[Run]):
        nonlocal total_replaced
        for run in runs:
            new_text, found = pattern.subn(literal_replacement, run.text)
            if found:
                run.text = new_text
                total_replaced += found

    def replace_in_paragraphs(paragraphs: Iterable[Paragraph]):
        for para in paragraphs:
            replace_in_runs(para.runs)

    def replace_in_container(container):
        # ``container`` is anything exposing ``paragraphs`` and ``tables``:
        # the document body, a header, a footer, or a table cell. Recursing
        # into cells also reaches tables nested inside other tables.
        replace_in_paragraphs(container.paragraphs)
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    replace_in_container(cell)

    try:
        doc = Document(str(Path(input_file)))

        # An empty pattern matches at every position, which would insert the
        # replacement between all characters; treat it as "nothing to do".
        if search_text:
            replace_in_container(doc)
            for section in doc.sections:
                replace_in_container(section.header)
                replace_in_container(section.footer)

        doc.save(str(Path(output_file)))
        return total_replaced
    except Exception as e:
        print(f"Error: Failed to replace text in document. Details: {str(e)}")
        return 0

def main():
    """Run the replacement on the sample document and print the reported count."""
    # (input path, output path, text to find, replacement text)
    job = (r"E:\Document\\a.docx", r"E:\Document\\b.docx", "123", "abc")
    print(f"count: {replace_text(*job)}")


if __name__ == "__main__":
    main()
 

Подробнее здесь: [url]https://stackoverflow.com/questions/79843781/how-to-use-the-win32com-library-to-modify-a-word-document-and-save-it-to-a-new-w[/url]