def processHtml2(html, entryName):
    """Strip unwanted sections from an HTML page and flatten it to one line.

    The markup is parsed with BeautifulSoup's ``html.parser``. The input
    looks like MediaWiki-generated HTML (presumably a Wiktionary entry --
    confirm against the caller).

    Args:
        html: The page markup, as a string.
        entryName: The entry title, as a string. It replaces the content of
            the ``English`` heading and is prepended to the returned text.

    Returns:
        A string of the form ``entryName + ' ' + style + ' ' + markup +
        ' \\n'``, where ``markup`` is the serialized tree with every line
        stripped and the lines joined by single spaces.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Language headings: keep only the "English" one (retitled to the entry
    # name); every other heading takes its enclosing section with it.
    for heading in soup.select('section[data-mw-section-id] > h2[id]'):
        if heading.text == 'English':
            heading.clear()
            heading.string = entryName
            continue
        heading.parent.extract()

    # Elements whose *parent* is removed from the tree. Joined with ', ' this
    # yields one CSS selector group.
    unwanted = (
        '[id^="Derived_terms"]',
        '[id^="Descendants"]',
        '[id^="Related_terms"]',
        '[id^="See_also"]',
        '[id^="Anagrams"]',
        '[id^="Alternative_forms"]',
        '[property="mw:PageProp/toc"]',
        '.disambig-see-also',
        '.audiotable',
        '[id^="Translations"]',
        '[id^="Usage_notes"]',
        '[id^="Pronunciation_notes"]',
        '[id^="Particle"]',
        '[id^="Interjection"]',
        '[id^="Hyponyms"]',
        '[id^="Notes"]',
        '[id^="Further_reading"]',
    )
    for node in soup.select(', '.join(unwanted)):
        node.parent.extract()

    style = ' '

    # Drop empty lines, trim the rest, and put everything on a single line.
    markup = ' '.join(
        row.strip() for row in str(soup).split('\n') if row
    )

    # Same layout as entryName + ' ' + style + ' ' + markup + ' \n'.
    return ' '.join((entryName, style, markup, '\n'))
with open(outputHtmlDir, "w", encoding = "utf-8") as g: html = processHtml2(response.text, entryName) g.write(html) [/code] В коде выше я удаляю из DOM родительские элементы определённых узлов с помощью следующего цикла: [code] for d in soup.select('[id^="Derived_terms"], [id^="Alternative_forms"], [id^="Descendants"], [id^="Related_terms"], [id^="See_also"], [id^="Anagrams"], [property="mw:PageProp/toc"], .disambig-see-also, .audiotable, [id^="Translations"], [id^="Usage_notes"], [id^="Pronunciation_notes"], [id^="Particle"], [id^="Interjection"], [id^="Hyponyms"], [id^="Notes"], [id^="Further_reading"]'): d.parent.extract() [/code] Как добиться того же эффекта с помощью JavaScript? Тогда нам не пришлось бы изменять сам HTML. Я попробовал так: [code]var matches = document.querySelectorAll('[id^="Alternative_forms"]'); matches.forEach(node => node.parentNode?.remove()); [/code] Но это не даёт никакого эффекта: [img]https://i.sstatic.net/OlvJT0Q1.png[/img]