from jinja2 import Environment, PackageLoader, select_autoescape
# Jinja2 environment created with default settings (no arguments are passed).
environment = Environment()
# Sample template text used throughout this script. The {{ example_field }}
# placeholder appears twice: once mid-sentence (right after a comma) and once
# at the start of a sentence (right after "Terms can always change.").
EXAMPLE_BODY_1 = """
Example
Example Contract Term
Sometimes the example_field is in the middle so needs to be lowercase,
{{ example_field }} is a good example.
- All terms in this example are binding.
The terms are made up as this is an example.
Terms can always change.
{{ example_field }} can also be at the start of a sentence to therefore
needs to be capitalised.
- Sometimes it's like the first example.
The terms are made up as this is an example.
Terms can always change.
"""
def capitalize_if_start_of_html_sentence(
    value: str,
    html_content: str,
    placeholder: str = '{{ example_field }}',
) -> str:
    """Return *value* cased according to where *placeholder* sits in the text.

    The content is whitespace-normalised (runs of spaces/newlines collapse to
    a single space) and then searched for the placeholder at the start of a
    sentence, i.e. at the very beginning of the content or directly after
    ``". "``, ``"? "`` or ``"! "``.

    Args:
        value: The merge value whose casing should be adjusted.
        html_content: The template text to inspect. Despite the name, markup
            is NOT stripped; a placeholder preceded by a tag such as ``<p>``
            is not recognised as a sentence start.
        placeholder: The literal placeholder text to look for. Defaults to
            the ``{{ example_field }}`` marker used by the example template.

    Returns:
        ``value.capitalize()`` if the placeholder starts at least one
        sentence anywhere in the content, otherwise ``value.lower()``.

    Note:
        The decision is made once for the whole document, not per occurrence:
        a single sentence-initial placeholder makes the result capitalised
        even when other occurrences are mid-sentence. ``str.capitalize`` also
        lowercases every character after the first.
    """
    # Collapse newlines and repeated spaces so a sentence that ends at a line
    # break is still followed by exactly one space before the next sentence.
    content = ' '.join(html_content.split())

    # Every "<terminator><space>" pair is a sentence boundary, so checking for
    # the placeholder right after one is equivalent to splitting on each
    # terminator in turn and testing the start of every piece.
    starts_sentence = content.startswith(placeholder) or any(
        terminator + ' ' + placeholder in content for terminator in '.?!'
    )
    return value.capitalize() if starts_sentence else value.lower()
# Register the helper as a Jinja2 filter. The lambda fixes EXAMPLE_BODY_1 as the
# content to inspect, so the filter itself receives only the value.
# NOTE(review): EXAMPLE_BODY_1 uses a bare {{ example_field }} and never pipes it
# through this filter, so the registration has no effect on the renders below.
environment.filters['capitalize_if_start_of_html_sentence'] = lambda value: capitalize_if_start_of_html_sentence(value, EXAMPLE_BODY_1)
# Compile the template from the in-memory string.
template = environment.from_string(EXAMPLE_BODY_1)
example_merge_field = "pots and pans"
# First render: the raw value is passed in and substituted at both placeholders.
print(template.render(example_field=example_merge_field))
# Call the helper directly; it returns a single casing for the whole document.
capitalized_example_field = capitalize_if_start_of_html_sentence(example_merge_field, EXAMPLE_BODY_1)
# Second render: the same pre-cased string is substituted at every placeholder,
# so the mid-sentence and sentence-initial occurrences get identical casing.
print(template.render(example_field=capitalized_example_field))
Я также попытался использовать HTMLParser, поскольку реальные данные представляют собой HTML, содержащий {{ example_field }}: я хотел сначала извлечь текст и уже с ним работать, но не смог разобраться, как это сделать.
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints every start tag and text run it is fed."""

    def handle_starttag(self, tag, attrs):
        """Print the name of an opening tag; ``attrs`` is accepted but unused."""
        print("Encountered a start tag:", tag)

    def handle_data(self, data):
        """Print a run of text data reported by the parser."""
        print("Data :", data)
Вот результат, который я хочу получить:
# Expected output
Example
Example Contract Term
Sometimes the example_field is in the middle so needs to be lowercase,
pots and pans is a good example.
- All terms in this example are binding.
The terms are made up as this is an example.
Terms can always change.
Pots and pans can also be at the start of a sentence to therefore
needs to be capitalised.
- Sometimes it's like the first example.
The terms are made up as this is an example.
Terms can always change.
Подробнее здесь: https://stackoverflow.com/questions/793 ... nding-on-f