Вкратце: мой код сначала находит все вхождения защищённых («специальных») слов и запоминает диапазоны их позиций в строке. Затем он находит знаки пунктуации и их диапазоны, после чего перебирает список найденных знаков и исключает из удаления те, которые попадают в диапазон какого-либо защищённого слова; все остальные знаки пунктуации удаляются.
код
Код: Выделить всё
import re
from string import punctuation
# Sample input: contains the token "Q&A" (whose "&" should be kept) as well as
# stand-alone punctuation ("&", "!", "@", ".") that should be removed.
sentence = "I am going to run over to Q&A and ask them a ton of questions about this & that & that & this while surfacing the internet! with my raccoon buddy @ the bar."
# my attempt to remove punctuation
class SentenceHolder:
    """Hold a sentence and strip its punctuation while keeping protected words intact.

    Attributes:
        sentence: The text being cleaned; ``remove_punctuation`` rewrites it
            in place.
        protected_words: Literal substrings (not regular expressions) whose
            punctuation must survive, e.g. the ``&`` in ``"Q&A"``.
    """

    sentence = None
    # Class-level default shared by every instance: assign a new list on the
    # instance to customise it rather than mutating this one.
    protected_words = ["Q&A"]

    def __init__(self, sentence: str) -> None:
        """Store *sentence* as the text to clean."""
        self.sentence = sentence

    def remove_punctuation(self) -> None:
        """Blank out unprotected punctuation in ``self.sentence`` and tidy whitespace.

        Every ``string.punctuation`` character that is not part of a protected
        word is replaced by a space, so words separated only by punctuation
        (``"a,b"``) do not get glued together. Runs of whitespace are then
        collapsed to a single space and the ends are stripped.

        The sentence is modified in place; nothing is returned. An empty or
        unset sentence is left untouched.
        """
        if not self.sentence:
            return

        # A single character class covers all of string.punctuation in one
        # scan; re.escape keeps characters such as "]", "^" and "-" literal
        # inside the class instead of letting them act as regex syntax.
        punctuation_pattern = f"[{re.escape(punctuation)}]"
        symbol_matches = list(re.finditer(punctuation_pattern, self.sentence))

        # Each match is exactly one character wide, so overwriting by index
        # never shifts the offsets of the remaining matches.
        chars = list(self.sentence)
        for match in self._protected_word_overlap(symbol_matches):
            chars[match.start()] = " "

        # split()/join collapses the gaps left behind by removed symbols.
        self.sentence = " ".join("".join(chars).split())

    def _protected_word_overlap(
        self, symbol_matches: "list[re.Match[str]]"
    ) -> "list[re.Match[str]]":
        """Return the matches from *symbol_matches* that are safe to remove.

        Args:
            symbol_matches: Punctuation matches found in ``self.sentence``.

        Returns:
            The subset of *symbol_matches*, in the original order, whose span
            does not overlap any occurrence of a protected word.
        """
        protected_spans = []
        for word in self.protected_words:
            if not word:
                # An empty word would match at every position and protect nothing.
                continue
            # Protected words are literal text, so escape them before searching
            # (e.g. "C++" is not a valid regular expression on its own).
            protected_spans.extend(
                found.span()
                for found in re.finditer(re.escape(word), self.sentence)
            )

        # Two half-open intervals overlap when each starts before the other ends.
        return [
            symbol
            for symbol in symbol_matches
            if not any(
                symbol.start() < end and start < symbol.end()
                for start, end in protected_spans
            )
        ]
Код: Выделить всё
# Wrap the sample sentence and run the punctuation removal; the method
# rewrites the holder's `sentence` attribute rather than returning a value.
my_string = SentenceHolder(sentence)
my_string.remove_punctuation()
Код: Выделить всё
"I am going to run over to Q&A and ask them a ton of questions about this that that this while surfacing the internet with my raccoon buddy the bar"
Подробнее здесь: https://stackoverflow.com/questions/787 ... rds-intact