TP1 : fichier words.py
words.py — 0 KB
Contenu du fichier
# -*- coding: iso-8859-1 -*-
"""Helpers that turn lines of text into a stream of normalized words."""

# Punctuation characters removed from both ends of every word.
DELIMITERS = '.,;:!?()"-+'
# Characters that, when they end a raw word, mark the end of a sentence.
SEPARATORS = '.!?'


def words(source):
    """Yield every whitespace-separated word found in the lines of source.

    source is any iterable of strings (for example an open text file).
    Words are yielded in reading order and keep their punctuation and case.
    """
    for line in source:
        # split() without arguments splits on runs of whitespace, so the
        # resulting tokens never carry leading or trailing blanks.
        for word in line.split():
            yield word


def clean_words(source):
    """Yield lowercased, punctuation-stripped words with "SEP" markers.

    A "SEP" token is always yielded first. Each word read from source is
    lowercased and stripped of the DELIMITERS characters at both ends; words
    that end up empty (pure punctuation) are skipped. Whenever the raw word
    ends with one of the SEPARATORS characters, a "SEP" token is yielded
    after it, even if the word itself was skipped.
    """
    yield "SEP"
    for raw in words(source):
        lowered = raw.lower()
        # The sentence-end test must look at the word before the
        # punctuation is stripped away.
        ends_sentence = lowered[-1] in SEPARATORS
        cleaned = lowered.strip(DELIMITERS)
        if cleaned:
            yield cleaned
        if ends_sentence:
            yield "SEP"