TP1 : fichier words.py
words.py
— 0 KB
Contenu du fichier
# -*- coding: iso-8859-1 -*-
# Characters that, when they are the last character of a raw token, mark the
# end of a sentence (see clean_words).
SEPARATORS = '.!?'
# Punctuation characters removed from both ends of every token by
# clean_words; every sentence separator is also a delimiter.
DELIMITERS = '.,;:!?()"-+'
def words(source):
    """
    Generator that iterates on the lines of source and yields their words.

    Each line is split on runs of whitespace and the resulting tokens are
    yielded one by one, in order. Lines that are empty or contain only
    whitespace contribute nothing.

    Args:
        source: an iterable of lines, each supporting ``split()`` (for
            example an open text file or a list of strings).

    Yields:
        Every whitespace-delimited token of every line, without any
        surrounding whitespace.
    """
    for line in source:
        # split() with no argument already drops leading and trailing
        # whitespace, so the tokens need no further stripping. The loop
        # variable is deliberately not called "words" so that it does not
        # shadow this function.
        for token in line.split():
            yield token
def clean_words(source):
    """
    Clean the words of source and mark sentence boundaries with "SEP".

    The stream always starts with one "SEP" marker. Every token produced by
    words(source) is then lower-cased and has the DELIMITERS characters
    removed from both of its ends. A further "SEP" marker is emitted after
    each token whose last character, before that removal, is one of
    SEPARATORS.

    Notes:
        * A token made only of delimiter characters (for example "...")
          yields no word, but still yields "SEP" when it ends with a
          separator, so two markers can follow each other.
        * No closing "SEP" is emitted if the last token does not end with a
          separator.

    Args:
        source: an iterable of text lines, as accepted by words().

    Yields:
        The string "SEP" or a cleaned, lower-cased, non-empty word.
    """
    # The stream opens with a sentence-boundary marker.
    yield "SEP"
    for word in words(source):
        word = word.lower()
        # Must be decided on the raw token: the separator itself is removed
        # by the strip below. Tokens coming from words() are never empty,
        # so indexing the last character is safe.
        ends_sentence = word[-1] in SEPARATORS
        word = word.strip(DELIMITERS)
        if word:
            yield word
        if ends_sentence:
            yield "SEP"