TP1 : fichier words.py

text/x-python words.py — 0 KB

Contenu du fichier

# -*- coding: iso-8859-1 -*-

# Punctuation characters removed from both ends of every word.
DELIMITERS = ".,;:!?()\"-+"
# Sentence-ending punctuation: a word finishing with one of these closes
# the current sentence.
SEPARATORS = ".!?"

def words(source):
    """
    Yield every whitespace-separated token found in source, line by line.

    source is any iterable of strings (for example an open text file).
    Tokens are produced lazily, in reading order, without surrounding
    whitespace; blank lines contribute nothing.
    """
    for raw_line in source:
        for token in raw_line.split():
            yield token.strip()


def clean_words(source):
    """
    Yield normalised words from source, with "SEP" markers between sentences.

    Every word produced by words(source) is lower-cased and stripped of any
    leading/trailing DELIMITERS characters; words that end up empty are
    dropped.  A "SEP" marker is yielded once before the first word, and again
    after every word that ends a sentence, i.e. whose last character is one
    of SEPARATORS once trailing closing quotes/parentheses are ignored (so
    that 'done."' and 'really?)' are recognised as sentence ends).

    No closing "SEP" is added when the input does not finish with a
    separator.
    """
    # Characters that may legitimately follow the final punctuation mark.
    closers = '")'
    enders = tuple(SEPARATORS)

    yield "SEP"
    for word in words(source):
        word = word.lower()
        # Decide before stripping, since strip() below removes the
        # punctuation.  endswith() is also safe if nothing is left once the
        # closers have been removed.
        ends_sentence = word.rstrip(closers).endswith(enders)
        word = word.strip(DELIMITERS)
        if word:
            yield word
        if ends_sentence:
            yield "SEP"