TP1 : fichier

text/x-python — 0 KB

Contenu du fichier

# -*- coding: iso-8859-1 -*-

# Punctuation characters stripped from both ends of every word.
DELIMITERS = '.,;:!?()"-+'

# Characters that, when they end a token, mark the end of a sentence.
# NOTE(review): clean_words() reads SEPARATORS but no definition is visible in
# this file; the value below is inferred from its "end of sentences" wording.
# Confirm the intended set (e.g. whether ':' or ';' should be included).
SEPARATORS = '.!?'

def words(source):
    """Iterate over the lines of *source* and yield every word.

    Args:
        source: An iterable of strings (for example an open text file);
            each item is treated as one line.

    Yields:
        Each whitespace-separated token of each line, in reading order.
        Blank lines produce nothing.
    """
    for line in source:
        # split() with no argument cuts on runs of whitespace and never
        # returns empty or padded tokens, so no extra strip() is required.
        for token in line.split():
            yield token

def clean_words(source):
    """Yield cleaned words from *source*, with "SEP" at sentence boundaries.

    Each token produced by words(source) is lower-cased and has any leading
    or trailing DELIMITERS characters removed. A token made only of such
    characters yields no word.

    Args:
        source: An iterable of text lines, passed straight to words().

    Yields:
        "SEP" once before anything else, then the cleaned words; an extra
        "SEP" follows every token whose last character is in SEPARATORS
        (expected to be a module-level string of sentence-ending characters).
    """
    yield "SEP"
    for word in words(source):
        word = word.lower()
        # Look at the last character *before* stripping: strip() removes the
        # very punctuation that signals the end of the sentence.
        ends_sentence = word[-1] in SEPARATORS
        word = word.strip(DELIMITERS)
        if word:
            yield word
        if ends_sentence:
            yield "SEP"