Skip to content

evaluators

Evaluate spaCy tokens.

This module contains classes that assist with evaluating spaCy tokens.

A typical usage example

import spacy
from spacy_cleaner.processing import evaluators

# Load a spaCy pipeline and run it over a one-word text.
nlp = spacy.load("en_core_web_md")
doc = nlp("and")
tok = doc[0]  # first token of the processed doc

# Ask the stopword evaluator about that token.
evaluator = evaluators.StopwordsEvaluator()
evaluator.evaluate(tok)
Calling `evaluate` returns `True` because "and" is a stopword.

EmailEvaluator

Bases: Evaluator

Evaluates emails.

Source code in spacy_cleaner/processing/evaluators.py
class EmailEvaluator(Evaluator):
    """Evaluator that reports whether a token resembles an email."""

    def evaluate(self, tok: tokens.Token) -> bool:
        """Check the token's `like_email` flag.

        Args:
            tok: Token to evaluate.

        Returns:
            The value of `tok.like_email`: `True` if the token is like an
            email, `False` if not.
        """
        looks_like_email = tok.like_email
        return looks_like_email

evaluate(tok)

If the given token is like an email.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

True if the token is like an email. False if not.

Source code in spacy_cleaner/processing/evaluators.py
def evaluate(self, tok: tokens.Token) -> bool:
    """Check the token's `like_email` flag.

    Args:
        tok: Token to evaluate.

    Returns:
        The value of `tok.like_email`: `True` if the token is like an
        email, `False` if not.
    """
    looks_like_email = tok.like_email
    return looks_like_email

Evaluator

Bases: ABC

Base class for evaluators.

Source code in spacy_cleaner/processing/evaluators.py
class Evaluator(abc.ABC):
    """Base class for evaluators.

    Subclasses must implement `evaluate`, which maps a `spaCy` token to a
    boolean.
    """

    @abc.abstractmethod
    def evaluate(self, tok: tokens.Token) -> bool:
        """Evaluates a `spaCy` token.

        Args:
            tok: Token to evaluate.

        Returns:
            Whether the token is evaluated to `True` or `False`.
        """

evaluate(tok) abstractmethod

Evaluates a spaCy token.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

Whether the token is evaluated to True or False.

Source code in spacy_cleaner/processing/evaluators.py
@abc.abstractmethod
def evaluate(self, tok: tokens.Token) -> bool:
    """Evaluates a `spaCy` token.

    This method is abstract; it has no implementation here beyond this
    docstring.

    Args:
        tok: Token to evaluate.

    Returns:
        Whether the token is evaluated to `True` or `False`.
    """

NumberEvaluator

Bases: Evaluator

Evaluates numbers.

Source code in spacy_cleaner/processing/evaluators.py
class NumberEvaluator(Evaluator):
    """Evaluator that reports whether a token resembles a number."""

    def evaluate(self, tok: tokens.Token) -> bool:
        """Check the token's `like_num` flag.

        Args:
            tok: Token to evaluate.

        Returns:
            The value of `tok.like_num`: `True` if the token is like a
            number, `False` if not.
        """
        looks_like_number = tok.like_num
        return looks_like_number

evaluate(tok)

If the given token is like a number.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

True if the token is like a number. False if not.

Source code in spacy_cleaner/processing/evaluators.py
def evaluate(self, tok: tokens.Token) -> bool:
    """Check the token's `like_num` flag.

    Args:
        tok: Token to evaluate.

    Returns:
        The value of `tok.like_num`: `True` if the token is like a
        number, `False` if not.
    """
    looks_like_number = tok.like_num
    return looks_like_number

PunctuationEvaluator

Bases: Evaluator

Evaluates punctuation.

Source code in spacy_cleaner/processing/evaluators.py
class PunctuationEvaluator(Evaluator):
    """Evaluates punctuation."""

    def evaluate(self, tok: tokens.Token) -> bool:
        """If the given token is punctuation.

        Args:
            tok: Token to evaluate.

        Returns:
            `True` if the token is punctuation. `False` if not.
        """
        # The result is the token's own `is_punct` flag, unchanged.
        return tok.is_punct

evaluate(tok)

If the given token is punctuation.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

True if the token is punctuation. False if not.

Source code in spacy_cleaner/processing/evaluators.py
def evaluate(self, tok: tokens.Token) -> bool:
    """If the given token is punctuation.

    Args:
        tok: Token to evaluate.

    Returns:
        `True` if the token is punctuation. `False` if not.
    """
    # The result is the token's own `is_punct` flag, unchanged.
    return tok.is_punct

StopwordsEvaluator

Bases: Evaluator

Evaluates stopwords.

Source code in spacy_cleaner/processing/evaluators.py
class StopwordsEvaluator(Evaluator):
    """Evaluator that reports whether a token is a stopword."""

    def evaluate(self, tok: tokens.Token) -> bool:
        """Check the token's `is_stop` flag.

        Args:
            tok: Token to evaluate.

        Returns:
            The value of `tok.is_stop`: `True` if the token is a stopword,
            `False` if not.
        """
        is_stopword = tok.is_stop
        return is_stopword

evaluate(tok)

If the given token is a stopword.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

True if the token is a stopword. False if not.

Source code in spacy_cleaner/processing/evaluators.py
def evaluate(self, tok: tokens.Token) -> bool:
    """Check the token's `is_stop` flag.

    Args:
        tok: Token to evaluate.

    Returns:
        The value of `tok.is_stop`: `True` if the token is a stopword,
        `False` if not.
    """
    is_stopword = tok.is_stop
    return is_stopword

URLEvaluator

Bases: Evaluator

Evaluates URLs.

Source code in spacy_cleaner/processing/evaluators.py
class URLEvaluator(Evaluator):
    """Evaluator that reports whether a token resembles a URL."""

    def evaluate(self, tok: tokens.Token) -> bool:
        """Check the token's `like_url` flag.

        Args:
            tok: Token to evaluate.

        Returns:
            The value of `tok.like_url`: `True` if the token is like a
            URL, `False` if not.
        """
        looks_like_url = tok.like_url
        return looks_like_url

evaluate(tok)

If the given token is like a URL.

Parameters:

Name Type Description Default
tok Token

Token to evaluate.

required

Returns:

Type Description
bool

True if the token is like a URL. False if not.

Source code in spacy_cleaner/processing/evaluators.py
def evaluate(self, tok: tokens.Token) -> bool:
    """Check the token's `like_url` flag.

    Args:
        tok: Token to evaluate.

    Returns:
        The value of `tok.like_url`: `True` if the token is like a
        URL, `False` if not.
    """
    looks_like_url = tok.like_url
    return looks_like_url