Skip to content

replacers

Replace spaCy tokens.

This module contains functions that assist with replacing spaCy tokens.

A typical usage example

import spacy
from spacy_cleaner import processing

nlp = spacy.load("en_core_web_md")
doc = nlp(",")
tok = doc[0]

processing.replace_punctuation_token(tok)
The token "," is replaced with _IS_PUNCT_.

replace_email_token(tok, replace='_LIKE_EMAIL_')

If the token is like an email, replace it with the string _LIKE_EMAIL_.

Parameters:

Name Type Description Default
tok Token

A spaCy token.

required
replace str

The replacement string.

'_LIKE_EMAIL_'

Returns:

Type Description
Union[str, Token]

The replacement string or the original token.

Source code in spacy_cleaner/processing/replacers.py
def replace_email_token(
    tok: tokens.Token, replace: str = "_LIKE_EMAIL_"
) -> Union[str, tokens.Token]:
    """Swap an email-like token for a placeholder string.

    Builds a `Transformer` from an `EmailEvaluator` and the placeholder, then
    applies it to the token.

    Args:
      tok: A `spaCy` token.
      replace: The placeholder to substitute; defaults to `_LIKE_EMAIL_`.

    Returns:
      The replacement string or the original token.
    """
    email_evaluator = evaluators.EmailEvaluator()
    email_transformer = transformers.Transformer(email_evaluator, replace)
    return email_transformer.transform(tok)

replace_number_token(tok, replace='_LIKE_NUM_')

If the token is like a number, replace it with the string _LIKE_NUM_.

Parameters:

Name Type Description Default
tok Token

A spaCy token.

required
replace str

The replacement string.

'_LIKE_NUM_'

Returns:

Type Description
Union[str, Token]

The replacement string or the original token.

Source code in spacy_cleaner/processing/replacers.py
def replace_number_token(
    tok: tokens.Token, replace: str = "_LIKE_NUM_"
) -> Union[str, tokens.Token]:
    """Swap a number-like token for a placeholder string.

    Builds a `Transformer` from a `NumberEvaluator` and the placeholder, then
    applies it to the token.

    Args:
      tok: A `spaCy` token.
      replace: The placeholder to substitute; defaults to `_LIKE_NUM_`.

    Returns:
      The replacement string or the original token.
    """
    number_evaluator = evaluators.NumberEvaluator()
    number_transformer = transformers.Transformer(number_evaluator, replace)
    return number_transformer.transform(tok)

replace_punctuation_token(tok, replace='_IS_PUNCT_')

If the token is punctuation, replace it with the string _IS_PUNCT_.

Parameters:

Name Type Description Default
tok Token

A spaCy token.

required
replace str

The replacement string.

'_IS_PUNCT_'

Returns:

Type Description
Union[str, Token]

The replacement string or the original token.

Source code in spacy_cleaner/processing/replacers.py
def replace_punctuation_token(
    tok: tokens.Token, replace: str = "_IS_PUNCT_"
) -> Union[str, tokens.Token]:
    """Swap a punctuation token for a placeholder string.

    Builds a `Transformer` from a `PunctuationEvaluator` and the placeholder,
    then applies it to the token.

    Args:
      tok: A `spaCy` token.
      replace: The placeholder to substitute; defaults to `_IS_PUNCT_`.

    Returns:
      The replacement string or the original token.
    """
    punctuation_evaluator = evaluators.PunctuationEvaluator()
    punctuation_transformer = transformers.Transformer(
        punctuation_evaluator, replace
    )
    return punctuation_transformer.transform(tok)

replace_stopword_token(tok, replace='_IS_STOP_')

If the token is a stopword, replace it with the string _IS_STOP_.

Parameters:

Name Type Description Default
tok Token

A spaCy token.

required
replace str

The replacement string.

'_IS_STOP_'

Returns:

Type Description
Union[str, Token]

The replacement string or the original token.

Source code in spacy_cleaner/processing/replacers.py
def replace_stopword_token(
    tok: tokens.Token, replace: str = "_IS_STOP_"
) -> Union[str, tokens.Token]:
    """Swap a stopword token for a placeholder string.

    Builds a `Transformer` from a `StopwordsEvaluator` and the placeholder,
    then applies it to the token.

    Args:
      tok: A `spaCy` token.
      replace: The placeholder to substitute; defaults to `_IS_STOP_`.

    Returns:
      The replacement string or the original token.
    """
    stopword_evaluator = evaluators.StopwordsEvaluator()
    stopword_transformer = transformers.Transformer(stopword_evaluator, replace)
    return stopword_transformer.transform(tok)

replace_url_token(tok, replace='_LIKE_URL_')

If the token is like a URL, replace it with the string _LIKE_URL_.

Parameters:

Name Type Description Default
tok Token

A spaCy token.

required
replace str

The replacement string.

'_LIKE_URL_'

Returns:

Type Description
Union[str, Token]

The replacement string or the original token.

Source code in spacy_cleaner/processing/replacers.py
def replace_url_token(
    tok: tokens.Token, replace: str = "_LIKE_URL_"
) -> Union[str, tokens.Token]:
    """Swap a URL-like token for a placeholder string.

    Builds a `Transformer` from a `URLEvaluator` and the placeholder, then
    applies it to the token.

    Args:
      tok: A `spaCy` token.
      replace: The placeholder to substitute; defaults to `_LIKE_URL_`.

    Returns:
      The replacement string or the original token.
    """
    url_evaluator = evaluators.URLEvaluator()
    url_transformer = transformers.Transformer(url_evaluator, replace)
    return url_transformer.transform(tok)