Skip to content

Preprocessors

bells_o.preprocessors

Initialize submodule.

PreProcessing

Bases: ABC

Abstract PreProcessing class that needs to be concretised.

Source code in src/bells_o/preprocessors/preprocessing.py
class PreProcessing(ABC):
    """Abstract base class for preprocessors.

    Concrete subclasses must implement both `__init__` and `process`.
    Instances are callable; calling one forwards to `process`.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        """Initialize the preprocessor (must be overridden by concrete classes)."""

    def __call__(self, string: str, *args, **kwargs) -> str:
        """Forward the call to `process`.

        Args:
            string (str): Message to preprocess; handed to `process` unchanged.
            *args (Any): Extra positional arguments, passed through to `process`.
            **kwargs (Any): Extra keyword arguments, passed through to `process`.

        Returns:
            The value returned by `process`.

        """
        processed = self.process(string, *args, **kwargs)
        return processed

    @abstractmethod
    def process(self, string: str, *args, **kwargs) -> str:
        """Preprocess a given string (must be overridden by concrete classes).

        Args:
            string (str): Text string to preprocess.
            *args (Any): Any other arguments that concrete classes need.
            **kwargs (Any): Any other keyword arguments that concrete classes need.

        Returns:
            str: Preprocessed string.

        """

process abstractmethod

process(string: str, *args, **kwargs) -> str

Preprocess a given string.

Parameters:

Name Type Description Default
string str

Text string to preprocess.

required
*args Any

Any other arguments that concrete classes need.

()
**kwargs Any

Any other keyword arguments that concrete classes need.

{}

Returns:

Name Type Description
str str

Preprocessed string.

Source code in src/bells_o/preprocessors/preprocessing.py
@abstractmethod
def process(self, string: str, *args, **kwargs) -> str:
    """Preprocess a given string (abstract; concrete classes supply the logic).

    Args:
        string (str): Text string to preprocess.
        *args (Any): Any other arguments that concrete classes need.
        **kwargs (Any): Any other keyword arguments that concrete classes need.

    Returns:
        str: Preprocessed string.

    """

RoleWrapper

Bases: PreProcessing

Implement the wrapper to accommodate message dictionaries.

Maps from string to message dictionary.

Source code in src/bells_o/preprocessors/role_wrapper.py
class RoleWrapper(PreProcessing):
    """Implement the wrapper to accommodate message dictionaries.

    Maps from string to message dictionary.

    """

    def __init__(
        self,
        role: Literal["user", "assistant"] = "user",
        system_prompt: str | None = None,
        opposite_prompt: str | None = None,
        starts_with: Literal["user", "assistant"] | None = None,
        force_system: bool = False,
    ):
        """Initialize the RoleWrapper.

        This preprocessor maps a string to the dictionary list template that specifies messages in HF.
        You can choose for what role the prompt string should be used (`assistant` or `user`), if/what the opposite role's message should be,
        and a system prompt string.

        Args:
            role (Literal["user", "assistant"], optional): The role of the prompt string. Defaults to "user".
            system_prompt (str, optional): The system prompt string for the conversation. Defaults to None.
            opposite_prompt (str | None, optional): The string for the opposite role than specified in `role`. Defaults to None.
            starts_with (Literal["user", "assistant"], optional): The role whose message the conversation should start with. Defaults to the same as `role`.
            force_system (bool, optional): Forces a system prompt message at the beginning of the chat. If `True` and the system prompt is empty, an empty string is used. Defaults to False.

        """
        self.role = role
        self.system_prompt = system_prompt
        self.opposite_prompt = opposite_prompt
        self.starts_with = starts_with
        self.force_system = force_system

    def process(self, string: str) -> list[dict[str, str]]:
        """Wrap a string to conform to the dictionary list template necessary for IT models on HF.

        The configured attributes are only read, never modified, so changing
        `role`, `starts_with`, `system_prompt` or `force_system` between calls
        takes effect on the next call.

        Args:
            string (str): Text string to preprocess.

        Returns:
            list: Message dictionaries (`role`/`content`), in conversation order:
                an optional system message, then the prompt message and the
                optional opposite-role message, ordered according to `starts_with`.

        Raises:
            AssertionError: If the conversation should start with the opposite role
                but `opposite_prompt` is empty or None.

        """
        # Resolve dependent settings into locals. Writing them back to `self`
        # would freeze the first call's values (e.g. a later change of
        # `self.role` would be compared against a stale `starts_with`).
        starts_with = self.role if self.starts_with is None else self.starts_with
        system_prompt = self.system_prompt
        if self.force_system and not system_prompt:
            system_prompt = ""

        message_list = []
        if system_prompt is not None:
            message_list.append({"role": "system", "content": system_prompt})

        prompt_message = {"role": self.role, "content": string}
        if starts_with != self.role:
            # Raised explicitly (instead of `assert`) so the check is not stripped
            # under `python -O`; the exception type is kept for compatibility.
            if not self.opposite_prompt:
                raise AssertionError(
                    "If the first turn is supposed to be the other role, `opposite_prompt` has to be specified."
                )
            # OPPOSITES is defined outside this block; presumably it maps each role to the other one.
            message_list.append({"role": OPPOSITES[self.role], "content": self.opposite_prompt})
            message_list.append(prompt_message)
        else:
            message_list.append(prompt_message)
            if self.opposite_prompt is not None:
                message_list.append({"role": OPPOSITES[self.role], "content": self.opposite_prompt})

        return message_list

process

process(string) -> list[dict[str, str]]

Wrap a string to conform to the dictionary list template necessary for IT models on HF.

Parameters:

Name Type Description Default
string str

Text string to preprocess.

required

Returns:

Name Type Description
list list[dict[str, str]]

Wrapped string.

Source code in src/bells_o/preprocessors/role_wrapper.py
def process(self, string: str) -> list[dict[str, str]]:
    """Wrap a string to conform to the dictionary list template necessary for IT models on HF.

    The configured attributes are only read, never modified, so changing
    `role`, `starts_with`, `system_prompt` or `force_system` between calls
    takes effect on the next call.

    Args:
        string (str): Text string to preprocess.

    Returns:
        list: Message dictionaries (`role`/`content`), in conversation order:
            an optional system message, then the prompt message and the
            optional opposite-role message, ordered according to `starts_with`.

    Raises:
        AssertionError: If the conversation should start with the opposite role
            but `opposite_prompt` is empty or None.

    """
    # Resolve dependent settings into locals. Writing them back to `self`
    # would freeze the first call's values (e.g. a later change of
    # `self.role` would be compared against a stale `starts_with`).
    starts_with = self.role if self.starts_with is None else self.starts_with
    system_prompt = self.system_prompt
    if self.force_system and not system_prompt:
        system_prompt = ""

    message_list = []
    if system_prompt is not None:
        message_list.append({"role": "system", "content": system_prompt})

    prompt_message = {"role": self.role, "content": string}
    if starts_with != self.role:
        # Raised explicitly (instead of `assert`) so the check is not stripped
        # under `python -O`; the exception type is kept for compatibility.
        if not self.opposite_prompt:
            raise AssertionError(
                "If the first turn is supposed to be the other role, `opposite_prompt` has to be specified."
            )
        # OPPOSITES is defined outside this block; presumably it maps each role to the other one.
        message_list.append({"role": OPPOSITES[self.role], "content": self.opposite_prompt})
        message_list.append(prompt_message)
    else:
        message_list.append(prompt_message)
        if self.opposite_prompt is not None:
            message_list.append({"role": OPPOSITES[self.role], "content": self.opposite_prompt})

    return message_list

TemplateWrapper

Bases: PreProcessing

Implement a preprocessor that wraps a prompt in a template.

It is essentially a PreProcessing-wrapper class for formatting of this nature: "foo {prompt} foo".format(prompt=sample_prompt).

Make sure to have the {prompt} label in your template string.

Source code in src/bells_o/preprocessors/template_wrapper.py
class TemplateWrapper(PreProcessing):
    """Preprocessor that inserts a prompt into a fixed template string.

    It packages formatting of this kind as a `PreProcessing` subclass:
    `"foo {prompt} foo".format(prompt=sample_prompt)`.

    The template string should contain the `{prompt}` label.

    """

    def __init__(
        self,
        template: str,
    ):
        """Initialize the TemplateWrapper.

        Args:
            template (str): Template string that includes the `{prompt}` label.

        """
        self.template = template

    def process(self, string) -> str:
        """Fill the `{prompt}` label of the template with the given string.

        Args:
            string (str): Text string to fill into the template.

        Returns:
            str: Template string with the prompt filled in.

        """
        # Substitution goes through `str.format` with `prompt` as the only field.
        filled = self.template.format(prompt=string)
        return filled

process

process(string) -> str

Fill in the {prompt} label of the template.

Parameters:

Name Type Description Default
string str

Text string to fill into template.

required

Returns:

Name Type Description
str str

Filled in template string.

Source code in src/bells_o/preprocessors/template_wrapper.py
def process(self, string) -> str:
    """Fill the `{prompt}` label of the template with the given string.

    Args:
        string (str): Text string to fill into the template.

    Returns:
        str: Template string with the prompt filled in.

    """
    # Substitution goes through `str.format` with `prompt` as the only field.
    filled = self.template.format(prompt=string)
    return filled