Skip to content

Utils Submodule

SRToolkit.utils

Utilities for expression representation, compilation, generation, and evaluation.

Modules:

Name Description
symbol_library

The SymbolLibrary class — manages the token vocabulary and token properties.

expression_tree

The Node binary-tree representation and conversion utilities for expressions.

expression_compiler

Compiles token-list or tree expressions into executable Python callables.

expression_simplifier

SymPy-backed algebraic simplification, including constant folding.

expression_generator

PCFG construction from a SymbolLibrary and Monte-Carlo expression sampling.

measures

Distance and similarity measures: edit distance, tree edit distance, and Behavior-aware Expression Distance (BED).

serialization

Internal JSON serialization utilities for numpy types.

Node

Node(symbol: str, right: Optional[Node] = None, left: Optional[Node] = None)

A node in a binary expression tree.

  • Binary operators ("op") set both left and right.
  • Unary functions ("fn") set only left; right is None.
  • Leaves (variables, constants, literals, numeric values) have both children as None.

Examples:

>>> node = Node("+", Node("x"), Node("1"))
>>> len(node)
3

Parameters:

Name Type Description Default
symbol str

Token string stored at this node.

required
right Optional[Node]

Right operand (binary operators only).

None
left Optional[Node]

Left operand (operators and unary functions).

None
Source code in SRToolkit/utils/expression_tree.py
def __init__(self, symbol: str, right: Optional["Node"] = None, left: Optional["Node"] = None) -> None:
    """
    Create one node of a binary expression tree.

    Which children are populated depends on the kind of token stored:

    - Binary operators (``"op"``) use both ``left`` and ``right``.
    - Unary functions (``"fn"``) use ``left`` only; ``right`` stays ``None``.
    - Leaves (variables, constants, literals, numeric values) keep both children ``None``.

    Note that, positionally, ``right`` comes before ``left``.

    Examples:
        >>> node = Node("+", Node("x"), Node("1"))
        >>> len(node)
        3

    Args:
        symbol: Token string stored at this node.
        right: Right operand (binary operators only).
        left: Left operand (operators and unary functions).
    """
    # Stored verbatim: no validation or copying of the children happens here.
    self.symbol, self.right, self.left = symbol, right, left

to_list

to_list(
    symbol_library: Optional[SymbolLibrary] = None, notation: str = "infix"
) -> List[str]

Transforms the tree rooted at this node into a list of tokens.

Examples:

>>> node = Node("+", Node("X_0"), Node("1"))
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
['1', '+', 'X_0']
>>> node.to_list(notation="postfix")
['1', 'X_0', '+']
>>> node.to_list(notation="prefix")
['+', '1', 'X_0']
>>> node = Node("+", Node("*", Node("X_0"), Node("X_1")), Node("1"))
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
['1', '+', 'X_1', '*', 'X_0']
>>> node.to_list(notation="infix")
['1', '+', '(', 'X_1', '*', 'X_0', ')']
>>> node = Node("sin", None, Node("X_0"))
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
['sin', '(', 'X_0', ')']
>>> node = Node("^2", None, Node("X_0"))
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
['X_0', '^2']
>>> node.to_list()
['(', 'X_0', ')', '^2']
>>> node = Node("*", Node("*", Node("X_0"), Node("X_0")),  Node("X_0"))
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols(),notation="infix")
['X_0', '*', '(', 'X_0', '*', 'X_0', ')']

Parameters:

Name Type Description Default
symbol_library Optional[SymbolLibrary]

Symbol library used to determine token types and precedences during infix reconstruction. If None with "infix" notation, the output may contain redundant parentheses.

None
notation str

Output notation: "infix", "prefix", or "postfix". Default "infix".

'infix'

Returns:

Type Description
List[str]

Token list representing the subtree rooted at this node.

Raises:

Type Description
Exception

If notation is not one of the accepted values, or if a token's type cannot be resolved during infix reconstruction.

Source code in SRToolkit/utils/expression_tree.py
def to_list(self, symbol_library: Optional[SymbolLibrary] = None, notation: str = "infix") -> List[str]:
    """
    Flatten the tree rooted at this node into a list of tokens.

    Examples:
        >>> node = Node("+", Node("X_0"), Node("1"))
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['1', '+', 'X_0']
        >>> node.to_list(notation="postfix")
        ['1', 'X_0', '+']
        >>> node.to_list(notation="prefix")
        ['+', '1', 'X_0']
        >>> node = Node("+", Node("*", Node("X_0"), Node("X_1")), Node("1"))
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['1', '+', 'X_1', '*', 'X_0']
        >>> node.to_list(notation="infix")
        ['1', '+', '(', 'X_1', '*', 'X_0', ')']
        >>> node = Node("sin", None, Node("X_0"))
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['sin', '(', 'X_0', ')']
        >>> node = Node("^2", None, Node("X_0"))
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['X_0', '^2']
        >>> node.to_list()
        ['(', 'X_0', ')', '^2']
        >>> node = Node("*", Node("*", Node("X_0"), Node("X_0")),  Node("X_0"))
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols(),notation="infix")
        ['X_0', '*', '(', 'X_0', '*', 'X_0', ')']

    Args:
        symbol_library: Library consulted for token types and precedences when
            rebuilding infix notation. Without it, ``"infix"`` output is produced by a
            simpler rule set, a warning is issued, and redundant parentheses may appear.
        notation: One of ``"infix"``, ``"prefix"`` or ``"postfix"``.
            Default ``"infix"``.

    Returns:
        The tokens of the subtree rooted at this node, in the requested notation.

    Raises:
        Exception: If ``notation`` is not one of the accepted values, or if the
            library reports a type other than var/const/lit/fn/op for a non-numeric token
            during infix reconstruction.
    """
    # Children are serialised first with the same settings, so every branch below
    # only has to arrange three ready-made token lists.
    left = self.left.to_list(symbol_library, notation) if self.left is not None else []
    right = self.right.to_list(symbol_library, notation) if self.right is not None else []

    if notation == "prefix":
        return [self.symbol] + left + right

    if notation == "postfix":
        return left + right + [self.symbol]

    if notation != "infix":
        raise Exception(
            "Invalid notation selected. Use 'infix', 'prefix', 'postfix', or leave blank (defaults to 'infix')."
        )

    if symbol_library is None:
        # No type/precedence information: decide purely from the tree shape.
        warnings.warn(
            "Symbol library not provided. Generated expression may contain unnecessary parentheses and"
            " have other issues."
        )
        if self.left is None and self.right is None:
            return [self.symbol]
        if self.right is None:
            # Only a left child: treat as unary. A leading "^" marks a postfix power.
            if self.symbol[0] == "^":
                return ["("] + left + [")", self.symbol]
            return [self.symbol, "("] + left + [")"]
        # Binary case: wrap every operand that is longer than a single token.
        if len(left) > 1:
            left = ["("] + left + [")"]
        if len(right) > 1:
            right = ["("] + right + [")"]
        return left + [self.symbol] + right

    assert symbol_library is not None, "[Node.to_list] parameter symbol_library should be of type SymbolLibrary"

    # Numeric tokens are leaves and never need a library lookup.
    if is_float(self.symbol):
        return [self.symbol]

    token_type = symbol_library.get_type(self.symbol)

    if token_type in ("var", "const", "lit"):
        return [self.symbol]

    if token_type == "fn":
        # Positive precedence -> written before its argument, e.g. sin ( x ).
        if symbol_library.get_precedence(self.symbol) > 0:
            return [self.symbol, "("] + left + [")"]
        # Otherwise written after its argument, e.g. x ^2.
        if len(left) > 1:
            left = ["("] + left + [")"]
        return left + [self.symbol]

    if token_type == "op":
        own_precedence = symbol_library.get_precedence(self.symbol)

        def needs_parentheses(child) -> bool:
            # A known, non-numeric child that does not bind tighter than this
            # operator must be wrapped to keep the grouping of the tree.
            if child is None or is_float(child.symbol):
                return False
            return -1 < symbol_library.get_precedence(child.symbol) <= own_precedence

        if needs_parentheses(self.left):
            left = ["("] + left + [")"]
        if needs_parentheses(self.right):
            right = ["("] + right + [")"]
        return left + [self.symbol] + right

    raise Exception(f"Invalid symbol type for symbol {self.symbol}.")

to_latex

to_latex(symbol_library: SymbolLibrary) -> str

Transforms the tree rooted at this node into a LaTeX expression.

Examples:

>>> node = Node("+", Node("X_0"), Node("1"))
>>> node.to_latex(symbol_library=SymbolLibrary.default_symbols())
'$1 + X_{0}$'
>>> node = Node("+", Node("*", Node("X_0"), Node("X_1")), Node("1"))
>>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
$1 + X_{1} \cdot X_{0}$
>>> node = Node("sin", None, Node("X_0"))
>>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
$\sin X_{0}$
>>> node = Node("+", Node("*", Node("X_0"), Node("C")), Node("C"))
>>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
$C_{0} + C_{1} \cdot X_{0}$

Parameters:

Name Type Description Default
symbol_library SymbolLibrary

Symbol library providing the LaTeX template for each token.

required

Returns:

Type Description
str

A LaTeX string of the form $...$.

Raises:

Type Description
Exception

If the tree contains a token whose type cannot be resolved in symbol_library.

Source code in SRToolkit/utils/expression_tree.py
def to_latex(self, symbol_library: SymbolLibrary) -> str:
    r"""
    Render the tree rooted at this node as a LaTeX math string.

    The expression body is produced by the private recursive helper; this method
    takes the first element of the helper's result and wraps it in ``$...$``.

    Examples:
        >>> node = Node("+", Node("X_0"), Node("1"))
        >>> node.to_latex(symbol_library=SymbolLibrary.default_symbols())
        '$1 + X_{0}$'
        >>> node = Node("+", Node("*", Node("X_0"), Node("X_1")), Node("1"))
        >>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
        $1 + X_{1} \cdot X_{0}$
        >>> node = Node("sin", None, Node("X_0"))
        >>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
        $\sin X_{0}$
        >>> node = Node("+", Node("*", Node("X_0"), Node("C")), Node("C"))
        >>> print(node.to_latex(symbol_library=SymbolLibrary.default_symbols()))
        $C_{0} + C_{1} \cdot X_{0}$

    Args:
        symbol_library: Symbol library providing the LaTeX template for each token.

    Returns:
        A LaTeX string of the form ``$...$``.

    Raises:
        Exception: If the tree contains a token whose type cannot be resolved in
            ``symbol_library``.
    """
    assert symbol_library is not None, "[Node.to_latex] parameter symbol_library should be of type SymbolLibrary"
    # Only the first element of the helper's result is the rendered expression.
    rendered = self.__to_latex_rec(symbol_library)
    return "${}$".format(rendered[0])

height

height() -> int

Return the height of the subtree rooted at this node.

A single-node tree has height 1.

Examples:

>>> node = Node("+", Node("x"), Node("1"))
>>> node.height()
2

Returns:

Type Description
int

Height of the subtree.

Source code in SRToolkit/utils/expression_tree.py
def height(self) -> int:
    """
    Compute the height of the subtree rooted at this node.

    The height counts nodes on the longest root-to-leaf path, so a tree made of a
    single node has height 1.

    Examples:
        >>> node = Node("+", Node("x"), Node("1"))
        >>> node.height()
        2

    Returns:
        Height of the subtree.
    """
    tallest_child = 0
    for child in (self.left, self.right):
        if child is not None:
            tallest_child = max(tallest_child, child.height())
    # This node adds one level on top of its tallest child.
    return 1 + tallest_child

__len__

__len__() -> int

Return the number of nodes in the subtree rooted at this node.

Examples:

>>> node = Node("+", Node("x"), Node("1"))
>>> len(node)
3

Returns:

Type Description
int

Total node count of the subtree.

Source code in SRToolkit/utils/expression_tree.py
def __len__(self) -> int:
    """
    Count the nodes in the subtree rooted at this node (this node included).

    Examples:
        >>> node = Node("+", Node("x"), Node("1"))
        >>> len(node)
        3

    Returns:
        Total node count of the subtree.
    """
    total = 1  # this node
    for child in (self.left, self.right):
        if child is not None:
            total += len(child)
    return total

__str__

__str__() -> str

Return the expression as a concatenated string in default infix notation; the result may contain redundant parentheses.

Examples:

>>> node = Node("+", Node("x"), Node("1"))
>>> str(node)
'1+x'

Returns:

Type Description
str

Concatenated token string with no spaces.

Source code in SRToolkit/utils/expression_tree.py
def __str__(self) -> str:
    """
    Render the expression as a single string with no separators.

    The tokens come from ``to_list()`` called with its defaults (infix notation, no
    symbol library), so the result may contain redundant parentheses.

    Examples:
        >>> node = Node("+", Node("x"), Node("1"))
        >>> str(node)
        '1+x'

    Returns:
        Concatenated token string with no spaces.
    """
    tokens = self.to_list()
    return "".join(tokens)

__copy__

__copy__() -> Node

Return a deep copy of the subtree rooted at this node.

Examples:

>>> node = Node("+", Node("X_0"), Node("1"))
>>> new_node = copy(node)
>>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
['1', '+', 'X_0']
>>> new_node.to_list(symbol_library=SymbolLibrary.default_symbols())
['1', '+', 'X_0']
>>> node == node
True
>>> node == new_node
False

Returns:

Type Description
Node

An independent copy of the subtree.

Source code in SRToolkit/utils/expression_tree.py
def __copy__(self) -> "Node":
    """
    Build an independent copy of the subtree rooted at this node.

    Each existing child is copied with ``copy``, and the results are attached to a
    newly constructed ``Node``.

    Examples:
        >>> node = Node("+", Node("X_0"), Node("1"))
        >>> new_node = copy(node)
        >>> node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['1', '+', 'X_0']
        >>> new_node.to_list(symbol_library=SymbolLibrary.default_symbols())
        ['1', '+', 'X_0']
        >>> node == node
        True
        >>> node == new_node
        False

    Returns:
        An independent copy of the subtree.
    """
    left_clone = copy(self.left) if self.left is not None else None
    right_clone = copy(self.right) if self.right is not None else None
    return Node(copy(self.symbol), left=left_clone, right=right_clone)

SymbolLibrary

SymbolLibrary(
    symbols: Optional[List[str]] = None,
    num_variables: int = 0,
    preamble: Optional[List[str]] = None,
)

A registry of tokens and their properties, used throughout the toolkit to parse, compile, and generate symbolic expressions.

By default, the library uses NumPy for operator and function evaluation. To use a different backend, pass the required import statements via preamble.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x", "x")
>>> library.get_type("x")
'var'
>>> library.get_precedence("x")
0
>>> library.get_np_fn("x")
'x'
>>> library.remove_symbol("x")
>>> library = SymbolLibrary.default_symbols()
>>> # You can also initialize the library with a list of symbols (listed in SymbolLibrary.default_symbols)
>>> # and the number of variables.
>>> library2 = SymbolLibrary(["+", "*", "sin"], num_variables=2)
>>> len(library2)
5

Parameters:

Name Type Description Default
symbols Optional[List[str]]

Symbols to pre-populate from the default set. None produces an empty library. See default_symbols for the supported names.

None
num_variables int

Number of variable tokens to add, labeled X_0 through X_{num_variables-1}. Default is 0.

0
preamble Optional[List[str]]

Import statements prepended to compiled expression functions. Defaults to ["import numpy as np"].

None

Attributes:

Name Type Description
symbols

Mapping from token string to its property dict (type, precedence, NumPy function string, LaTeX template).

Source code in SRToolkit/utils/symbol_library.py
def __init__(
    self, symbols: Optional[List[str]] = None, num_variables: int = 0, preamble: Optional[List[str]] = None
) -> None:
    """
    A registry of tokens and their properties, used throughout the toolkit to parse,
    compile, and generate symbolic expressions.

    NumPy is the default backend for operator and function evaluation. A different
    backend can be used by supplying the required import statements via ``preamble``.

    When ``symbols`` is ``None`` and ``num_variables`` is ``0`` the library starts
    empty; in every other case its contents are taken from
    ``SymbolLibrary.from_symbol_list``.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x", "x")
        >>> library.get_type("x")
        'var'
        >>> library.get_precedence("x")
        0
        >>> library.get_np_fn("x")
        'x'
        >>> library.remove_symbol("x")
        >>> library = SymbolLibrary.default_symbols()
        >>> # You can also initialize the library with a list of symbols (listed in SymbolLibrary.default_symbols)
        >>> # and the number of variables.
        >>> library2 = SymbolLibrary(["+", "*", "sin"], num_variables=2)
        >>> len(library2)
        5

    Args:
        symbols: Symbols to pre-populate from the default set. ``None`` adds no
            symbols besides the requested variables. See [default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols] for the supported names.
        num_variables: Number of variable tokens to add, labeled ``X_0`` through
            ``X_{num_variables-1}``. Default is ``0``.
        preamble: Import statements prepended to compiled expression functions.
            Defaults to ``["import numpy as np"]``.

    Attributes:
        symbols: Mapping from token string to its property dict (type, precedence,
            NumPy function string, LaTeX template).
    """
    self.preamble = ["import numpy as np"] if preamble is None else preamble

    # Nothing requested at all: start from an empty registry.
    if symbols is None and num_variables == 0:
        self.symbols: Dict[str, Any] = {}
        self.num_variables = 0
        return

    # Otherwise reuse the symbol table of a library filtered from the defaults.
    requested = [] if symbols is None else symbols
    self.symbols = SymbolLibrary.from_symbol_list(requested, num_variables).symbols
    self.num_variables = num_variables

add_symbol

add_symbol(
    symbol: str,
    symbol_type: str,
    precedence: int,
    np_fn: str,
    latex_str: Optional[str] = None,
)

Add a token to the library with its associated type, precedence, NumPy function string, and LaTeX template.

Symbol types:

  • "op": binary operator (e.g. +, *).
  • "fn": unary function (e.g. sin, sqrt).
  • "lit": literal with a fixed value (e.g. pi, e).
  • "const": free constant whose value is optimised during parameter estimation (e.g. C). Using a single "const" token is recommended; multiple tokens increase complexity and reduce readability.
  • "var": input variable whose values are read from the data array X.

If latex_str is omitted, a default template is generated: "{} \text{symb} {}" for operators, "\text{symb} {}" for functions, and "\text{symb}" otherwise.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.add_symbol("sin", "fn", 5, "np.sin({})", r"\sin {}")
>>> library.add_symbol("C", "const", 5, "C[{}]", r"c_{}")
>>> library.add_symbol("X_0", "var", 5, "X[:, 0]", r"X_0")
>>> library.add_symbol("pi", "lit", 5, "np.pi", r"\pi")

Parameters:

Name Type Description Default
symbol str

Token string to register.

required
symbol_type str

One of "op", "fn", "lit", "const", or "var".

required
precedence int

Operator precedence, used for infix reconstruction and PCFG generation.

required
np_fn str

Python/NumPy expression string used in compiled callables (e.g. "{} = np.sin({})").

required
latex_str Optional[str]

LaTeX template string with {} placeholders for operands. Auto-generated if omitted.

None

Raises:

Type Description
ValueError

If symbol_type is not one of the valid types.

Source code in SRToolkit/utils/symbol_library.py
def add_symbol(
    self,
    symbol: str,
    symbol_type: str,
    precedence: int,
    np_fn: str,
    latex_str: Optional[str] = None,
) -> None:
    r"""
    Add a token to the library with its associated type, precedence, NumPy function
    string, and LaTeX template.

    Symbol types:

    - ``"op"``: binary operator (e.g. ``+``, ``*``).
    - ``"fn"``: unary function (e.g. ``sin``, ``sqrt``).
    - ``"lit"``: literal with a fixed value (e.g. ``pi``, ``e``).
    - ``"const"``: free constant whose value is optimised during parameter estimation
      (e.g. ``C``). Using a single ``"const"`` token is recommended; multiple tokens
      increase complexity and reduce readability.
    - ``"var"``: input variable whose values are read from the data array ``X``.

    If ``latex_str`` is omitted, a default template is generated: ``"{} \text{symb} {}"``
    for operators, ``"\text{symb} {}"`` for functions, and ``"\text{symb}"`` otherwise.

    For ``"var"`` symbols, a ``None`` or empty ``np_fn`` defaults to ``"X[:, k]"``
    where ``k`` is the current ``num_variables``; ``num_variables`` is then
    incremented for every variable added. An existing entry with the same ``symbol``
    is overwritten.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.add_symbol("sin", "fn", 5, "np.sin({})", r"\sin {}")
        >>> library.add_symbol("C", "const", 5, "C[{}]", r"c_{}")
        >>> library.add_symbol("X_0", "var", 5, "X[:, 0]", r"X_0")
        >>> library.add_symbol("pi", "lit", 5, "np.pi", r"\pi")

    Args:
        symbol: Token string to register.
        symbol_type: One of ``"op"``, ``"fn"``, ``"lit"``, ``"const"``, or ``"var"``.
        precedence: Operator precedence, used for infix reconstruction and PCFG generation.
        np_fn: Python/NumPy expression string used in compiled callables
            (e.g. ``"{} = np.sin({})"``).
        latex_str: LaTeX template string with ``{}`` placeholders for operands.
            Auto-generated if omitted.

    Raises:
        ValueError: If ``symbol_type`` is not one of the valid types.
    """
    if symbol_type not in VALID_SYMBOL_TYPES:
        raise ValueError(f"Invalid symbol type '{symbol_type}'. Must be one of: {sorted(VALID_SYMBOL_TYPES)}")

    if latex_str is None:
        # The backslash must be escaped: in a non-raw string "\t" is a TAB
        # character, which would turn the intended \text{...} into "<TAB>ext{...}".
        text_macro = f"\\text{{{symbol}}}"
        if symbol_type == "op":
            latex_str = f"{{}} {text_macro} {{}}"
        elif symbol_type == "fn":
            latex_str = f"{text_macro} {{}}"
        else:
            latex_str = text_macro

    if symbol_type == "var":
        # Variables without an explicit accessor read the next free column of X.
        if np_fn is None or np_fn == "":
            np_fn = f"X[:, {self.num_variables}]"
        self.num_variables += 1

    self.symbols[symbol] = {
        "symbol": symbol,
        "type": symbol_type,
        "precedence": precedence,
        "np_fn": np_fn,
        "latex_str": latex_str,
    }

remove_symbol

remove_symbol(symbol: str)

Remove a token from the library.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> len(library.symbols)
1
>>> library.remove_symbol("x")
>>> len(library.symbols)
0

Parameters:

Name Type Description Default
symbol str

Token string to remove.

required

Raises:

Type Description
KeyError

If symbol is not present in the library.

Source code in SRToolkit/utils/symbol_library.py
def remove_symbol(self, symbol: str):
    """
    Delete a token from the library.

    Only the entry in ``symbols`` is dropped; no other attribute is modified.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> len(library.symbols)
        1
        >>> library.remove_symbol("x")
        >>> len(library.symbols)
        0

    Args:
        symbol: Token string to remove.

    Raises:
        KeyError: If ``symbol`` is not present in the library.
    """
    # pop() without a default raises KeyError for unknown tokens, like ``del`` does.
    self.symbols.pop(symbol)

get_type

get_type(symbol: str) -> str

Return the type of a symbol.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.get_type("x")
'var'

Parameters:

Name Type Description Default
symbol str

Token to look up.

required

Returns:

Type Description
str

The type string ("op", "fn", "lit", "const", or "var") if the symbol is in the library, otherwise an empty string.

Source code in SRToolkit/utils/symbol_library.py
def get_type(self, symbol: str) -> str:
    """
    Look up the type of a symbol.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.get_type("x")
        'var'

    Args:
        symbol: Token to look up.

    Returns:
        The stored type string (``"op"``, ``"fn"``, ``"lit"``, ``"const"``, or ``"var"``) for a registered symbol; an empty string for an unknown one.
    """
    if symbol not in self.symbols:
        return ""
    return self.symbols[symbol]["type"]

get_precedence

get_precedence(symbol: str) -> int

Return the precedence of a symbol.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.get_precedence("x")
0

Parameters:

Name Type Description Default
symbol str

Token to look up.

required

Returns:

Type Description
int

The precedence value if the symbol is in the library, otherwise -1.

Source code in SRToolkit/utils/symbol_library.py
def get_precedence(self, symbol: str) -> int:
    """
    Look up the precedence of a symbol.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.get_precedence("x")
        0

    Args:
        symbol: Token to look up.

    Returns:
        The stored precedence for a registered symbol; ``-1`` for an unknown one.
    """
    return self.symbols[symbol]["precedence"] if symbol in self.symbols else -1

get_np_fn

get_np_fn(symbol: str) -> str

Return the NumPy function string for a symbol.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.get_np_fn("x")
'x'

Parameters:

Name Type Description Default
symbol str

Token to look up.

required

Returns:

Type Description
str

The NumPy function string if the symbol is in the library, otherwise an empty string.

Source code in SRToolkit/utils/symbol_library.py
def get_np_fn(self, symbol: str) -> str:
    """
    Look up the NumPy function string of a symbol.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.get_np_fn("x")
        'x'

    Args:
        symbol: Token to look up.

    Returns:
        The stored NumPy function string for a registered symbol; an empty string
        for an unknown one.
    """
    if symbol not in self.symbols:
        return ""
    properties = self.symbols[symbol]
    return properties["np_fn"]

get_latex_str

get_latex_str(symbol: str) -> str

Return the LaTeX template string for a symbol.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x", "test")
>>> library.get_latex_str("x")
'test'

Parameters:

Name Type Description Default
symbol str

Token to look up.

required

Returns:

Type Description
str

The LaTeX template string if the symbol is in the library, otherwise an empty string.

Source code in SRToolkit/utils/symbol_library.py
def get_latex_str(self, symbol: str) -> str:
    """
    Look up the LaTeX template string of a symbol.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x", "test")
        >>> library.get_latex_str("x")
        'test'

    Args:
        symbol: Token to look up.

    Returns:
        The stored LaTeX template for a registered symbol; an empty string for an
        unknown one.
    """
    known = symbol in self.symbols
    return self.symbols[symbol]["latex_str"] if known else ""

get_symbols_of_type

get_symbols_of_type(symbol_type: str) -> List[str]

Return all symbols of a given type.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.add_symbol("y", "var", 0, "y")
>>> library.get_symbols_of_type("var")
['x', 'y']

Parameters:

Name Type Description Default
symbol_type str

Type to filter by. One of "op", "fn", "var", "const", "lit".

required

Returns:

Type Description
List[str]

List of token strings matching the requested type.

Source code in SRToolkit/utils/symbol_library.py
def get_symbols_of_type(self, symbol_type: str) -> List[str]:
    """
    Collect every symbol whose type equals ``symbol_type``.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.add_symbol("y", "var", 0, "y")
        >>> library.get_symbols_of_type("var")
        ['x', 'y']

    Args:
        symbol_type: Type to filter by. One of ``"op"``, ``"fn"``, ``"var"``,
            ``"const"``, ``"lit"``.

    Returns:
        Matching token strings, in the iteration order of ``symbols``.
    """
    return [token for token in self.symbols if self.get_type(token) == symbol_type]

symbols2index

symbols2index() -> Dict[str, int]

Return a mapping from each token to its index in insertion order.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x")
>>> library.add_symbol("y", "var", 0, "y")
>>> print(library.symbols2index())
{'x': 0, 'y': 1}
>>> library.remove_symbol("x")
>>> print(library.symbols2index())
{'y': 0}

Returns:

Type Description
Dict[str, int]

Dict mapping each token string to its zero-based position in the library.

Source code in SRToolkit/utils/symbol_library.py
def symbols2index(self) -> Dict[str, int]:
    """
    Number the tokens of the library in the iteration order of ``symbols``.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x")
        >>> library.add_symbol("y", "var", 0, "y")
        >>> print(library.symbols2index())
        {'x': 0, 'y': 1}
        >>> library.remove_symbol("x")
        >>> print(library.symbols2index())
        {'y': 0}

    Returns:
        Dict mapping each token string to its zero-based position in the library.
    """
    index_of: Dict[str, int] = {}
    for position, token in enumerate(self.symbols):
        index_of[token] = position
    return index_of

from_symbol_list staticmethod

from_symbol_list(symbols: List[str], num_variables: int = 25) -> SymbolLibrary

Create a SymbolLibrary containing only the specified subset of default symbols.

The supported token names are those defined in default_symbols.

Examples:

>>> library = SymbolLibrary().from_symbol_list(["+", "*", "C"], num_variables=2)
>>> len(library.symbols)
5

Parameters:

Name Type Description Default
symbols List[str]

Token strings to include. Must be a subset of the default symbol names.

required
num_variables int

Number of variable tokens (X_0 through X_{num_variables-1}). Default is 25.

25

Returns:

Type Description
SymbolLibrary

A SymbolLibrary restricted to the requested symbols and variables.

Source code in SRToolkit/utils/symbol_library.py
@staticmethod
def from_symbol_list(symbols: List[str], num_variables: int = 25) -> "SymbolLibrary":
    """
    Create a [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] containing only the specified subset of default symbols.

    The library is obtained by building the full default library and removing every
    token that was not requested. The supported token names are those defined in
    [default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols];
    requested names that are not part of the defaults have no effect.

    Examples:
        >>> library = SymbolLibrary().from_symbol_list(["+", "*", "C"], num_variables=2)
        >>> len(library.symbols)
        5

    Args:
        symbols: Token strings to include. Must be a subset of the default symbol names.
        num_variables: Number of variable tokens (``X_0`` through ``X_{num_variables-1}``).
            Default is ``25``.

    Returns:
        A [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] restricted to the requested symbols and variables.
    """
    # The variable tokens are always kept alongside the requested symbols.
    wanted = symbols + [f"X_{i}" for i in range(num_variables)]

    library = SymbolLibrary.default_symbols(num_variables)

    # Decide what to drop before mutating, so the dict is never changed mid-iteration.
    unwanted = [token for token in library.symbols if token not in wanted]
    for token in unwanted:
        library.remove_symbol(token)

    return library

default_symbols staticmethod

default_symbols(num_variables: int = 25) -> SymbolLibrary

Return a SymbolLibrary pre-populated with standard mathematical symbols.

Supported tokens:

  • Operators ("op"): +, -, *, /, ^
  • Functions ("fn"): u-, sqrt, sin, cos, exp, tan, arcsin, arccos, arctan, sinh, cosh, tanh, floor, ceil, ln, log, ^-1, ^2, ^3, ^4, ^5
  • Literals ("lit"): pi, e
  • Free constant ("const"): C
  • Variables ("var"): X_0 through X_{num_variables-1}, mapped to columns of the input array in order.

Examples:

>>> library = SymbolLibrary.default_symbols()
>>> len(library.symbols)
54

Parameters:

Name Type Description Default
num_variables int

Number of variable tokens to include. Default is 25.

25

Returns:

Type Description
SymbolLibrary

A SymbolLibrary populated with the symbols listed above.

Source code in SRToolkit/utils/symbol_library.py
@staticmethod
def default_symbols(num_variables: int = 25) -> "SymbolLibrary":
    """
    Create a [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] filled with the standard mathematical tokens.

    Registered tokens, in this order:

    - **Operators** (``"op"``): ``+``, ``-``, ``*``, ``/``, ``^``
    - **Functions** (``"fn"``): ``u-``, ``sqrt``, ``sin``, ``cos``, ``exp``, ``tan``,
      ``arcsin``, ``arccos``, ``arctan``, ``sinh``, ``cosh``, ``tanh``, ``floor``,
      ``ceil``, ``ln``, ``log``, ``^-1``, ``^2``, ``^3``, ``^4``, ``^5``
    - **Literals** (``"lit"``): ``pi``, ``e``
    - **Free constant** (``"const"``): ``C``
    - **Variables** (``"var"``): ``X_0`` through ``X_{num_variables-1}``; variable
      ``X_i`` is evaluated as ``X[:, i]``.

    Examples:
        >>> library = SymbolLibrary.default_symbols()
        >>> len(library.symbols)
        54

    Args:
        num_variables: Number of variable tokens to register. Values that are not
            greater than zero add no variables. Default is ``25``.

    Returns:
        A [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] containing the tokens listed above.
    """
    # One row per non-variable token, in registration order:
    # (token, symbol_type, precedence, np_fn, latex_str)
    definitions = (
        ("+", "op", 0, "{} = {} + {}", r"{} + {}"),
        ("-", "op", 0, "{} = {} - {}", r"{} - {}"),
        ("*", "op", 1, "{} = {} * {}", r"{} \cdot {}"),
        ("/", "op", 1, "{} = {} / {}", r"\frac{{{}}}{{{}}}"),
        ("^", "op", 2, "{} = np.power({},{})", r"{}^{{{}}}"),
        ("u-", "fn", 5, "{} = -{}", r"- {}"),
        ("sqrt", "fn", 5, "{} = np.sqrt({})", r"\sqrt {{{}}}"),
        ("sin", "fn", 5, "{} = np.sin({})", r"\sin {}"),
        ("cos", "fn", 5, "{} = np.cos({})", r"\cos {}"),
        ("exp", "fn", 5, "{} = np.exp({})", r"e^{{{}}}"),
        ("tan", "fn", 5, "{} = np.tan({})", r"\tan {}"),
        ("arcsin", "fn", 5, "{} = np.arcsin({})", r"\arcsin {}"),
        ("arccos", "fn", 5, "{} = np.arccos({})", r"\arccos {}"),
        ("arctan", "fn", 5, "{} = np.arctan({})", r"\arctan {}"),
        ("sinh", "fn", 5, "{} = np.sinh({})", r"\sinh {}"),
        ("cosh", "fn", 5, "{} = np.cosh({})", r"\cosh {}"),
        ("tanh", "fn", 5, "{} = np.tanh({})", r"\tanh {}"),
        ("floor", "fn", 5, "{} = np.floor({})", r"\lfloor {} \rfloor"),
        ("ceil", "fn", 5, "{} = np.ceil({})", r"\lceil {} \rceil"),
        ("ln", "fn", 5, "{} = np.log({})", r"\ln {}"),
        ("log", "fn", 5, "{} = np.log10({})", r"\log_{{10}} {}"),
        ("^-1", "fn", -1, "{} = 1/{}", r"{}^{{-1}}"),
        ("^2", "fn", -1, "{} = {}**2", r"{}^2"),
        ("^3", "fn", -1, "{} = {}**3", r"{}^3"),
        ("^4", "fn", -1, "{} = {}**4", r"{}^4"),
        ("^5", "fn", -1, "{} = {}**5", r"{}^5"),
        ("pi", "lit", 5, "np.full(X.shape[0], np.pi)", r"\pi"),
        ("e", "lit", 5, "np.full(X.shape[0], np.e)", r"e"),
        ("C", "const", 5, "np.full(X.shape[0], C[{}])", r"C_{{{}}}"),
    )

    library = SymbolLibrary()
    for token, kind, precedence, np_fn, latex_str in definitions:
        library.add_symbol(
            token,
            symbol_type=kind,
            precedence=precedence,
            np_fn=np_fn,
            latex_str=latex_str,
        )

    # Variables come last; X_i reads column i of the input array X.
    if num_variables > 0:
        for i in range(num_variables):
            library.add_symbol(f"X_{i}", "var", 5, f"X[:, {i}]", f"X_{{{i}}}")

    return library

to_dict

to_dict() -> dict

Serialize the library to a JSON-safe dictionary.

Returns:

Type Description
dict

A dictionary suitable for passing to from_dict.

Source code in SRToolkit/utils/symbol_library.py
def to_dict(self) -> dict:
    """
    Export the library's state as a plain dictionary.

    The result carries a ``"format_version"`` of ``1`` and the type tag
    ``"SymbolLibrary"`` next to the ``symbols``, ``preamble`` and ``num_variables``
    attributes. The attribute values are stored by reference, not copied.

    Returns:
        A dictionary suitable for passing to [from_dict][SRToolkit.utils.symbol_library.SymbolLibrary.from_dict].
    """
    payload = dict(format_version=1, type="SymbolLibrary")
    payload["symbols"] = self.symbols
    payload["preamble"] = self.preamble
    payload["num_variables"] = self.num_variables
    return payload

from_dict staticmethod

from_dict(d: dict) -> SymbolLibrary

Reconstruct a SymbolLibrary from a dictionary produced by to_dict.

Parameters:

Name Type Description Default
d dict

Dictionary representation of the library.

required

Returns:

Type Description
SymbolLibrary

The reconstructed SymbolLibrary.

Raises:

Type Description
ValueError

If d["format_version"] is not 1.

Source code in SRToolkit/utils/symbol_library.py
@staticmethod
def from_dict(d: dict) -> "SymbolLibrary":
    """
    Rebuild a [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] from the output of [to_dict][SRToolkit.utils.symbol_library.SymbolLibrary.to_dict].

    A missing ``"format_version"`` key is treated as version ``1``. The ``"symbols"``,
    ``"preamble"`` and ``"num_variables"`` entries are assigned to the new instance
    as-is (no copy is made).

    Args:
        d: Dictionary representation of the library.

    Returns:
        The reconstructed [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary].

    Raises:
        ValueError: If ``d["format_version"]`` is present and is not ``1``.
    """
    declared_version = d.get("format_version", 1)
    if declared_version != 1:
        raise ValueError(
            f"[SymbolLibrary.from_dict] Unsupported format_version: {d.get('format_version')!r}. Expected 1."
        )

    restored = SymbolLibrary()
    for attribute in ("symbols", "preamble", "num_variables"):
        setattr(restored, attribute, d[attribute])
    return restored

__len__

__len__() -> int

Return the number of symbols currently in the library.

Examples:

>>> library = SymbolLibrary.default_symbols(5)
>>> len(library)
34
>>> library.add_symbol("a", "lit", 5, "a", "a")
>>> len(library)
35

Returns:

Type Description
int

Number of tokens registered in the library.

Source code in SRToolkit/utils/symbol_library.py
def __len__(self) -> int:
    """
    Report how many tokens the library currently holds.

    Examples:
        >>> library = SymbolLibrary.default_symbols(5)
        >>> len(library)
        34
        >>> library.add_symbol("a", "lit", 5, "a", "a")
        >>> len(library)
        35

    Returns:
        The size of ``self.symbols``.
    """
    registered = self.symbols
    return len(registered)

__str__

__str__() -> str

Return a comma-separated string of all registered token strings.

Examples:

>>> library = SymbolLibrary()
>>> library.add_symbol("x", "var", 0, "x", "x")
>>> str(library)
'x'
>>> library.add_symbol("sin", "fn", 5, "{} = np.sin({})", r"\sin {}")
>>> str(library)
'x, sin'

Returns:

Type Description
str

All token names joined by ", ", in insertion order.

Source code in SRToolkit/utils/symbol_library.py
def __str__(self) -> str:
    r"""
    Render the library as its token names separated by ``", "``.

    Examples:
        >>> library = SymbolLibrary()
        >>> library.add_symbol("x", "var", 0, "x", "x")
        >>> str(library)
        'x'
        >>> library.add_symbol("sin", "fn", 5, "{} = np.sin({})", r"\sin {}")
        >>> str(library)
        'x, sin'

    Returns:
        The keys of ``self.symbols`` joined by ``", "``, in iteration order.
        An empty library yields an empty string.
    """
    token_names = list(self.symbols.keys())
    return ", ".join(token_names)

__copy__

__copy__() -> SymbolLibrary

Return a copy of the library with independent copies of all attributes.

Examples:

>>> old_symbols = SymbolLibrary()
>>> old_symbols.add_symbol("x", "var", 0, "x", "x")
>>> print(old_symbols)
x
>>> new_symbols = copy.copy(old_symbols)
>>> new_symbols.add_symbol("sin", "fn", 5, "{} = np.sin({})", r"\sin {}")
>>> print(old_symbols)
x
>>> print(new_symbols)
x, sin

Returns:

Type Description
SymbolLibrary

A new SymbolLibrary instance with deep-copied symbols and preamble.

Source code in SRToolkit/utils/symbol_library.py
def __copy__(self) -> "SymbolLibrary":
    r"""
    Produce a library whose contents can be changed without affecting this one.

    ``symbols`` and ``preamble`` are duplicated with ``copy.deepcopy``;
    ``num_variables`` is assigned directly.

    Examples:
        >>> old_symbols = SymbolLibrary()
        >>> old_symbols.add_symbol("x", "var", 0, "x", "x")
        >>> print(old_symbols)
        x
        >>> new_symbols = copy.copy(old_symbols)
        >>> new_symbols.add_symbol("sin", "fn", 5, "{} = np.sin({})", r"\sin {}")
        >>> print(old_symbols)
        x
        >>> print(new_symbols)
        x, sin

    Returns:
        A new [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] instance with deep-copied symbols and preamble.
    """
    duplicate = SymbolLibrary()
    # Deep-copy the containers so edits to the duplicate never reach the original.
    for attribute in ("symbols", "preamble"):
        setattr(duplicate, attribute, copy.deepcopy(getattr(self, attribute)))
    duplicate.num_variables = self.num_variables
    return duplicate

EstimationSettings

Bases: TypedDict

Shared settings for parameter estimation and BED evaluation.

Passed as **kwargs to SR_dataset, SR_evaluator, and ParameterEstimator. All fields are optional.

Examples:

>>> settings: EstimationSettings = {"method": "L-BFGS-B", "max_iter": 200}
>>> settings.get("method")
'L-BFGS-B'
>>> settings.get("tol", 1e-6)
1e-06

Attributes:

Name Type Description
method str

Optimization algorithm for parameter fitting. Default: "L-BFGS-B".

tol float

Termination tolerance for the optimizer. Default: 1e-6.

gtol float

Gradient-norm termination tolerance. Default: 1e-3.

max_iter int

Maximum optimizer iterations. Default: 100.

constant_bounds Union[Tuple[float, float]]

(lower, upper) bounds for sampled constant values. Default: (-5, 5).

initialization str

Constant initialization strategy — "random" samples uniformly within constant_bounds; "mean" sets all constants to the midpoint. Default: "random".

max_constants int

Maximum number of free constants permitted in a single expression. Expressions exceeding this limit score NaN. Default: 8.

max_expr_length int

Maximum expression length in tokens. -1 disables the limit. Default: -1.

num_points_sampled int

Number of domain points used when evaluating expression behavior for BED. -1 uses all points in X. Default: 64.

bed_X Optional[ndarray]

Fixed evaluation points for BED. If None, points are sampled from domain_bounds or selected randomly from X. Default: None.

num_consts_sampled int

Number of constant vectors sampled per expression for BED. Default: 32.

domain_bounds Optional[List[Tuple[float, float]]]

Per-variable (lower, upper) bounds used to sample bed_X when it is None. Default: None.

EvalResult dataclass

EvalResult(
    min_error: float,
    best_expr: str,
    num_evaluated: int,
    evaluation_calls: int,
    top_models: List[ModelResult],
    all_models: List[ModelResult],
    approach_name: str,
    success: bool,
    dataset_name: Optional[str] = None,
    metadata: Optional[dict] = None,
    augmentations: Dict[str, Dict[str, Any]] = dict(),
)

Result for a single SR experiment, as returned by SR_results[i].

Examples:

>>> model = ModelResult(expr=["X_0"], error=0.05)
>>> result = EvalResult(
...     min_error=0.05,
...     best_expr="X_0",
...     num_evaluated=500,
...     evaluation_calls=612,
...     top_models=[model],
...     all_models=[model],
...     approach_name="MyApproach",
...     success=True,
... )
>>> result.min_error
0.05
>>> result.success
True
>>> result.dataset_name is None
True

Attributes:

Name Type Description
min_error float

Lowest error achieved across all evaluated expressions.

best_expr str

String representation of the best expression found.

num_evaluated int

Number of unique expressions evaluated.

evaluation_calls int

Number of times evaluate_expr was called (includes cache hits).

top_models List[ModelResult]

Top-k models sorted by error.

all_models List[ModelResult]

All evaluated models sorted by error.

approach_name str

Name of the SR approach, or empty string if not provided.

success bool

Whether min_error is below the configured success_threshold.

dataset_name Optional[str]

Name of the dataset, extracted from metadata. None if not provided.

metadata Optional[dict]

Remaining metadata dict after dataset_name is popped. None if empty.

augmentations Dict[str, Dict[str, Any]]

Per-augmenter data keyed by augmenter name. Populated by ResultAugmenter subclasses via add_augmentation.

add_augmentation

add_augmentation(name: str, data: Dict[str, Any], aug_type: str) -> None

Attach augmentation data produced by a ResultAugmenter to this result.

If name is already present in augmentations, a numeric suffix is appended (name_1, name_2, …) to avoid overwriting existing data.

Examples:

>>> model = ModelResult(expr=["X_0"], error=0.05)
>>> result = EvalResult(
...     min_error=0.05, best_expr="X_0", num_evaluated=10,
...     evaluation_calls=10, top_models=[model], all_models=[model],
...     approach_name="MyApproach", success=True,
... )
>>> result.add_augmentation("complexity", {"value": 3}, "ComplexityAugmenter")
>>> result.augmentations["complexity"]["value"]
3
>>> result.add_augmentation("complexity", {"value": 5}, "ComplexityAugmenter")
>>> "complexity_1" in result.augmentations
True

Parameters:

Name Type Description Default
name str

Key under which the augmentation is stored in augmentations. A suffix is added automatically if the key already exists.

required
data Dict[str, Any]

Arbitrary dict of augmentation data. A "_type" key is injected automatically and should not be included.

required
aug_type str

Augmenter class name, stored as data["_type"].

required
Source code in SRToolkit/utils/types.py
def add_augmentation(self, name: str, data: Dict[str, Any], aug_type: str) -> None:
    """
    Store augmentation data on this result under a key that is not yet in use.

    The first free key among ``name``, ``name_1``, ``name_2``, … is chosen, so an
    existing entry in ``augmentations`` is never overwritten. ``data`` itself is
    tagged with ``aug_type`` under ``"_type"`` and stored without copying, so the
    caller's dict is modified.

    Examples:
        >>> model = ModelResult(expr=["X_0"], error=0.05)
        >>> result = EvalResult(
        ...     min_error=0.05, best_expr="X_0", num_evaluated=10,
        ...     evaluation_calls=10, top_models=[model], all_models=[model],
        ...     approach_name="MyApproach", success=True,
        ... )
        >>> result.add_augmentation("complexity", {"value": 3}, "ComplexityAugmenter")
        >>> result.augmentations["complexity"]["value"]
        3
        >>> result.add_augmentation("complexity", {"value": 5}, "ComplexityAugmenter")
        >>> "complexity_1" in result.augmentations
        True

    Args:
        name: Preferred key in ``augmentations``. A numeric suffix is appended
            when the key is already taken.
        data: Arbitrary dict of augmentation data. Its ``"_type"`` entry is set
            by this method and should not be supplied by the caller.
        aug_type: Augmenter class name, stored as ``data["_type"]``.
    """
    key = name
    suffix = 0
    # Walk name, name_1, name_2, ... until an unused key is found.
    while key in self.augmentations:
        suffix += 1
        key = f"{name}_{suffix}"
    data["_type"] = aug_type
    self.augmentations[key] = data

to_dict

to_dict() -> dict

Serialize this evaluation result to a JSON-safe dictionary.

NumPy arrays and scalars within nested ModelResult entries are converted to native Python types so the result can be passed directly to json.dump.

Examples:

>>> model = ModelResult(expr=["X_0"], error=0.05)
>>> result = EvalResult(
...     min_error=0.05, best_expr="X_0", num_evaluated=10,
...     evaluation_calls=10, top_models=[model], all_models=[model],
...     approach_name="MyApproach", success=True,
... )
>>> d = result.to_dict()
>>> d["min_error"]
0.05
>>> d["approach_name"]
'MyApproach'
>>> len(d["top_models"])
1

Returns:

Type Description
dict

A JSON-safe dictionary suitable for passing to from_dict.

Source code in SRToolkit/utils/types.py
def to_dict(self) -> dict:
    """
    Convert this evaluation result into a dictionary of plain values.

    Scalar fields are coerced with ``float``/``int``/``bool``, every entry of
    ``top_models`` and ``all_models`` is serialized through its own ``to_dict``,
    and ``augmentations`` is passed through ``_to_json_safe``. ``best_expr``,
    ``approach_name``, ``dataset_name`` and ``metadata`` are stored unchanged.

    Examples:
        >>> model = ModelResult(expr=["X_0"], error=0.05)
        >>> result = EvalResult(
        ...     min_error=0.05, best_expr="X_0", num_evaluated=10,
        ...     evaluation_calls=10, top_models=[model], all_models=[model],
        ...     approach_name="MyApproach", success=True,
        ... )
        >>> d = result.to_dict()
        >>> d["min_error"]
        0.05
        >>> d["approach_name"]
        'MyApproach'
        >>> len(d["top_models"])
        1

    Returns:
        A dictionary suitable for passing to ``from_dict``.
    """
    serialized: dict = {}
    serialized["min_error"] = float(self.min_error)
    serialized["best_expr"] = self.best_expr
    serialized["num_evaluated"] = int(self.num_evaluated)
    serialized["evaluation_calls"] = int(self.evaluation_calls)
    serialized["top_models"] = [entry.to_dict() for entry in self.top_models]
    serialized["all_models"] = [entry.to_dict() for entry in self.all_models]
    serialized["approach_name"] = self.approach_name
    serialized["success"] = bool(self.success)
    serialized["dataset_name"] = self.dataset_name
    serialized["metadata"] = self.metadata
    serialized["augmentations"] = _to_json_safe(self.augmentations)
    return serialized

from_dict staticmethod

from_dict(data: dict) -> EvalResult

Reconstruct an EvalResult from a dictionary produced by to_dict.

Examples:

>>> model = ModelResult(expr=["X_0"], error=0.05)
>>> result = EvalResult(
...     min_error=0.05, best_expr="X_0", num_evaluated=10,
...     evaluation_calls=10, top_models=[model], all_models=[model],
...     approach_name="MyApproach", success=True,
... )
>>> result2 = EvalResult.from_dict(result.to_dict())
>>> result2.min_error
0.05
>>> result2.best_expr
'X_0'
>>> len(result2.top_models)
1

Parameters:

Name Type Description Default
data dict

Dictionary representation of an EvalResult, as produced by to_dict.

required

Returns:

Type Description
EvalResult

The reconstructed EvalResult.

Source code in SRToolkit/utils/types.py
@staticmethod
def from_dict(data: dict) -> "EvalResult":
    """
    Rebuild an ``EvalResult`` from the dictionary form produced by ``to_dict``.

    ``dataset_name`` and ``metadata`` are optional and become ``None`` when
    absent; every other key is required. Model entries are rebuilt with
    ``ModelResult.from_dict`` and ``augmentations`` is passed through
    ``_from_json_safe``.

    Examples:
        >>> model = ModelResult(expr=["X_0"], error=0.05)
        >>> result = EvalResult(
        ...     min_error=0.05, best_expr="X_0", num_evaluated=10,
        ...     evaluation_calls=10, top_models=[model], all_models=[model],
        ...     approach_name="MyApproach", success=True,
        ... )
        >>> result2 = EvalResult.from_dict(result.to_dict())
        >>> result2.min_error
        0.05
        >>> result2.best_expr
        'X_0'
        >>> len(result2.top_models)
        1

    Args:
        data: Dictionary representation of an ``EvalResult``, as produced
            by ``to_dict``.

    Returns:
        The reconstructed ``EvalResult``.
    """
    fields = {
        "min_error": data["min_error"],
        "best_expr": data["best_expr"],
        "num_evaluated": data["num_evaluated"],
        "evaluation_calls": data["evaluation_calls"],
        "top_models": [ModelResult.from_dict(entry) for entry in data["top_models"]],
        "all_models": [ModelResult.from_dict(entry) for entry in data["all_models"]],
        "approach_name": data["approach_name"],
        "success": data["success"],
        # Optional keys: fall back to None when they are missing.
        "dataset_name": data.get("dataset_name"),
        "metadata": data.get("metadata"),
        "augmentations": _from_json_safe(data["augmentations"]),
    }
    return EvalResult(**fields)

ModelResult dataclass

ModelResult(
    expr: List[str],
    error: float,
    parameters: Optional[ndarray] = None,
    augmentations: Dict[str, Dict[str, Any]] = dict(),
)

A single model entry in EvalResult.top_models and EvalResult.all_models.

Examples:

>>> result = ModelResult(expr=["C", "*", "X_0"], error=0.42)
>>> result.expr
['C', '*', 'X_0']
>>> result.error
0.42
>>> result.parameters is None
True

Attributes:

Name Type Description
expr List[str]

Token list representing the expression, e.g. ["C", "*", "X_0"].

error float

Numeric error under the ranking function (RMSE or BED).

parameters Optional[ndarray]

Fitted constant values. Present for RMSE ranking only, None otherwise.

augmentations Dict[str, Dict[str, Any]]

Per-augmenter data keyed by augmenter name. Populated by ResultAugmenter subclasses via add_augmentation.

add_augmentation

add_augmentation(name: str, data: Dict[str, Any], aug_type: str) -> None

Attach augmentation data produced by a ResultAugmenter to this result.

If name is already present in augmentations, a numeric suffix is appended (name_1, name_2, …) to avoid overwriting existing data.

Examples:

>>> result = ModelResult(expr=["X_0"], error=0.1)
>>> result.add_augmentation("latex", {"value": "$X_0$"}, "LaTeXAugmenter")
>>> result.augmentations["latex"]["value"]
'$X_0$'
>>> result.add_augmentation("latex", {"value": "$X_0$"}, "LaTeXAugmenter")
>>> "latex_1" in result.augmentations
True

Parameters:

Name Type Description Default
name str

Key under which the augmentation is stored in augmentations. A suffix is added automatically if the key already exists.

required
data Dict[str, Any]

Arbitrary dict of augmentation data. A "_type" key is injected automatically and should not be included.

required
aug_type str

Augmenter class name, stored as data["_type"].

required
Source code in SRToolkit/utils/types.py
def add_augmentation(self, name: str, data: Dict[str, Any], aug_type: str) -> None:
    """
    Record augmentation data on this model result without replacing earlier entries.

    When ``name`` is already a key of ``augmentations``, the candidates
    ``name_1``, ``name_2``, … are tried in turn and the first unused one is
    taken. ``data`` is tagged in place with ``aug_type`` under ``"_type"`` and
    stored as the same object, not a copy.

    Examples:
        >>> result = ModelResult(expr=["X_0"], error=0.1)
        >>> result.add_augmentation("latex", {"value": "$X_0$"}, "LaTeXAugmenter")
        >>> result.augmentations["latex"]["value"]
        '$X_0$'
        >>> result.add_augmentation("latex", {"value": "$X_0$"}, "LaTeXAugmenter")
        >>> "latex_1" in result.augmentations
        True

    Args:
        name: Preferred key in ``augmentations``. A numeric suffix is appended
            when the key is already taken.
        data: Arbitrary dict of augmentation data. Its ``"_type"`` entry is set
            by this method and should not be supplied by the caller.
        aug_type: Augmenter class name, stored as ``data["_type"]``.
    """
    key, attempt = name, 1
    # Try name, then name_1, name_2, ... until the key is free.
    while key in self.augmentations:
        key, attempt = f"{name}_{attempt}", attempt + 1
    data["_type"] = aug_type
    self.augmentations[key] = data

to_dict

to_dict() -> dict

Serialize this model result to a JSON-safe dictionary.

NumPy arrays and scalars are converted to native Python types so the result can be passed directly to json.dump.

Examples:

>>> result = ModelResult(expr=["X_0", "+", "C"], error=0.25)
>>> d = result.to_dict()
>>> d["expr"]
['X_0', '+', 'C']
>>> d["error"]
0.25
>>> d["parameters"] is None
True

Returns:

Type Description
dict

A JSON-safe dictionary suitable for passing to from_dict.

Source code in SRToolkit/utils/types.py
def to_dict(self) -> dict:
    """
    Convert this model result into a dictionary of plain values.

    ``error`` is coerced with ``float``; ``parameters`` and ``augmentations``
    are passed through ``_to_json_safe``; ``expr`` is stored unchanged.

    Examples:
        >>> result = ModelResult(expr=["X_0", "+", "C"], error=0.25)
        >>> d = result.to_dict()
        >>> d["expr"]
        ['X_0', '+', 'C']
        >>> d["error"]
        0.25
        >>> d["parameters"] is None
        True

    Returns:
        A dictionary suitable for passing to ``from_dict``.
    """
    return dict(
        expr=self.expr,
        error=float(self.error),
        parameters=_to_json_safe(self.parameters),
        augmentations=_to_json_safe(self.augmentations),
    )

from_dict staticmethod

from_dict(data: dict) -> ModelResult

Reconstruct a ModelResult from a dictionary produced by to_dict.

Examples:

>>> result = ModelResult(expr=["X_0", "+", "C"], error=0.25)
>>> result2 = ModelResult.from_dict(result.to_dict())
>>> result2.expr
['X_0', '+', 'C']
>>> result2.error
0.25

Parameters:

Name Type Description Default
data dict

Dictionary representation of a ModelResult, as produced by to_dict.

required

Returns:

Type Description
ModelResult

The reconstructed ModelResult.

Source code in SRToolkit/utils/types.py
@staticmethod
def from_dict(data: dict) -> "ModelResult":
    """
    Rebuild a ``ModelResult`` from the dictionary form produced by ``to_dict``.

    All four keys (``"expr"``, ``"error"``, ``"parameters"``, ``"augmentations"``)
    are required. ``parameters`` and ``augmentations`` are passed through
    ``_from_json_safe``; ``expr`` and ``error`` are used as given.

    Examples:
        >>> result = ModelResult(expr=["X_0", "+", "C"], error=0.25)
        >>> result2 = ModelResult.from_dict(result.to_dict())
        >>> result2.expr
        ['X_0', '+', 'C']
        >>> result2.error
        0.25

    Args:
        data: Dictionary representation of a ``ModelResult``, as produced
            by ``to_dict``.

    Returns:
        The reconstructed ``ModelResult``.
    """
    tokens = data["expr"]
    score = data["error"]
    fitted = _from_json_safe(data["parameters"])
    extras = _from_json_safe(data["augmentations"])
    return ModelResult(expr=tokens, error=score, parameters=fitted, augmentations=extras)

expr_to_error_function

expr_to_error_function(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Callable[[np.ndarray, np.ndarray, np.ndarray], float]

Compile an expression into a callable that computes the RMSE against target values.

To use a backend other than NumPy, set symbol_library.preamble to the required import statements.

Examples:

>>> executable_fun = expr_to_error_function(["X_0", "+", "1"])
>>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
0.0
>>> tree = tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1))
>>> executable_fun = expr_to_error_function(tree)
>>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
0.0
>>> # In case you need libraries other than numpy for the evaluation of your expressions,
>>> # you can add them to the preamble in the SymbolLibrary. Here is how a preamble would look like:
>>> symbol_library = SymbolLibrary.default_symbols(1)
>>> symbol_library.preamble = ["import numpy as np"]
>>> # Usually this is done when initializing the SymbolLibrary as SymbolLibrary(preamble=preamble)
>>> executable_fun = expr_to_error_function(tree, symbol_library)
>>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
0.0

Parameters:

Name Type Description Default
expr Union[List[str], Node]

Expression as a token list in infix notation or a Node tree.

required
symbol_library SymbolLibrary

Defines token semantics (NumPy function strings, preamble imports). Defaults to SymbolLibrary.default_symbols.

default_symbols()

Returns:

Type Description
Callable[[ndarray, ndarray, ndarray], float]

A callable f(X, C, y) where X is a 2-D array of shape (n_samples, n_features), C is a 1-D array of constant values, and y is a 1-D target array. Returns the scalar RMSE as a float.

Raises:

Type Description
Exception

If expr is neither a list nor a Node.

Source code in SRToolkit/utils/expression_compiler.py
def expr_to_error_function(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Callable[[np.ndarray, np.ndarray, np.ndarray], float]:
    """
    Compile an expression into a callable that computes the RMSE against target values.

    The expression is translated into the source of a small Python function
    (``symbol_library.preamble`` followed by one assignment per operator) which is then
    executed to obtain the callable. To use a backend other than NumPy, set
    ``symbol_library.preamble`` to the required import statements; names bound by the
    preamble are visible inside the compiled function.

    Examples:
        >>> executable_fun = expr_to_error_function(["X_0", "+", "1"])
        >>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
        0.0
        >>> tree = tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1))
        >>> executable_fun = expr_to_error_function(tree)
        >>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
        0.0
        >>> # In case you need libraries other than numpy for the evaluation of your expressions,
        >>> # you can add them to the preamble in the SymbolLibrary. Here is what a preamble would look like:
        >>> symbol_library = SymbolLibrary.default_symbols(1)
        >>> symbol_library.preamble = ["import numpy as np"]
        >>> # Usually this is done when initializing the SymbolLibrary as SymbolLibrary(preamble=preamble)
        >>> executable_fun = expr_to_error_function(tree, symbol_library)
        >>> print(float(executable_fun(np.array([[1], [2], [3], [4]]), np.array([]), np.array([2, 3, 4, 5]))))
        0.0

    Args:
        expr: Expression as a token list in infix notation or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        symbol_library: Defines token semantics (NumPy function strings, preamble imports).
            Defaults to [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].
            The default instance is created once, when this function is defined, and is
            shared by every call that omits the argument.

    Returns:
        A callable ``f(X, C, y)`` where ``X`` is a 2-D array of shape ``(n_samples, n_features)``, ``C`` is a 1-D array of constant values, and ``y`` is a 1-D target array. Returns the scalar RMSE as a float.

    Raises:
        Exception: If ``expr`` is neither a list nor a [Node][SRToolkit.utils.expression_tree.Node].
    """
    if not isinstance(expr, (list, Node)):
        raise Exception(
            "Expression must be given as either a list of tokens or a tree (an instance of the "
            "SRToolkit.utils.expression_tree.Node class)"
        )

    tree = tokens_to_tree(expr, symbol_library) if isinstance(expr, list) else expr
    # Only the generated statements and the name of the final result are needed here.
    code, symbol, _, _ = tree_to_function_rec(tree, symbol_library)

    body = "".join(f"\t{statement}\n" for statement in code)
    fun_string = (
        "\n".join(symbol_library.preamble)
        + "\ndef _executable_expression_(X, C, y):\n"
        + body
        + f"\treturn np.sqrt(np.mean(({symbol}-y)**2))"
    )

    # Execute in ONE namespace. With separate globals/locals dicts the preamble's imports
    # are stored in the locals dict, while the generated function looks names up in its
    # globals, so any preamble import other than ``np`` would raise NameError when the
    # compiled function is called.
    namespace: dict = {"np": np}
    exec(fun_string, namespace)
    return namespace["_executable_expression_"]

expr_to_executable_function

expr_to_executable_function(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Callable[[np.ndarray, Optional[np.ndarray]], np.ndarray]

Compile an expression into an executable Python function.

The returned callable evaluates the expression over a batch of inputs and a vector of constant values. To use a backend other than NumPy, set symbol_library.preamble to the required import statements.

Examples:

>>> executable_fun = expr_to_executable_function(["X_0", "+", "1"])
>>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
array([2, 3, 4, 5])
>>> executable_fun = expr_to_executable_function(["pi"])
>>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([1]))
array([3.14159265, 3.14159265, 3.14159265, 3.14159265])
>>> executable_fun = expr_to_executable_function(["C"])
>>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([1]))
array([1, 1, 1, 1])
>>> tree = tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1))
>>> executable_fun = expr_to_executable_function(tree)
>>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
array([2, 3, 4, 5])
>>> # In case you need libraries other than numpy for the evaluation of your expressions,
>>> # you can add them to the preamble in the SymbolLibrary. Here is what a preamble would look like:
>>> symbol_library = SymbolLibrary.default_symbols(1)
>>> symbol_library.preamble = ["import numpy as np"]
>>> # Usually this is done when initializing the SymbolLibrary as SymbolLibrary(preamble=preamble)
>>> executable_fun = expr_to_executable_function(tree, symbol_library)
>>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
array([2, 3, 4, 5])

Parameters:

Name Type Description Default
expr Union[List[str], Node]

Expression as a token list in infix notation or a Node tree.

required
symbol_library SymbolLibrary

Defines token semantics (NumPy function strings, preamble imports). Defaults to SymbolLibrary.default_symbols.

default_symbols()

Returns:

Type Description
Callable[[ndarray, Optional[ndarray]], ndarray]

A callable f(X, C) where X is a 2-D array of shape (n_samples, n_features) and C is a 1-D array of constant values. Returns a 1-D output array of shape (n_samples,).

Raises:

Type Description
Exception

If expr is neither a list nor a Node.

Source code in SRToolkit/utils/expression_compiler.py
def expr_to_executable_function(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Callable[[np.ndarray, Optional[np.ndarray]], np.ndarray]:
    """
    Compile an expression into an executable Python function.

    The returned callable evaluates the expression over a batch of inputs and a vector
    of constant values. The expression is translated into the source of a small Python
    function (``symbol_library.preamble`` followed by one assignment per operator) which
    is then executed to obtain the callable. To use a backend other than NumPy, set
    ``symbol_library.preamble`` to the required import statements; names bound by the
    preamble are visible inside the compiled function.

    Examples:
        >>> executable_fun = expr_to_executable_function(["X_0", "+", "1"])
        >>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
        array([2, 3, 4, 5])
        >>> executable_fun = expr_to_executable_function(["pi"])
        >>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([1]))
        array([3.14159265, 3.14159265, 3.14159265, 3.14159265])
        >>> executable_fun = expr_to_executable_function(["C"])
        >>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([1]))
        array([1, 1, 1, 1])
        >>> tree = tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1))
        >>> executable_fun = expr_to_executable_function(tree)
        >>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
        array([2, 3, 4, 5])
        >>> # In case you need libraries other than numpy for the evaluation of your expressions,
        >>> # you can add them to the preamble in the SymbolLibrary. Here is what a preamble would look like:
        >>> symbol_library = SymbolLibrary.default_symbols(1)
        >>> symbol_library.preamble = ["import numpy as np"]
        >>> # Usually this is done when initializing the SymbolLibrary as SymbolLibrary(preamble=preamble)
        >>> executable_fun = expr_to_executable_function(tree, symbol_library)
        >>> executable_fun(np.array([[1], [2], [3], [4]]), np.array([]))
        array([2, 3, 4, 5])

    Args:
        expr: Expression as a token list in infix notation or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        symbol_library: Defines token semantics (NumPy function strings, preamble imports).
            Defaults to [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].
            The default instance is created once, when this function is defined, and is
            shared by every call that omits the argument.

    Returns:
        A callable ``f(X, C)`` where ``X`` is a 2-D array of shape ``(n_samples, n_features)`` and ``C`` is a 1-D array of constant values. Returns a 1-D output array of shape ``(n_samples,)``.

    Raises:
        Exception: If ``expr`` is neither a list nor a [Node][SRToolkit.utils.expression_tree.Node].
    """
    if not isinstance(expr, (list, Node)):
        raise Exception(
            "Expression must be given as either a list of tokens or a tree (an instance of the "
            "SRToolkit.utils.expression_tree.Node class)"
        )

    tree = tokens_to_tree(expr, symbol_library) if isinstance(expr, list) else expr
    # Only the generated statements and the name of the final result are needed here.
    code, symbol, _, _ = tree_to_function_rec(tree, symbol_library)

    body = "".join(f"\t{statement}\n" for statement in code)
    fun_string = (
        "\n".join(symbol_library.preamble)
        + "\ndef _executable_expression_(X, C):\n"
        + body
        + "\treturn "
        + symbol
    )

    # Execute in ONE namespace. With separate globals/locals dicts the preamble's imports
    # are stored in the locals dict, while the generated function looks names up in its
    # globals, so any preamble import other than ``np`` would raise NameError when the
    # compiled function is called.
    namespace: dict = {"np": np}
    exec(fun_string, namespace)
    return namespace["_executable_expression_"]

tree_to_function_rec

tree_to_function_rec(
    tree: Node,
    symbol_library: SymbolLibrary,
    var_counter: int = 0,
    const_counter: int = 0,
) -> Tuple[List[str], str, int, int]

Recursively convert a parse tree into lines of Python code for expression evaluation.

This is a low-level helper for expr_to_executable_function and expr_to_error_function. Call those functions directly unless you need fine-grained control over code generation.

Parameters:

Name Type Description Default
tree Node

Root of the subtree to convert.

required
symbol_library SymbolLibrary

Provides NumPy function strings for each token.

required
var_counter int

Running count of intermediate variables, used to generate unique names. Default 0.

0
const_counter int

Running count of constants consumed; used to index into the C array. Default 0.

0

Returns:

Type Description
Tuple[List[str], str, int, int]

A 4-tuple (code, symbol, var_counter, const_counter) where code is a list of Python assignment strings forming the expression body, symbol is the name of the variable holding this subtree's result, and var_counter / const_counter are the updated counters.

Raises:

Type Description
Exception

If the tree contains a token that is neither a recognized symbol nor a numeric literal.

Source code in SRToolkit/utils/expression_compiler.py
def tree_to_function_rec(
    tree: Node,
    symbol_library: SymbolLibrary,
    var_counter: int = 0,
    const_counter: int = 0,
) -> Tuple[List[str], str, int, int]:
    """
    Recursively translate an expression tree into Python statements that evaluate it.

    This is a low-level helper for [expr_to_executable_function][SRToolkit.utils.expression_compiler.expr_to_executable_function] and
    [expr_to_error_function][SRToolkit.utils.expression_compiler.expr_to_error_function]. Prefer those entry points unless you need
    direct control over the generated code.

    Args:
        tree: Root of the subtree to convert.
        symbol_library: Supplies the type and the NumPy function string of each token.
        var_counter: Number of intermediate ``y_<n>`` variables emitted so far; the next
            intermediate result is named after it. Default ``0``.
        const_counter: Number of constants consumed so far; formatted into the constant's
            function string as its index. Default ``0``.

    Returns:
        A 4-tuple ``(code, symbol, var_counter, const_counter)`` where ``code`` is the list of generated statements, ``symbol`` is the expression or variable name holding this subtree's value, and the two counters are the updated running counts.

    Raises:
        Exception: If a leaf token is neither a variable, literal or constant of the
            symbol library nor parseable as a number.
    """
    token = tree.symbol

    # Leaf: emits no statements, only an expression string for its value.
    if tree.left is None and tree.right is None:
        token_type = symbol_library.get_type(token)
        if token_type in ("var", "lit"):
            return [], symbol_library.get_np_fn(token), var_counter, const_counter
        if token_type == "const":
            # Each constant occurrence consumes the next index.
            indexed = symbol_library.get_np_fn(token).format(const_counter)
            return [], indexed, var_counter, const_counter + 1
        if is_float(token):
            # Numeric tokens are emitted verbatim.
            return [], token, var_counter, const_counter
        raise Exception(f"Encountered invalid symbol {token} while converting to function.")

    # Unary function: only the left child is set.
    if tree.left is not None and tree.right is None:
        statements, operand, var_counter, const_counter = tree_to_function_rec(
            tree.left, symbol_library, var_counter, const_counter
        )
        result_name = f"y_{var_counter}"
        statements.append(symbol_library.get_np_fn(token).format(result_name, operand))
        return statements, result_name, var_counter + 1, const_counter

    # Binary operator: the left subtree is generated first, then the right one.
    assert tree.right is not None, "Right child should be present in this branch."
    assert tree.left is not None, "Left child should be present if right child is present."
    left_statements, left_operand, var_counter, const_counter = tree_to_function_rec(
        tree.left, symbol_library, var_counter, const_counter
    )
    right_statements, right_operand, var_counter, const_counter = tree_to_function_rec(
        tree.right, symbol_library, var_counter, const_counter
    )
    result_name = f"y_{var_counter}"
    statements = [*left_statements, *right_statements]
    statements.append(symbol_library.get_np_fn(token).format(result_name, left_operand, right_operand))
    return statements, result_name, var_counter + 1, const_counter

create_generic_pcfg

create_generic_pcfg(symbol_library: SymbolLibrary) -> str

Construct a generic Probabilistic Context-Free Grammar (PCFG) from a symbol library.

The grammar encodes standard mathematical operator precedence through a fixed non-terminal hierarchy:

  • E — additive level (precedence 0 operators)
  • F — multiplicative level (precedence 1 operators)
  • B — power level (precedence 2 operators)
  • T — terminal: function application (R), constant (C), or variable (V)
  • R — unary functions (precedence 5) and parenthesised sub-expressions
  • P — postfix functions (precedence -1, e.g. ^2)

The returned string is in NLTK PCFG format and can be passed directly to generate_from_pcfg or generate_n_expressions.

Examples:

>>> sl = SymbolLibrary.from_symbol_list(["+", "-", "*", "sin", "^2", "pi"], 2)
>>> print(create_generic_pcfg(sl))
E -> E '+' F [0.2]
E -> E '-' F [0.2]
E -> F [0.6]
F -> F '*' B [0.4]
F -> B [0.6]
B -> T [1.0]
T -> R [0.2]
T -> C [0.2]
T -> V [0.6]
C -> 'pi' [1.0]
R -> 'sin' '(' E ')' [0.4]
R -> P [0.15]
R -> '(' E ')' [0.45]
P -> '(' E ')' '^2' [1.0]
V -> 'X_0' [0.5]
V -> 'X_1' [0.5]

Parameters:

Name Type Description Default
symbol_library SymbolLibrary

Symbol library defining the available tokens, their types, and precedences.

required

Returns:

Type Description
str

NLTK-formatted PCFG string with generic probabilities.

Source code in SRToolkit/utils/expression_generator.py
def create_generic_pcfg(symbol_library: SymbolLibrary) -> str:
    """
    Build a generic Probabilistic Context-Free Grammar (PCFG) from a symbol library.

    Operator precedence is encoded by a fixed chain of non-terminals:

    - ``E`` — additive level (precedence 0 operators)
    - ``F`` — multiplicative level (precedence 1 operators)
    - ``B`` — power level (precedence 2 operators)
    - ``T`` — terminal: function application (``R``), constant (``C``), or variable (``V``)
    - ``R`` — unary functions (precedence 5) and parenthesised sub-expressions
    - ``P`` — postfix functions (precedence -1, e.g. ``^2``)

    The result is in NLTK PCFG format and can be passed directly to
    [generate_from_pcfg][SRToolkit.utils.expression_generator.generate_from_pcfg] or [generate_n_expressions][SRToolkit.utils.expression_generator.generate_n_expressions].

    Examples:
        >>> sl = SymbolLibrary.from_symbol_list(["+", "-", "*", "sin", "^2", "pi"], 2)
        >>> print(create_generic_pcfg(sl))
        E -> E '+' F [0.2]
        E -> E '-' F [0.2]
        E -> F [0.6]
        F -> F '*' B [0.4]
        F -> B [0.6]
        B -> T [1.0]
        T -> R [0.2]
        T -> C [0.2]
        T -> V [0.6]
        C -> 'pi' [1.0]
        R -> 'sin' '(' E ')' [0.4]
        R -> P [0.15]
        R -> '(' E ')' [0.45]
        P -> '(' E ')' '^2' [1.0]
        V -> 'X_0' [0.5]
        V -> 'X_1' [0.5]
        <BLANKLINE>

    Args:
        symbol_library: Symbol library defining the available tokens, their types,
            and precedences.

    Returns:
        NLTK-formatted PCFG string with generic probabilities, one rule per line.
    """
    entries = list(symbol_library.symbols.values())

    def select(kind, precedence=None):
        # Tokens of the given type, optionally restricted to a single precedence.
        return [
            entry["symbol"]
            for entry in entries
            if entry["type"] == kind and (precedence is None or entry["precedence"] == precedence)
        ]

    additive = select("op", 0)
    multiplicative = select("op", 1)
    power = select("op", 2)
    prefix_functions = select("fn", 5)
    postfix_functions = select("fn", -1)
    variables = select("var")
    constants = select("const")
    literals = select("lit")

    rules = []

    # Precedence levels: the operator mass is split evenly among the level's operators,
    # the rest goes to the fall-through rule to the next level.
    levels = (
        ("E", "F", additive, 0.4, "0.6"),
        ("F", "B", multiplicative, 0.4, "0.6"),
        ("B", "T", power, 0.05, "0.95"),
    )
    for head, lower, operators, mass, fall_through in levels:
        if operators:
            rules.extend(f"{head} -> {head} '{op}' {lower} [{mass / len(operators)}]" for op in operators)
            rules.append(f"{head} -> {lower} [{fall_through}]")
        else:
            rules.append(f"{head} -> {lower} [1.0]")

    # Terminals: the C branch exists only if the library has constants or literals.
    if constants or literals:
        rules += ["T -> R [0.2]", "T -> C [0.2]", "T -> V [0.6]"]
        mixed = bool(constants) and bool(literals)
        # When both kinds are present, literals share 0.2 and free constants 0.8.
        literal_mass = 0.2 if mixed else 1
        constant_mass = 0.8 if mixed else 1
        rules.extend(f"C -> '{s}' [{literal_mass / len(literals)}]" for s in literals)
        rules.extend(f"C -> '{s}' [{constant_mass / len(constants)}]" for s in constants)
    else:
        rules += ["T -> R [0.3]", "T -> V [0.7]"]

    # Function application, postfix functions and plain parentheses.
    rules.extend(f"R -> '{fn}' '(' E ')' [{0.4 / len(prefix_functions)}]" for fn in prefix_functions)
    if postfix_functions:
        rules.append("R -> P [0.15]")
        parenthesis_mass = "0.45" if prefix_functions else "0.85"
    else:
        parenthesis_mass = "0.6" if prefix_functions else "1.0"
    rules.append(f"R -> '(' E ')' [{parenthesis_mass}]")

    if postfix_functions:
        # Weight of each postfix function: reciprocal of the absolute value of the number
        # after its first character, normalised over all postfix functions.
        inverse = [1 / abs(float(fn[1:])) for fn in postfix_functions]
        total = sum(inverse)
        rules.extend(f"P -> '(' E ')' '{fn}' [{weight / total}]" for fn, weight in zip(postfix_functions, inverse))

    rules.extend(f"V -> '{v}' [{1 / len(variables)}]" for v in variables)

    return "".join(rule + "\n" for rule in rules)

generate_from_pcfg

generate_from_pcfg(
    grammar_str: str,
    start_symbol: str = "E",
    max_depth: int = 40,
    limit: int = 100,
) -> List[str]

Sample a single expression from a PCFG by Monte-Carlo tree expansion.

Examples:

>>> generate_from_pcfg("E -> '1' [1.0]")
['1']
>>> grammar = create_generic_pcfg(SymbolLibrary.default_symbols())
>>> len(generate_from_pcfg(grammar)) > 0
True

Parameters:

Name Type Description Default
grammar_str str

Grammar in NLTK PCFG notation.

required
start_symbol str

Non-terminal from which expansion begins. Default "E".

'E'
max_depth int

Maximum parse-tree depth. Values below 0 allow unbounded depth. Default 40.

40
limit int

Maximum number of sampling attempts before raising an exception. Default 100.

100

Returns:

Type Description
List[str]

A single expression as a list of string tokens in infix notation.

Raises:

Type Description
Exception

If a valid expression cannot be produced within limit attempts.

Source code in SRToolkit/utils/expression_generator.py
def generate_from_pcfg(grammar_str: str, start_symbol: str = "E", max_depth: int = 40, limit: int = 100) -> List[str]:
    """
    Sample a single expression from a PCFG by Monte-Carlo tree expansion.

    The grammar is parsed once and expanded from ``start_symbol`` until an attempt
    succeeds. An attempt counts as failed when the expansion returns ``None``. At least
    one attempt is always made, and at most ``limit``.

    Examples:
        >>> generate_from_pcfg("E -> '1' [1.0]")
        ['1']
        >>> grammar = create_generic_pcfg(SymbolLibrary.default_symbols())
        >>> len(generate_from_pcfg(grammar)) > 0
        True

    Args:
        grammar_str: Grammar in NLTK PCFG notation.
        start_symbol: Non-terminal from which expansion begins. Default ``"E"``.
        max_depth: Maximum parse-tree depth. Values below ``0`` allow unbounded depth.
            Default ``40``.
        limit: Maximum number of sampling attempts before raising an exception.
            Default ``100``.

    Returns:
        A single expression as a list of string tokens in infix notation.

    Raises:
        Exception: If a valid expression cannot be produced within ``limit`` attempts.
    """
    start = nltk.grammar.Nonterminal(start_symbol)
    grammar = nltk.PCFG.fromstring(grammar_str)

    # Bounded retry loop: the previous ``while`` never advanced its attempt counter, so a
    # grammar that keeps failing looped forever instead of raising after ``limit`` tries.
    for _ in range(max(limit, 1)):
        expr = _expand(grammar, start, 0, max_depth)
        if expr is not None:
            return expr

    raise Exception(
        f"[Expression generation] Couldn't find an expression with max_depth {max_depth} from this grammar in {limit} tries."
    )

generate_n_expressions

generate_n_expressions(
    expression_description: Union[str, SymbolLibrary],
    num_expressions: int,
    unique: bool = True,
    max_expression_length: int = 50,
    verbose: bool = True,
) -> List[List[str]]

Sample num_expressions expressions from a grammar or symbol library.

Examples:

>>> len(generate_n_expressions(SymbolLibrary.default_symbols(5), 100, verbose=False))
100
>>> generate_n_expressions(SymbolLibrary.from_symbol_list([], 1), 3, unique=False, verbose=False, max_expression_length=1)
[['X_0'], ['X_0'], ['X_0']]

Parameters:

Name Type Description Default
expression_description Union[str, SymbolLibrary]

Grammar source as a NLTK PCFG string or a SymbolLibrary (a generic PCFG is built automatically via create_generic_pcfg).

required
num_expressions int

Number of expressions to generate.

required
unique bool

If True, every expression in the output is lexicographically distinct (though not necessarily semantically inequivalent). Default True.

True
max_expression_length int

Maximum token count per expression. Values below 0 allow unbounded length. Default 50.

50
verbose bool

Display a progress bar. Default True.

True

Returns:

Type Description
List[List[str]]

List of expressions, each represented as a list of string tokens in infix notation.

Source code in SRToolkit/utils/expression_generator.py
def generate_n_expressions(
    expression_description: Union[str, SymbolLibrary],
    num_expressions: int,
    unique: bool = True,
    max_expression_length: int = 50,
    verbose: bool = True,
) -> List[List[str]]:
    """
    Sample ``num_expressions`` expressions from a grammar or symbol library.

    Sampling repeats until enough expressions have been accepted. Candidates that are too
    long, or that repeat an earlier expression while ``unique`` is set, are discarded and
    sampling continues.

    Examples:
        >>> len(generate_n_expressions(SymbolLibrary.default_symbols(5), 100, verbose=False))
        100
        >>> generate_n_expressions(SymbolLibrary.from_symbol_list([], 1), 3, unique=False, verbose=False, max_expression_length=1)
        [['X_0'], ['X_0'], ['X_0']]

    Args:
        expression_description: Grammar source as a NLTK PCFG string or a
            [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary] (a generic PCFG is built automatically via
            [create_generic_pcfg][SRToolkit.utils.expression_generator.create_generic_pcfg]).
        num_expressions: Number of expressions to generate.
        unique: If ``True``, no two returned expressions have the same concatenated token
            string (they may still be semantically equivalent). Default ``True``.
        max_expression_length: Maximum token count per expression. Values below ``0``
            allow unbounded length. Default ``50``.
        verbose: Display a progress bar. Default ``True``.

    Returns:
        List of expressions, each represented as a list of string tokens in infix notation.

    Raises:
        Exception: If ``expression_description`` is neither a string nor a
            [SymbolLibrary][SRToolkit.utils.symbol_library.SymbolLibrary].
    """
    if isinstance(expression_description, SymbolLibrary):
        grammar = create_generic_pcfg(expression_description)
    elif isinstance(expression_description, str):
        grammar = expression_description
    else:
        raise Exception(
            "Description of expressions must be either a grammar written as a string or an instance of SymbolLibrary."
        )

    accepted: List[List[str]] = []
    seen = set()
    progress = tqdm(total=num_expressions) if verbose else None

    while len(accepted) < num_expressions:
        try:
            candidate = generate_from_pcfg(grammar, max_depth=max_expression_length * 10)
        except Exception:
            # Best effort: report the failed draw and try again.
            print("Couldn't generate a valid expression in 100 tries")
            continue

        # The length limit applies only when it is positive.
        if max_expression_length > 0 and len(candidate) > max_expression_length:
            continue

        key = "".join(candidate)
        if unique and key in seen:
            continue

        accepted.append(candidate)
        seen.add(key)
        if progress is not None:
            progress.update(1)

    if progress is not None:
        progress.close()
    return accepted

simplify

simplify(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Union[List[str], Node]

Simplify an expression algebraically.

Two successive steps are applied:

  1. SymPy simplification — expands and reduces the expression algebraically (e.g. X_0 * X_1 / X_0 → X_1).
  2. Constant folding — collapses any sub-expression containing no variables into a single free constant C (e.g. C * C + C → C).

Examples:

>>> expr = ["C", "+", "C", "*", "C", "+", "X_0", "*", "X_1", "/", "X_0"]
>>> print("".join(simplify(expr)))
C+X_1

Parameters:

Name Type Description Default
expr Union[List[str], Node]

Expression as a token list in infix notation or a Node tree.

required
symbol_library SymbolLibrary

Symbol library defining variables and constants. Defaults to SymbolLibrary.default_symbols.

default_symbols()

Returns:

Type Description
Union[List[str], Node]

The simplified expression in the same form as the input (list if a list was given, Node if a tree was given).

Raises:

Type Description
Exception

If simplification fails or the result contains tokens absent from symbol_library.

Source code in SRToolkit/utils/expression_simplifier.py
def simplify(
    expr: Union[List[str], Node],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> Union[List[str], Node]:
    """
    Simplify an expression algebraically.

    Two successive steps are applied:

    1. **SymPy simplification** — expands and reduces the expression algebraically
       (e.g. ``X_0 * X_1 / X_0`` → ``X_1``).
    2. **Constant folding** — collapses any sub-expression containing no variables
       into a single free constant ``C`` (e.g. ``C * C + C`` → ``C``).

    Examples:
        >>> expr = ["C", "+", "C", "*", "C", "+", "X_0", "*", "X_1", "/", "X_0"]
        >>> print("".join(simplify(expr)))
        C+X_1

    Args:
        expr: Expression as a token list in infix notation or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        symbol_library: Symbol library defining variables and constants.
            Defaults to [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].

    Returns:
        The simplified expression in the same form as the input (list if a list was given, [Node][SRToolkit.utils.expression_tree.Node] if a tree was given).

    Raises:
        Exception: If simplification fails or the result contains tokens absent from
            ``symbol_library``.
    """
    # Remember the input form so the result can be handed back in the same one.
    return_tree = isinstance(expr, Node)
    tokens = expr.to_list(symbol_library=symbol_library, notation="infix") if isinstance(expr, Node) else expr

    variables = symbol_library.get_symbols_of_type("var")

    # Only one constant symbol is expected, so the first one is used. Without any
    # constant in the library the placeholder "__C__" is passed to the helpers instead.
    constant_symbols = symbol_library.get_symbols_of_type("const")
    constant = constant_symbols[0] if len(constant_symbols) > 0 else "__C__"

    reduced = _simplify_expression("".join(tokens), constant, variables)
    parsed = sympify(_denumerate_constants(str(reduced), constant), evaluate=False)
    simplified_tree = _sympy_to_sr(parsed)

    if not _check_tree(simplified_tree, symbol_library):
        raise Exception(
            "Simplified expression contains invalid symbols. Possibly skip its simplification or add symbols to the SymbolLibrary."
        )

    if return_tree:
        return simplified_tree
    return simplified_tree.to_list(symbol_library=symbol_library, notation="infix")

expr_to_latex

expr_to_latex(
    expr: Union[Node, List[str]], symbol_library: SymbolLibrary
) -> str

Convert an expression to a LaTeX string.

Examples:

>>> expr_to_latex(["(", "X_0", "+", "X_1", ")"], SymbolLibrary.default_symbols())
'$X_{0} + X_{1}$'
>>> expr = Node("+", Node("X_0"), Node("1"))
>>> expr_to_latex(expr, SymbolLibrary.default_symbols())
'$1 + X_{0}$'

Parameters:

Name Type Description Default
expr Union[Node, List[str]]

Expression as a token list or a Node tree.

required
symbol_library SymbolLibrary

Symbol library providing LaTeX templates.

required

Returns:

Type Description
str

A LaTeX string of the form $...$, or an empty string if conversion fails.

Source code in SRToolkit/utils/expression_tree.py
def expr_to_latex(expr: Union[Node, List[str]], symbol_library: SymbolLibrary) -> str:
    """
    Render an expression as a LaTeX string.

    Conversion is best-effort: any error (including an unsupported ``expr`` type) is
    printed and an empty string is returned instead of raising.

    Examples:
        >>> expr_to_latex(["(", "X_0", "+", "X_1", ")"], SymbolLibrary.default_symbols())
        '$X_{0} + X_{1}$'
        >>> expr = Node("+", Node("X_0"), Node("1"))
        >>> expr_to_latex(expr, SymbolLibrary.default_symbols())
        '$1 + X_{0}$'

    Args:
        expr: Expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        symbol_library: Symbol library providing LaTeX templates.

    Returns:
        A LaTeX string of the form ``$...$``, or an empty string if conversion fails.
    """
    try:
        # Resolve the input to a tree first; token lists are parsed on the fly.
        if isinstance(expr, Node):
            tree = expr
        elif isinstance(expr, list):
            tree = tokens_to_tree(expr, symbol_library)
        else:
            # Raised inside the try block on purpose so it is reported like any other failure.
            raise Exception(
                f"Invalid type for expression {expr!s}. Should be SRToolkit.utils.Node or a list of tokens."
            )
        return tree.to_latex(symbol_library)
    except Exception as error:
        print(f"Error while converting expression {expr!s} to LaTeX: {error!s}")
        return ""

is_float

is_float(element: Any) -> bool

Return True if element can be interpreted as a floating-point number.

Examples:

>>> is_float(1.0)
True
>>> is_float("1.0")
True
>>> is_float("1")
True
>>> is_float(None)
False

Parameters:

Name Type Description Default
element Any

Value to test.

required

Returns:

Type Description
bool

True if float(element) succeeds, False otherwise (including None).

Source code in SRToolkit/utils/expression_tree.py
def is_float(element: Any) -> bool:
    """
    Return ``True`` if ``element`` can be interpreted as a floating-point number.

    Examples:
        >>> is_float(1.0)
        True
        >>> is_float("1.0")
        True
        >>> is_float("1")
        True
        >>> is_float(None)
        False
        >>> is_float(["1.0"])
        False

    Args:
        element: Value to test.

    Returns:
        ``True`` if ``float(element)`` succeeds, ``False`` otherwise (including ``None``
        and values of types that ``float`` cannot convert).
    """
    if element is None:
        return False
    try:
        float(element)
    except (TypeError, ValueError, OverflowError):
        # ValueError: malformed numeric string.
        # TypeError: unsupported type such as a list or dict.
        # OverflowError: integer too large to be represented as a float.
        return False
    return True

tokens_to_tree

tokens_to_tree(tokens: List[str], sl: SymbolLibrary) -> Node

Parse a token list into an expression tree using the shunting-yard algorithm.

Examples:

>>> tree = tokens_to_tree(["(", "X_0", "+", "X_1", ")"], SymbolLibrary.default_symbols())
>>> len(tree)
3

Parameters:

Name Type Description Default
tokens List[str]

Token list in infix notation.

required
sl SymbolLibrary

Symbol library used to resolve token types and precedences.

required

Returns:

Type Description
Node

Root Node of the parsed expression tree.

Raises:

Type Description
Exception

If a token is absent from sl, or if the expression is syntactically invalid.

Source code in SRToolkit/utils/expression_tree.py
def tokens_to_tree(tokens: List[str], sl: SymbolLibrary) -> Node:
    """
    Build an expression tree from an infix token list with the shunting-yard algorithm.

    The token list is wrapped in one extra pair of parentheses so that the final
    closing parenthesis flushes every operator still waiting on the stack. Parsing
    is accepted only if the resulting tree contains exactly as many nodes as there
    are non-parenthesis tokens in the input.

    Examples:
        >>> tree = tokens_to_tree(["(", "X_0", "+", "X_1", ")"], SymbolLibrary.default_symbols())
        >>> len(tree)
        3

    Args:
        tokens: Token list in infix notation.
        sl: Symbol library used to resolve token types and precedences.

    Returns:
        Root [Node][SRToolkit.utils.expression_tree.Node] of the parsed expression tree.

    Raises:
        Exception: If a token is absent from ``sl``, or if the expression is
            syntactically invalid.
    """
    num_tokens = sum(1 for t in tokens if t not in ("(", ")"))
    expr_str = "".join(tokens)
    operators = []
    operands = []

    def apply_top_operator() -> None:
        # Pop the top operator and fold it with its operand(s) into a single node.
        symbol = operators.pop()
        if sl.get_type(symbol) == "fn":
            operands.append(Node(symbol, left=operands.pop()))
        else:
            # The most recently pushed operand is the right child.
            right = operands.pop()
            left = operands.pop()
            operands.append(Node(symbol, right, left))

    for token in ["("] + tokens + [")"]:
        if token == "(":
            operators.append(token)
            continue

        kind = sl.get_type(token)
        if kind in ("var", "const", "lit") or is_float(token):
            operands.append(Node(token))
        elif kind == "fn":
            if token[0] == "^":
                # Postfix power functions apply to the operand already parsed.
                operands.append(Node(token, left=operands.pop()))
            else:
                operators.append(token)
        elif kind == "op":
            # Reduce everything that binds at least as tightly as the new operator.
            while (
                operators
                and operators[-1] != "("
                and sl.get_precedence(operators[-1]) >= sl.get_precedence(token)
            ):
                apply_top_operator()
            operators.append(token)
        elif token == ")":
            while operators and operators[-1] != "(":
                apply_top_operator()
            # Discard the matching "(".
            operators.pop()
            # A prefix function directly before the parenthesised group takes it as its argument.
            if operators and sl.get_type(operators[-1]) == "fn":
                operands.append(Node(operators.pop(), left=operands.pop()))
        else:
            raise Exception(
                f'Invalid symbol "{token}" in expression {expr_str}. Did you add token "{token}" to the symbol library?'
            )

    root = operands[-1]
    if len(root) != num_tokens:
        # Some tokens did not end up in the tree, so the input was malformed.
        raise Exception(f"Error while parsing expression {expr_str}.")
    return root

bed

bed(
    expr1: Union[Node, List[str], ndarray],
    expr2: Union[Node, List[str], ndarray],
    X: Optional[ndarray] = None,
    num_consts_sampled: int = 32,
    num_points_sampled: int = 64,
    domain_bounds: Optional[List[Tuple[float, float]]] = None,
    consts_bounds: Tuple[float, float] = (-5, 5),
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
    seed: Optional[int] = None,
) -> float

Compute the Behavior-aware Expression Distance (BED) between two expressions.

BED measures how similarly two expressions behave over a domain by comparing their output distributions point-by-point using the Wasserstein distance. Free constants are marginalised by sampling multiple constant vectors via Latin Hypercube Sampling.

Either X or domain_bounds must be provided when expressions are given as token lists or Node trees. Pre-computed behavior matrices can be passed directly to avoid redundant evaluation.

Examples:

>>> X = np.random.rand(10, 2) - 0.5
>>> expr1 = ["X_0", "+", "C"] # instances of SRToolkit.utils.expression_tree.Node work as well
>>> expr2 = ["X_1", "+", "C"]
>>> bed(expr1, expr2, X) < 1
True
>>> # Changing the number of sampled constants
>>> bed(expr1, expr2, X, num_consts_sampled=128, consts_bounds=(-2, 2)) < 1
True
>>> # Sampling X instead of giving it directly by defining a domain
>>> bed(expr1, expr2, domain_bounds=[(0, 1), (0, 1)]) < 1
True
>>> bed(expr1, expr2, domain_bounds=[(0, 1), (0, 1)], num_points_sampled=128) < 1
True
>>> # You can use behavior matrices instead of expressions (this has potential computational advantages if the same expression is used multiple times)
>>> bm1 = create_behavior_matrix(expr1, X)
>>> bed(bm1, expr2, X) < 1
True
>>> bm2 = create_behavior_matrix(expr2, X)
>>> bed(bm1, bm2) < 1
True

Parameters:

Name Type Description Default
expr1 Union[Node, List[str], ndarray]

First expression as a token list, a Node tree, or a pre-computed behavior matrix of shape (n_samples, num_consts_sampled).

required
expr2 Union[Node, List[str], ndarray]

Second expression in the same format as expr1.

required
X Optional[ndarray]

Evaluation points of shape (n_samples, n_features). Required unless both expressions are behavior matrices or domain_bounds is provided.

None
num_consts_sampled int

Number of constant vectors sampled per expression. Default 32.

32
num_points_sampled int

Number of points sampled from domain_bounds when X is None. Default 64.

64
domain_bounds Optional[List[Tuple[float, float]]]

Per-variable (lower, upper) bounds used to sample X via Latin Hypercube Sampling when X is None.

None
consts_bounds Tuple[float, float]

(lower, upper) bounds for constant sampling. Default (-5, 5).

(-5, 5)
symbol_library SymbolLibrary

Symbol library used to compile expressions. Defaults to SymbolLibrary.default_symbols.

default_symbols()
seed Optional[int]

Random seed for reproducible sampling. Default None.

None

Returns:

Type Description
float

BED between the expressions, given as a float.

Raises:

Type Description
Exception

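If X is None and the expressions cannot be evaluated on a common set of points: either neither expression is a pre-computed behavior matrix and domain_bounds is not provided, or only one of the two expressions is a pre-computed behavior matrix.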

Source code in SRToolkit/utils/measures.py
def bed(
    expr1: Union[Node, List[str], np.ndarray],
    expr2: Union[Node, List[str], np.ndarray],
    X: Optional[np.ndarray] = None,
    num_consts_sampled: int = 32,
    num_points_sampled: int = 64,
    domain_bounds: Optional[List[Tuple[float, float]]] = None,
    consts_bounds: Tuple[float, float] = (-5, 5),
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
    seed: Optional[int] = None,
) -> float:
    """
    Compute the Behavior-aware Expression Distance (BED) between two expressions.

    BED measures how similarly two expressions behave over a domain by comparing
    their output distributions point-by-point using the Wasserstein distance. Free
    constants are marginalised by sampling multiple constant vectors via Latin
    Hypercube Sampling.

    Either ``X`` or ``domain_bounds`` must be provided when expressions are given as
    token lists or [Node][SRToolkit.utils.expression_tree.Node] trees. Pre-computed behavior matrices can be passed
    directly to avoid redundant evaluation.

    For every evaluation point only the finite outputs of each expression are
    compared. A point where neither expression has a finite output contributes ``0``;
    a point where only one of them has finite outputs contributes ``inf``. The result
    is the mean over all points.

    Examples:
        >>> X = np.random.rand(10, 2) - 0.5
        >>> expr1 = ["X_0", "+", "C"] # instances of SRToolkit.utils.expression_tree.Node work as well
        >>> expr2 = ["X_1", "+", "C"]
        >>> bed(expr1, expr2, X) < 1
        True
        >>> # Changing the number of sampled constants
        >>> bed(expr1, expr2, X, num_consts_sampled=128, consts_bounds=(-2, 2)) < 1
        True
        >>> # Sampling X instead of giving it directly by defining a domain
        >>> bed(expr1, expr2, domain_bounds=[(0, 1), (0, 1)]) < 1
        True
        >>> bed(expr1, expr2, domain_bounds=[(0, 1), (0, 1)], num_points_sampled=128) < 1
        True
        >>> # You can use behavior matrices instead of expressions (this has potential computational advantages if same expression is used multiple times)
        >>> bm1 = create_behavior_matrix(expr1, X)
        >>> bed(bm1, expr2, X) < 1
        True
        >>> bm2 = create_behavior_matrix(expr2, X)
        >>> bed(bm1, bm2) < 1
        True

    Args:
        expr1: First expression as a token list, a [Node][SRToolkit.utils.expression_tree.Node] tree, or a pre-computed
            behavior matrix of shape ``(n_samples, num_consts_sampled)``.
        expr2: Second expression in the same format as ``expr1``.
        X: Evaluation points of shape ``(n_samples, n_features)``. Required unless both
            expressions are behavior matrices or ``domain_bounds`` is provided.
        num_consts_sampled: Number of constant vectors sampled per expression. Default ``32``.
        num_points_sampled: Number of points sampled from ``domain_bounds`` when ``X`` is
            ``None``. Default ``64``.
        domain_bounds: Per-variable ``(lower, upper)`` bounds used to sample ``X`` via
            Latin Hypercube Sampling when ``X`` is ``None``.
        consts_bounds: ``(lower, upper)`` bounds for constant sampling. Default ``(-5, 5)``.
        symbol_library: Symbol library used to compile expressions. Defaults to
            [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].
        seed: Random seed for reproducible sampling. Default ``None``.

    Returns:
        BED between the expressions, given as a float.

    Raises:
        Exception: If ``X`` is ``None`` and either neither expression is a behavior
            matrix while ``domain_bounds`` is missing, or only one of the expressions
            is a behavior matrix.
        ValueError: If a ``domain_bounds`` entry has ``lower >= upper``, if the behavior
            matrices differ in their number of rows, or if they have no rows.
    """
    first_is_matrix = isinstance(expr1, np.ndarray)
    second_is_matrix = isinstance(expr2, np.ndarray)

    if X is None:
        if not (first_is_matrix or second_is_matrix):
            # Neither side is pre-computed: sample the evaluation points ourselves.
            if domain_bounds is None:
                raise Exception(
                    "If X is not given and both expressions are not given as a behavior matrix, "
                    "then domain_bounds parameter must be given"
                )
            for i, (lb, ub) in enumerate(domain_bounds):
                if lb >= ub:
                    raise ValueError(f"domain_bounds[{i}] has lower bound ({lb}) >= upper bound ({ub}).")
            spans = np.array([high - low for (low, high) in domain_bounds])
            lows = np.array([low for (low, _) in domain_bounds])
            sampler = LatinHypercube(len(domain_bounds), optimization="random-cd", seed=seed)
            # Scale the unit-hypercube sample into the requested per-variable bounds.
            X = sampler.random(num_points_sampled) * spans + lows
        elif first_is_matrix != second_is_matrix:
            # One matrix and one expression: there is no shared X to evaluate the expression on.
            raise Exception(
                "If X is not given, both expressions must be given as a behavior matrix or as a list of "
                "tokens/SRToolkit.utils.Node objects. Otherwise, behavior matrices are uncomparable."
            )

    if isinstance(expr1, (list, Node)):
        if X is None:
            raise ValueError(
                "Either X must be given, domain_bounds must be given, or both expressions must be given as behavior matrices."
            )
        expr1 = create_behavior_matrix(expr1, X, num_consts_sampled, consts_bounds, symbol_library, seed)

    if isinstance(expr2, (list, Node)):
        if X is None:
            raise ValueError(
                "Either X must be given, domain_bounds must be given, or both expressions must be given as behavior matrices."
            )
        expr2 = create_behavior_matrix(expr2, X, num_consts_sampled, consts_bounds, symbol_library, seed)

    if expr1.shape[0] != expr2.shape[0]:
        raise ValueError("Behavior matrices must have the same number of rows (points on which behavior is evaluated).")
    if expr1.shape[0] == 0:
        raise ValueError(
            "Behavior matrices must have at least one row. If your expressions are given as behavior "
            "matrices, make sure they are not empty. Otherwise, if X is given, make sure it contains "
            "at least one point. If X is not given, make sure num_points_sampled is greater than 0."
        )

    distances = []
    for row1, row2 in zip(expr1, expr2):
        # Only finite outputs take part in the comparison at this point.
        u = row1[np.isfinite(row1)]
        v = row2[np.isfinite(row2)]
        u_has_values = u.shape[0] > 0
        v_has_values = v.shape[0] > 0
        if u_has_values and v_has_values:
            distances.append(_custom_wasserstein(u, v))
        elif not u_has_values and not v_has_values:
            # Both expressions have no finite output here: treated as identical behaviour.
            distances.append(0)
        else:
            # Exactly one expression has finite output here: maximally different.
            distances.append(np.inf)

    return float(np.mean(distances))

create_behavior_matrix

create_behavior_matrix(
    expr: Union[Node, List[str]],
    X: ndarray,
    num_consts_sampled: int = 32,
    consts_bounds: Tuple[float, float] = (-5, 5),
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
    seed: Optional[int] = None,
) -> np.ndarray

Evaluate an expression over multiple constant samples to produce a behavior matrix.

For expressions with free constants, constants are drawn via Latin Hypercube Sampling within consts_bounds. For constant-free expressions, all columns are identical.

Examples:

>>> X = np.random.rand(10, 2) - 0.5
>>> create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32).shape
(10, 32)
>>> mean_0_1 = np.mean(create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32, consts_bounds=(0, 1)))
>>> mean_1_5 = np.mean(create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32, consts_bounds=(1, 5)))
>>> print(bool(mean_0_1 < mean_1_5))
True
>>> # Deterministic expressions always produce the same behavior matrix
>>> bm1 = create_behavior_matrix(["X_0", "+", "X_1"], X)
>>> bm2 = create_behavior_matrix(["X_0", "+", "X_1"], X)
>>> print(bool(np.array_equal(bm1, bm2)))
True

Parameters:

Name Type Description Default
expr Union[Node, List[str]]

Expression as a token list or a Node tree.

required
X ndarray

Input data of shape (n_samples, n_features) at which the expression is evaluated.

required
num_consts_sampled int

Number of constant vectors to sample; sets the number of output columns. Default 32.

32
consts_bounds Tuple[float, float]

(lower, upper) bounds for constant sampling. Default (-5, 5).

(-5, 5)
symbol_library SymbolLibrary

Symbol library used to compile the expression. Defaults to SymbolLibrary.default_symbols.

default_symbols()
seed Optional[int]

Random seed for reproducible constant sampling. Default None.

None

Returns:

Type Description
ndarray

Behavior matrix of shape (n_samples, num_consts_sampled).

Raises:

Type Description
Exception

If expr is neither a token list nor a Node.

Source code in SRToolkit/utils/measures.py
def create_behavior_matrix(
    expr: Union[Node, List[str]],
    X: np.ndarray,
    num_consts_sampled: int = 32,
    consts_bounds: Tuple[float, float] = (-5, 5),
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
    seed: Optional[int] = None,
) -> np.ndarray:
    """
    Build the behavior matrix of an expression: its outputs on ``X`` for many constant samples.

    The number of free constants is the number of occurrences of the library's
    ``"const"`` symbols among the expression's tokens. If there are any, constant
    vectors are drawn via Latin Hypercube Sampling within ``consts_bounds`` and the
    expression is evaluated once per vector. Otherwise the single output column is
    repeated, so all columns are identical. NumPy floating-point warnings are
    suppressed during evaluation.

    Examples:
        >>> X = np.random.rand(10, 2) - 0.5
        >>> create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32).shape
        (10, 32)
        >>> mean_0_1 = np.mean(create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32, consts_bounds=(0, 1)))
        >>> mean_1_5 = np.mean(create_behavior_matrix(["X_0", "+", "C"], X, num_consts_sampled=32, consts_bounds=(1, 5)))
        >>> print(bool(mean_0_1 < mean_1_5))
        True
        >>> # Deterministic expressions always produce the same behavior matrix
        >>> bm1 = create_behavior_matrix(["X_0", "+", "X_1"], X)
        >>> bm2 = create_behavior_matrix(["X_0", "+", "X_1"], X)
        >>> print(bool(np.array_equal(bm1, bm2)))
        True

    Args:
        expr: Expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        X: Input data of shape ``(n_samples, n_features)`` at which the expression is
            evaluated.
        num_consts_sampled: Number of constant vectors to sample; sets the number of
            output columns. Default ``32``.
        consts_bounds: ``(lower, upper)`` bounds for constant sampling. Default ``(-5, 5)``.
        symbol_library: Symbol library used to compile the expression. Defaults to
            [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].
        seed: Random seed for reproducible constant sampling. Default ``None``.

    Returns:
        Behavior matrix of shape ``(n_samples, num_consts_sampled)``.

    Raises:
        Exception: If ``expr`` is neither a token list nor a [Node][SRToolkit.utils.expression_tree.Node].
    """
    if symbol_library is None:
        symbol_library = SymbolLibrary.default_symbols()
    const_symbols = symbol_library.get_symbols_of_type("const")

    # The token view is only needed to count how many constants the expression holds.
    if isinstance(expr, list):
        tokens = expr
    elif isinstance(expr, Node):
        tokens = expr.to_list(notation="postfix")
    else:
        raise Exception("Expression should be a list of strings (tokens) or a Node")
    num_constants = sum(tokens.count(c) for c in const_symbols)

    callable_expr = expr_to_executable_function(expr, symbol_library)

    with np.errstate(divide="ignore", invalid="ignore", over="ignore", under="ignore"):
        if num_constants == 0:
            # No free constants: one evaluation, repeated across all columns.
            column = callable_expr(X, None)[:, None]
            return np.repeat(column, num_consts_sampled, axis=1)

        sampler = LatinHypercube(num_constants, seed=seed)
        # Rescale unit-hypercube samples into [consts_bounds[0], consts_bounds[1]].
        constants = sampler.random(num_consts_sampled) * (consts_bounds[1] - consts_bounds[0]) + consts_bounds[0]
        # One evaluation per constant vector; transpose so that rows index the points in X.
        return np.array([callable_expr(X, c) for c in constants]).T

edit_distance

edit_distance(
    expr1: Union[List[str], Node],
    expr2: Union[List[str], Node],
    notation: str = "postfix",
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> int

Compute the edit distance between two expressions.

Both expressions are normalised to the requested notation before computing the Levenshtein distance, making the result independent of input serialisation.

Examples:

>>> edit_distance(["X_0", "+", "1"], ["X_0", "+", "1"])
0
>>> edit_distance(["X_0", "+", "1"], ["X_0", "-", "1"])
1
>>> edit_distance(tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1)), tokens_to_tree(["X_0", "-", "1"], SymbolLibrary.default_symbols(1)))
1

Parameters:

Name Type Description Default
expr1 Union[List[str], Node]

First expression as a token list or a Node tree.

required
expr2 Union[List[str], Node]

Second expression as a token list or a Node tree.

required
notation str

Notation used for comparison: "infix", "prefix", or "postfix". Defaults to "postfix" to avoid parenthesis artefacts.

'postfix'
symbol_library SymbolLibrary

Symbol library used when converting expressions to the target notation. Defaults to SymbolLibrary.default_symbols.

default_symbols()

Returns:

Type Description
int

Integer edit distance between the two serialised expressions.

Source code in SRToolkit/utils/measures.py
def edit_distance(
    expr1: Union[List[str], Node],
    expr2: Union[List[str], Node],
    notation: str = "postfix",
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> int:
    """
    Compute the edit distance between the token sequences of two expressions.

    Each expression is first brought into the requested notation: a
    [Node][SRToolkit.utils.expression_tree.Node] is serialised directly, a token list is parsed into a tree and
    serialised again. The two resulting token lists are then compared with
    ``editdistance.eval``.

    Examples:
        >>> edit_distance(["X_0", "+", "1"], ["X_0", "+", "1"])
        0
        >>> edit_distance(["X_0", "+", "1"], ["X_0", "-", "1"])
        1
        >>> edit_distance(tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1)), tokens_to_tree(["X_0", "-", "1"], SymbolLibrary.default_symbols(1)))
        1

    Args:
        expr1: First expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        expr2: Second expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        notation: Notation used for comparison: ``"infix"``, ``"prefix"``, or
            ``"postfix"``. Defaults to ``"postfix"`` to avoid parenthesis artefacts.
        symbol_library: Symbol library used when converting expressions to the target
            notation. Defaults to [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].

    Returns:
        Integer edit distance between the two serialised expressions.
    """

    def serialise(expr):
        # Bring one expression into the requested notation; other types pass through unchanged.
        if isinstance(expr, Node):
            return expr.to_list(symbol_library=symbol_library, notation=notation)
        if isinstance(expr, list):
            tree = tokens_to_tree(expr, symbol_library)
            return tree.to_list(symbol_library=symbol_library, notation=notation)
        return expr

    first = serialise(expr1)
    second = serialise(expr2)
    return editdistance.eval(first, second)

tree_edit_distance

tree_edit_distance(
    expr1: Union[Node, List[str]],
    expr2: Union[Node, List[str]],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> int

Compute the Zhang-Shasha tree edit distance between two expressions.

Examples:

>>> tree_edit_distance(["X_0", "+", "1"], ["X_0", "+", "1"])
0
>>> tree_edit_distance(["X_0", "+", "1"], ["X_0", "-", "1"])
1
>>> tree_edit_distance(tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1)), tokens_to_tree(["X_0", "-", "1"], SymbolLibrary.default_symbols(1)))
1

Parameters:

Name Type Description Default
expr1 Union[Node, List[str]]

First expression as a token list or a Node tree.

required
expr2 Union[Node, List[str]]

Second expression as a token list or a Node tree.

required
symbol_library SymbolLibrary

Symbol library used when converting token lists to trees. Defaults to SymbolLibrary.default_symbols.

default_symbols()

Returns:

Type Description
int

Integer tree edit distance.

Source code in SRToolkit/utils/measures.py
def tree_edit_distance(
    expr1: Union[Node, List[str]],
    expr2: Union[Node, List[str]],
    symbol_library: SymbolLibrary = SymbolLibrary.default_symbols(),
) -> int:
    """
    Compute the Zhang-Shasha tree edit distance between two expressions.

    Token lists are parsed into trees with ``tokens_to_tree``. Every tree is then
    converted with ``_expr_to_zss`` and the pair is compared using
    ``zss.simple_distance``.

    Examples:
        >>> tree_edit_distance(["X_0", "+", "1"], ["X_0", "+", "1"])
        0
        >>> tree_edit_distance(["X_0", "+", "1"], ["X_0", "-", "1"])
        1
        >>> tree_edit_distance(tokens_to_tree(["X_0", "+", "1"], SymbolLibrary.default_symbols(1)), tokens_to_tree(["X_0", "-", "1"], SymbolLibrary.default_symbols(1)))
        1

    Args:
        expr1: First expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        expr2: Second expression as a token list or a [Node][SRToolkit.utils.expression_tree.Node] tree.
        symbol_library: Symbol library used when converting token lists to trees.
            Defaults to [SymbolLibrary.default_symbols][SRToolkit.utils.symbol_library.SymbolLibrary.default_symbols].

    Returns:
        Integer tree edit distance.
    """

    def to_zss(expr):
        # Convert one expression for zss; other types pass through unchanged.
        if isinstance(expr, Node):
            return _expr_to_zss(expr)
        if isinstance(expr, list):
            return _expr_to_zss(tokens_to_tree(expr, symbol_library))
        return expr

    first = to_zss(expr1)
    second = to_zss(expr2)
    return int(zss.simple_distance(first, second))