base

`AsyncLM`

Bases: ABC

Abstract base class for asynchronous language models.

This class provides an interface for language models that can generate token probabilities asynchronously. It handles tokenization and vocabulary management.

Parameters:

Name	Type	Description	Default
`tokenizer`		A Hugging Face tokenizer instance compatible with the language model	required

Source code in genlm_backend/llm/base.py

class AsyncLM(ABC):
    """Interface for language models that serve next-token log probabilities.

    Subclasses implement the single-sequence queries (one asynchronous, one
    synchronous). This base class keeps the tokenizer, stores the two
    vocabularies returned by `decode_vocab`, and offers batched variants that
    are built on top of the single-sequence queries.

    Args:
        tokenizer: A Hugging Face tokenizer instance compatible with the language model
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        # `decode_vocab` yields a (byte vocabulary, string vocabulary) pair.
        vocab_pair = decode_vocab(self.tokenizer)
        self.byte_vocab, self.str_vocab = vocab_pair

    @abstractmethod
    async def next_token_logprobs(self, token_ids):
        """Asynchronously compute next-token log probabilities for a prompt.

        Args:
            token_ids (list[int]): A list of token IDs representing the prompt.

        Returns:
            (torch.Tensor): Normalized log probability tensor.
        """

    @abstractmethod
    def next_token_logprobs_sync(self, token_ids):
        """Synchronously compute next-token log probabilities for a prompt.

        Args:
            token_ids (list[int]): A list of token IDs representing the prompt.

        Returns:
            (torch.Tensor): Normalized log probability tensor.
        """

    async def batch_next_token_logprobs(self, token_ids_list):
        """Asynchronously compute next-token log probabilities for several prompts.

        One `next_token_logprobs` request is created per prompt and all of them
        are awaited together with `asyncio.gather`.

        Args:
            token_ids_list (list[list[int]]): A list of token ID lists.

        Returns:
            (torch.Tensor): The per-prompt results combined with `torch.stack`,
                in the same order as `token_ids_list`.
        """
        pending = (self.next_token_logprobs(ids) for ids in token_ids_list)
        per_prompt = await asyncio.gather(*pending)
        return torch.stack(per_prompt)

    def batch_next_token_logprobs_sync(self, token_ids_list):
        """Synchronously compute next-token log probabilities for several prompts.

        Prompts are processed one after another with `next_token_logprobs_sync`.

        Args:
            token_ids_list (list[list[int]]): A list of token ID lists.

        Returns:
            (torch.Tensor): The per-prompt results combined with `torch.stack`,
                in the same order as `token_ids_list`.
        """
        per_prompt = list(map(self.next_token_logprobs_sync, token_ids_list))
        return torch.stack(per_prompt)

    def clear_cache(self):
        """Drop any caches held by the language model; the base class has none."""
        return None

`batch_next_token_logprobs(token_ids_list)` `async`

Batch request log probabilities for multiple token sequences asynchronously.

Parameters:

Name	Type	Description	Default
`token_ids_list`	`list[list[int]]`	A list of token ID lists.	required

Returns:

Type	Description
`Tensor`	The log probability tensors for each input sequence, stacked into a single tensor in input order.

Source code in genlm_backend/llm/base.py

async def batch_next_token_logprobs(self, token_ids_list):
    """Asynchronously compute next-token log probabilities for several prompts.

    One `next_token_logprobs` request is created per prompt and all of them
    are awaited together with `asyncio.gather`.

    Args:
        token_ids_list (list[list[int]]): A list of token ID lists.

    Returns:
        (torch.Tensor): The per-prompt results combined with `torch.stack`,
            in the same order as `token_ids_list`.
    """
    pending = (self.next_token_logprobs(ids) for ids in token_ids_list)
    per_prompt = await asyncio.gather(*pending)
    return torch.stack(per_prompt)

`batch_next_token_logprobs_sync(token_ids_list)`

Batch request log probabilities for multiple token sequences synchronously.

Parameters:

Name	Type	Description	Default
`token_ids_list`	`list[list[int]]`	A list of token ID lists.	required

Returns:

Type	Description
`Tensor`	The log probability tensors for each input sequence, stacked into a single tensor in input order.

Source code in genlm_backend/llm/base.py

def batch_next_token_logprobs_sync(self, token_ids_list):
    """Synchronously compute next-token log probabilities for several prompts.

    Prompts are processed one after another with `next_token_logprobs_sync`.

    Args:
        token_ids_list (list[list[int]]): A list of token ID lists.

    Returns:
        (torch.Tensor): The per-prompt results combined with `torch.stack`,
            in the same order as `token_ids_list`.
    """
    per_prompt = list(map(self.next_token_logprobs_sync, token_ids_list))
    return torch.stack(per_prompt)

`clear_cache()`

Clear any caches used by the language model. No-op in base class.

Source code in genlm_backend/llm/base.py

def clear_cache(self):
    """Drop any caches held by the language model; the base class has none."""
    return None

`next_token_logprobs(token_ids)` `abstractmethod` `async`

Request log probabilities of next token asynchronously.

Parameters:

Name	Type	Description	Default
`token_ids`	`list[int]`	A list of token IDs representing the prompt.	required

Returns:

Type	Description
`Tensor`	Normalized log probability tensor.

Source code in genlm_backend/llm/base.py

@abstractmethod
async def next_token_logprobs(self, token_ids):
    """Asynchronously compute next-token log probabilities for a prompt.

    Abstract: concrete language models must override this coroutine.

    Args:
        token_ids (list[int]): A list of token IDs representing the prompt.

    Returns:
        (torch.Tensor): Normalized log probability tensor.
    """

`next_token_logprobs_sync(token_ids)` `abstractmethod`

Request log probabilities of next token synchronously.

Parameters:

Name	Type	Description	Default
`token_ids`	`list[int]`	A list of token IDs representing the prompt.	required

Returns:

Type	Description
`Tensor`	Normalized log probability tensor.

Source code in genlm_backend/llm/base.py

@abstractmethod
def next_token_logprobs_sync(self, token_ids):
    """Synchronously compute next-token log probabilities for a prompt.

    Abstract: concrete language models must override this method.

    Args:
        token_ids (list[int]): A list of token IDs representing the prompt.

    Returns:
        (torch.Tensor): Normalized log probability tensor.
    """

`MockAsyncLM`

Bases: AsyncLM

Mock implementation of AsyncLM used for testing.

Source code in genlm_backend/llm/base.py

class MockAsyncLM(AsyncLM):
    """Mock implementation of AsyncLM used for testing.

    No model is run: the returned log probabilities are pseudo-random values that
    depend only on the input token IDs, so equal inputs give equal outputs.
    """

    def __init__(self, tokenizer):
        """Initialize a `MockAsyncLM` instance.

        Args:
            tokenizer: Hugging Face tokenizer instance
        """
        super().__init__(tokenizer)
        # Re-seeded on every query in `_get_logprobs`; the initial seed is a placeholder.
        self._rng = np.random.RandomState(42)

    @classmethod
    def from_name(cls, model_name, **kwargs):
        """Create a MockAsyncLM instance over the vocabulary of the model's tokenizer.

        Args:
            model_name (str): Name of pretrained model to load tokenizer from
            **kwargs: Additional arguments passed to `MockAsyncLM` constructor

        Returns:
            (MockAsyncLM): `MockAsyncLM` instance initialized with tokenizer from `model_name`
        """
        # Imported lazily so `transformers` is only needed when this constructor is used.
        from transformers import AutoTokenizer

        return cls(AutoTokenizer.from_pretrained(model_name), **kwargs)

    async def next_token_logprobs(self, token_ids):
        """Get next token log probabilities asynchronously.

        Args:
            token_ids (list[int]): Input token IDs.

        Returns:
            (torch.Tensor): Normalized log probability tensor.
        """
        return self._get_logprobs(token_ids)

    def next_token_logprobs_sync(self, token_ids):
        """Get next token log probabilities synchronously.

        Args:
            token_ids (list[int]): Input token IDs.

        Returns:
            (torch.Tensor): Normalized log probability tensor.
        """
        return self._get_logprobs(token_ids)

    def _get_logprobs(self, token_ids):
        """Generate random but deterministic log probabilities for given tokens.

        Uses token_ids to seed the random generator, ensuring same inputs produce same outputs.
        The seed is reduced modulo 2**32 so that arbitrarily long prompts stay within the
        range accepted by `numpy.random.RandomState.seed`.

        Args:
            token_ids (list[int]): Input token IDs.

        Returns:
            (torch.Tensor): Normalized log probability tensor with `len(self.tokenizer)` entries.
        """
        # Weight each token by its 1-based position so that reordering changes the seed.
        seed = sum((i + 1) * t for i, t in enumerate(token_ids))
        # RandomState.seed rejects values outside [0, 2**32 - 1]; without the modulo,
        # long prompts with large token IDs raise ValueError. Seeds already in range
        # are unchanged, so previously valid inputs give the same outputs as before.
        self._rng.seed(seed % 2**32)
        logits = torch.from_numpy(
            self._rng.rand(len(self.tokenizer)).astype(np.float32)
        )
        return torch.log_softmax(logits, dim=-1)

`__init__(tokenizer)`

Initialize a MockAsyncLM instance.

Parameters:

Name	Type	Description	Default
`tokenizer`		Hugging Face tokenizer instance	required

Source code in genlm_backend/llm/base.py

def __init__(self, tokenizer):
    """Set up a `MockAsyncLM` around the given tokenizer.

    The tokenizer is handed to the parent initializer, then a NumPy
    `RandomState` is created and kept on the instance as `_rng`.

    Args:
        tokenizer: Hugging Face tokenizer instance
    """
    super().__init__(tokenizer)
    self._rng = np.random.RandomState(seed=42)

`from_name(model_name, **kwargs)` `classmethod`

Create a MockAsyncLM instance over the vocabulary of the model's tokenizer.

Parameters:

Name	Type	Description	Default
`model_name`	`str`	Name of pretrained model to load tokenizer from	required
`**kwargs`		Additional arguments passed to `MockAsyncLM` constructor	`{}`

Returns:

Type	Description
`MockAsyncLM`	`MockAsyncLM` instance initialized with tokenizer from `model_name`

Source code in genlm_backend/llm/base.py

@classmethod
def from_name(cls, model_name, **kwargs):
    """Build a MockAsyncLM from the tokenizer of a named pretrained model.

    Args:
        model_name (str): Name of pretrained model to load tokenizer from
        **kwargs: Additional arguments passed to `MockAsyncLM` constructor

    Returns:
        (MockAsyncLM): `MockAsyncLM` instance initialized with tokenizer from `model_name`
    """
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return cls(tokenizer, **kwargs)

`next_token_logprobs(token_ids)` `async`

Get next token log probabilities asynchronously.

Parameters:

Name	Type	Description	Default
`token_ids`	`list[int]`	Input token IDs.	required

Returns:

Type	Description
`Tensor`	Normalized log probability tensor.

Source code in genlm_backend/llm/base.py

async def next_token_logprobs(self, token_ids):
    """Return next-token log probabilities for the given prompt (async variant).

    Delegates directly to `_get_logprobs`; nothing is awaited.

    Args:
        token_ids (list[int]): Input token IDs.

    Returns:
        (torch.Tensor): Normalized log probability tensor.
    """
    logprobs = self._get_logprobs(token_ids)
    return logprobs

`next_token_logprobs_sync(token_ids)`

Get next token log probabilities synchronously.

Parameters:

Name	Type	Description	Default
`token_ids`	`list[int]`	Input token IDs.	required

Returns:

Type	Description
`Tensor`	Normalized log probability tensor.

Source code in genlm_backend/llm/base.py

def next_token_logprobs_sync(self, token_ids):
    """Return next-token log probabilities for the given prompt (sync variant).

    Delegates directly to `_get_logprobs`.

    Args:
        token_ids (list[int]): Input token IDs.

    Returns:
        (torch.Tensor): Normalized log probability tensor.
    """
    logprobs = self._get_logprobs(token_ids)
    return logprobs

base

AsyncLM

batch_next_token_logprobs(token_ids_list) async

batch_next_token_logprobs_sync(token_ids_list)

clear_cache()

next_token_logprobs(token_ids) abstractmethod async

next_token_logprobs_sync(token_ids) abstractmethod

MockAsyncLM

__init__(tokenizer)

from_name(model_name, **kwargs) classmethod

next_token_logprobs(token_ids) async

next_token_logprobs_sync(token_ids)

`AsyncLM`

`batch_next_token_logprobs(token_ids_list)` `async`

`batch_next_token_logprobs_sync(token_ids_list)`

`clear_cache()`

`next_token_logprobs(token_ids)` `abstractmethod` `async`

`next_token_logprobs_sync(token_ids)` `abstractmethod`

`MockAsyncLM`

`__init__(tokenizer)`

`from_name(model_name, **kwargs)` `classmethod`

`next_token_logprobs(token_ids)` `async`

`next_token_logprobs_sync(token_ids)`