💂Serving

Batch Text Generator

In Fiat Copilot, we provide a text generator abstraction for handling batch prediction of text -

from ray import serve

class BatchTextGenerator:
    batch_size: int = 4

    def __init__(
            self,
            model,         # Your model
            batch_handler, # This is what you need to define
            batch_size: int = 4
    ):
        self.model = model
        self.batch_handler = batch_handler
        self.batch_size = batch_size

    @serve.batch(max_batch_size=batch_size)
    async def handle_batch(self, inputs: List[str]) -> List[str]:
        logger.info(f"Our input array has length: {len(inputs)}")

        outputs = self.batch_handler(self.model, inputs)

        return outputs

    async def __call__(self, requests: List[Request]) -> List[str]:
        ret = await self.handle_batch(
            inputs=[request.query_params["text"] for request in requests]
        )

        return ret

But you don't need to write your own class to extend from the BatchTextGenerator class, you can use our helper function -

@contextmanager
def adhoc_serving_setup(
        serving_model,
        handle_func: Callable[..., Any],
        serving_type: ServiceType,
        app_description: AppDescription,
        ray_cluster: str | None = None,
) -> RayServeHandle | None

With the adhoc_serving_setup, you can efficiently test your model on an existing Ray cluster. You also need to call the get_result_with_server_handle to fetch your serving results -

def get_serving_result_with_handle(
        handle: RayServeHandle,
        input_batch: list
) -> list[Any]

Here is a complete example -

Batch Text Generator Example

from typing import List

from rich import print
from transformers import pipeline

from fiat_copilot.serving.context import adhoc_serving_setup, get_serving_result_with_handle
from fiat_copilot.serving.domain import AppDescription, ServiceType
from fiat_copilot.utils.config import get_application_conf


def model_inference(model, inputs: List[str]):
    model_results = model(inputs)

    return [result[0]["generated_text"] for result in model_results]


if __name__ == '__main__':
    # Load your model
    model = pipeline("text-generation", "gpt2")
    # Get ray config
    conf: dict = get_application_conf()
    serving_conf: dict = conf['ray']['serving']

    with adhoc_serving_setup(
            ray_cluster=None,
            serving_model=model,
            handle_func=model_inference,
            serving_type=ServiceType.Text,
            app_description=AppDescription(
                host="127.0.0.1",
                port=0,
                name="test",
                route_prefix="/fiat/serve",
                resource_config=serving_conf
            ),
    ) as serve_handle:
        input_batch = [
            'Once upon a time,',
            'Hi my name is Lewis and I like to',
            'My name is Mary, and my favorite',
            'My name is Clara and I am',
            'My name is Julien and I like to',
            'Today I accidentally',
            'My greatest wish is to',
            'In a galaxy far far away',
            'My best talent is',
        ]
        print("Input batch is", input_batch)

        result_batch = get_serving_result_with_handle(
            handle=serve_handle,
            input_batch=input_batch
        )

        print("Result batch is", result_batch)

PreviousTrainer & Predictor NextFiat Dash - 🚧 WIP

Last updated 2 years ago

hashtagBatch Text Generator

Batch Text Generator