Serving
Batch Text Generator
In Fiat Copilot, we provide a text generator abstraction that handles batched prediction over text inputs -
```python
import logging
from typing import List

from ray import serve
from starlette.requests import Request

logger = logging.getLogger(__name__)


class BatchTextGenerator:
    # Class-level default; this is the value the @serve.batch decorator
    # below binds at class-definition time.
    batch_size: int = 4

    def __init__(
        self,
        model,          # Your model
        batch_handler,  # This is what you need to define
        batch_size: int = 4,
    ):
        self.model = model
        self.batch_handler = batch_handler
        self.batch_size = batch_size

    # @serve.batch collects concurrent calls into a single list of inputs.
    @serve.batch(max_batch_size=batch_size)
    async def handle_batch(self, inputs: List[str]) -> List[str]:
        logger.info(f"Our input array has length: {len(inputs)}")
        outputs = self.batch_handler(self.model, inputs)
        return outputs

    async def __call__(self, request: Request) -> str:
        # Each request contributes a single string; Ray Serve batches
        # concurrent calls and invokes handle_batch with a list of them.
        return await self.handle_batch(request.query_params["text"])
```
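The only piece you must supply is the `batch_handler`: a callable that takes the model and a list of input strings and returns one output string per input. Below is a minimal sketch; the Hugging Face pipeline and the handler name are illustrative stand-ins, not part of Fiat Copilot -

```python
from typing import List

from ray import serve
from transformers import pipeline  # illustrative model, not part of Fiat Copilot


def my_batch_handler(model, inputs: List[str]) -> List[str]:
    # A text-generation pipeline accepts a list of prompts natively.
    results = model(inputs, max_new_tokens=32)
    return [r[0]["generated_text"] for r in results]


# One way to deploy the generator directly with Ray Serve:
app = serve.deployment(BatchTextGenerator).bind(
    pipeline("text-generation", model="gpt2"),  # model
    my_batch_handler,                           # batch_handler
)
serve.run(app)
```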
But you don't need to write your own class extending BatchTextGenerator; you can use our helper function instead -
With adhoc_serving_setup, you can quickly test your model on an existing Ray cluster. You then call get_result_with_server_handle to fetch the serving results -
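The exact signatures live in the Fiat Copilot source; the sketch below assumes that `adhoc_serving_setup` stands up a BatchTextGenerator from your model and batch handler and returns a Serve handle, and that `get_result_with_server_handle` takes that handle plus a list of inputs -

```python
# Assumed signatures -- check the Fiat Copilot source for the real ones.
handle = adhoc_serving_setup(
    model=model,
    batch_handler=my_batch_handler,
    batch_size=8,
)

results = get_result_with_server_handle(handle, ["first prompt", "second prompt"])
```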
Here is a complete example -
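The example block did not survive extraction here; the following is a reconstruction under the same assumed signatures as above, with the import path and model choice labeled as hypothetical -

```python
from typing import List

from transformers import pipeline  # illustrative model, not part of Fiat Copilot

from fiat_copilot.serving import (  # hypothetical import path
    adhoc_serving_setup,
    get_result_with_server_handle,
)


def my_batch_handler(model, inputs: List[str]) -> List[str]:
    # Return one generated string per input prompt.
    results = model(inputs, max_new_tokens=32)
    return [r[0]["generated_text"] for r in results]


model = pipeline("text-generation", model="gpt2")

# Assumed signatures -- see the note above.
handle = adhoc_serving_setup(
    model=model,
    batch_handler=my_batch_handler,
    batch_size=8,
)

outputs = get_result_with_server_handle(handle, ["Hello,", "Ray Serve makes"])
print(outputs)
```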