Expose multiple RealtimeAPIs as a single endpoint for A/B tests, multi-armed bandits, or canary deployments.
Deploy two RealtimeAPIs with the same predictor, one on CPU and one on GPU:

```python
import cortex


class PythonPredictor:
    def __init__(self, config):
        from transformers import pipeline

        self.model = pipeline(task="text-generation")

    def predict(self, payload):
        return self.model(payload["text"])[0]


requirements = ["tensorflow", "transformers"]

api_spec_cpu = {
    "name": "text-generator-cpu",
    "kind": "RealtimeAPI",
    "compute": {
        "cpu": 1,
    },
}

api_spec_gpu = {
    "name": "text-generator-gpu",
    "kind": "RealtimeAPI",
    "compute": {
        "gpu": 1,
    },
}

cx = cortex.client("aws")
cx.create_api(api_spec_cpu, predictor=PythonPredictor, requirements=requirements)
cx.create_api(api_spec_gpu, predictor=PythonPredictor, requirements=requirements)
```
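As a sanity check, the predictor class can be exercised locally before it's deployed. This is a minimal sketch outside the Cortex workflow; since this predictor never reads its `config` argument, an empty dict is passed:

```python
# Local smoke test (not part of the Cortex deployment flow): instantiate
# the predictor directly and run one prediction.
predictor = PythonPredictor(config={})
print(predictor.predict({"text": "Machine learning is"}))
# -> a dict like {"generated_text": "Machine learning is ..."}
```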
Then create a TrafficSplitter that exposes both APIs behind a single endpoint, splitting traffic evenly between them:

```python
traffic_splitter_spec = {
    "name": "text-generator",
    "kind": "TrafficSplitter",
    "apis": [
        {"name": "text-generator-cpu", "weight": 50},
        {"name": "text-generator-gpu", "weight": 50},
    ],
}

cx.create_api(traffic_splitter_spec)
```
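Once the TrafficSplitter is live, clients send requests to its single endpoint and Cortex distributes them according to the weights. A hedged example, assuming the metadata returned by `get_api` exposes the endpoint URL under an `"endpoint"` key (as in Cortex's quickstart; the key may differ across versions):

```python
import requests

# Assumption: the API metadata exposes the endpoint URL under "endpoint".
endpoint = cx.get_api("text-generator")["endpoint"]

# With the 50/50 weights above, these requests are split roughly evenly
# between text-generator-cpu and text-generator-gpu.
for _ in range(4):
    response = requests.post(endpoint, json={"text": "machine learning is"})
    print(response.json())
```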
To shift traffic between the APIs, fetch the splitter's submitted spec, update the weights, and patch it:

```python
traffic_splitter_spec = cx.get_api("text-generator")["spec"]["submitted_api_spec"]

# send 99% of the traffic to text-generator-gpu
traffic_splitter_spec["apis"][0]["weight"] = 1
traffic_splitter_spec["apis"][1]["weight"] = 99

cx.patch(traffic_splitter_spec)
```
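For a canary deployment, the same patch call can be applied repeatedly to shift traffic in stages rather than all at once. A sketch under a hypothetical rollout schedule (the step sizes and wait time are illustrative, not a Cortex convention):

```python
import time

# Hypothetical canary schedule: ramp traffic from text-generator-cpu
# (apis[0]) to text-generator-gpu (apis[1]) in stages.
for gpu_weight in (10, 25, 50, 75, 99):
    traffic_splitter_spec["apis"][0]["weight"] = 100 - gpu_weight
    traffic_splitter_spec["apis"][1]["weight"] = gpu_weight
    cx.patch(traffic_splitter_spec)
    time.sleep(600)  # pause between steps to monitor each API's metrics
```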