From e30e8e9e49e76c04fec2b740936ed1dc81ddf160 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Sun, 31 May 2026 14:52:44 -0700 Subject: [PATCH] feat: Onboard Vertex Model Garden to GenAI Python SDK: Add list_models and list_deployable_models support PiperOrigin-RevId: 924362030 --- agentplatform/_genai/client.py | 29 + agentplatform/_genai/model_garden.py | 647 ++++++ agentplatform/_genai/types/__init__.py | 200 ++ agentplatform/_genai/types/common.py | 1754 +++++++++++++++-- .../agentplatform/genai/replays/conftest.py | 5 + .../genai/replays/test_model_garden.py | 82 + .../agentplatform/genai/test_model_garden.py | 409 ++++ 7 files changed, 2931 insertions(+), 195 deletions(-) create mode 100644 agentplatform/_genai/model_garden.py create mode 100644 tests/unit/agentplatform/genai/replays/test_model_garden.py create mode 100644 tests/unit/agentplatform/genai/test_model_garden.py diff --git a/agentplatform/_genai/client.py b/agentplatform/_genai/client.py index d04deba6d9..0ed9ddf79e 100644 --- a/agentplatform/_genai/client.py +++ b/agentplatform/_genai/client.py @@ -39,6 +39,9 @@ ) from agentplatform._genai import prompts as prompts_module from agentplatform._genai import skills as skills_module + from agentplatform._genai import ( + model_garden as model_garden_module, + ) from agentplatform._genai import live as live_module from agentplatform._genai import rag as rag_module @@ -85,6 +88,7 @@ def __init__(self, api_client: genai_client.BaseApiClient): # type: ignore[name self._datasets: Optional[ModuleType] = None self._skills: Optional[ModuleType] = None self._rag: Optional[ModuleType] = None + self._model_garden: Optional[ModuleType] = None @property @_common.experimental_warning( @@ -179,6 +183,18 @@ def rag(self) -> "rag_module.AsyncRag": ) return self._rag.AsyncRag(self._api_client) # type: ignore[no-any-return] + @property + @_common.experimental_warning( + "The Model Garden module is experimental, and may change in future " "versions." 
+ ) + def model_garden(self) -> "model_garden_module.AsyncModelGarden": + if self._model_garden is None: + self._model_garden = importlib.import_module( + ".model_garden", + __package__, + ) + return self._model_garden.AsyncModelGarden(self._api_client) # type: ignore[no-any-return] + async def aclose(self) -> None: """Closes the async client explicitly. @@ -284,6 +300,7 @@ def __init__( self._datasets: Optional[ModuleType] = None self._skills: Optional[ModuleType] = None self._rag: Optional[ModuleType] = None + self._model_garden: Optional[ModuleType] = None @property def evals(self) -> "evals_module.Evals": @@ -402,3 +419,15 @@ def rag(self) -> "rag_module.Rag": __package__, ) return self._rag.Rag(self._api_client) # type: ignore[no-any-return] + + @property + @_common.experimental_warning( + "The Model Garden module is experimental, and may change in future " "versions." + ) + def model_garden(self) -> "model_garden_module.ModelGarden": + if self._model_garden is None: + self._model_garden = importlib.import_module( + ".model_garden", + __package__, + ) + return self._model_garden.ModelGarden(self._api_client) # type: ignore[no-any-return] diff --git a/agentplatform/_genai/model_garden.py b/agentplatform/_genai/model_garden.py new file mode 100644 index 0000000000..b9ec5c1cab --- /dev/null +++ b/agentplatform/_genai/model_garden.py @@ -0,0 +1,647 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Code generated by the Google Gen AI SDK generator DO NOT EDIT. + +import json +import logging +from typing import Any, Optional, Union +from urllib.parse import urlencode + +from google.genai import _api_module +from google.genai import _common +from google.genai._common import get_value_by_path as getv +from google.genai._common import set_value_by_path as setv + +from . import types + +logger = logging.getLogger("agentplatform_genai.modelgarden") + + +def _ListPublisherModelsConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + + if getv(from_object, ["page_size"]) is not None: + setv(parent_object, ["_query", "pageSize"], getv(from_object, ["page_size"])) + + if getv(from_object, ["page_token"]) is not None: + setv(parent_object, ["_query", "pageToken"], getv(from_object, ["page_token"])) + + if getv(from_object, ["filter"]) is not None: + setv(parent_object, ["_query", "filter"], getv(from_object, ["filter"])) + + if getv(from_object, ["list_all_versions"]) is not None: + setv( + parent_object, + ["_query", "listAllVersions"], + getv(from_object, ["list_all_versions"]), + ) + + return to_object + + +def _ListPublisherModelsRequestParameters_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ["parent"]) is not None: + setv(to_object, ["_url", "parent"], getv(from_object, ["parent"])) + + if getv(from_object, ["config"]) is not None: + _ListPublisherModelsConfig_to_vertex(getv(from_object, ["config"]), to_object) + + return to_object + + +class ModelGarden(_api_module.BaseModule): + """Model Garden module.""" + + def _list_publisher_models( + self, + *, + parent: Optional[str] = None, + config: Optional[types.ListPublisherModelsConfigOrDict] = None, + ) -> types.ListPublisherModelsResponse: + """ + Lists 
publisher models (internal). + """ + + parameter_model = types._ListPublisherModelsRequestParameters( + parent=parent, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError( + "This method is only supported in Gemini Enterprise Agent Platform mode, not in Gemini Developer API mode." + ) + else: + request_dict = _ListPublisherModelsRequestParameters_to_vertex( + parameter_model + ) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = "{parent}/models".format_map(request_url_dict) + else: + path = "{parent}/models" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. + request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = self._api_client.request("get", path, request_dict, http_options) + + response_dict = {} if not response.body else json.loads(response.body) + + return_value = types.ListPublisherModelsResponse._from_response( + response=response_dict, + kwargs=( + { + "config": { + "response_schema": getattr( + parameter_model.config, "response_schema", None + ), + "response_json_schema": getattr( + parameter_model.config, "response_json_schema", None + ), + "include_all_fields": getattr( + parameter_model.config, "include_all_fields", None + ), + } + } + if getattr(parameter_model, "config", None) + else {} + ), + ) + + self._api_client._verify_response(return_value) + return return_value + + @staticmethod + def _build_filter_str( + model_filter: Optional[str], + include_hugging_face_models: Optional[bool], + deployable_only: bool, + ) -> str: + 
"""Builds the filter string for the ListPublisherModels API. + + Args: + model_filter: Optional substring to match against model IDs and display + names (case-insensitive). + include_hugging_face_models: Whether to include HuggingFace models. If + True, uses ``is_hf_wildcard(true)``; otherwise ``is_hf_wildcard(false)``. + deployable_only: Whether to restrict to models with verified deployment + configurations via the ``VERIFIED_DEPLOYMENT_SUCCEED`` label. + + Returns: + A filter string suitable for the ``filter`` parameter of the + ListPublisherModels API. + """ + import re + + if include_hugging_face_models: + filter_str = "is_hf_wildcard(true)" + if deployable_only: + filter_str += ( + " AND labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED" + ) + else: + filter_str = "is_hf_wildcard(false)" + + if model_filter: + escaped = re.escape(model_filter) + filter_str = ( + f'{filter_str} AND (model_user_id=~"(?i).*{escaped}.*"' + f' OR display_name=~"(?i).*{escaped}.*")' + ) + + return filter_str + + @staticmethod + def _format_model_name( + model: types.PublisherModel, + include_hugging_face_models: bool, + ) -> str: + """Formats a PublisherModel into a human-readable model name string. + + Args: + model: The PublisherModel to format. + include_hugging_face_models: Whether HuggingFace models are included in + the listing. Controls whether the ``@version`` suffix is appended. + + Returns: + A formatted model name string in one of the following formats: + + - ``'{publisher}/{model}@{version}'`` when + ``include_hugging_face_models`` is False. + - ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True. 
+ """ + import re + + name = model.name or "" + formatted = re.sub(r"publishers/(hf-|)|models/", "", name) + if include_hugging_face_models: + return formatted + return formatted + "@" + (model.version_id or "") + + @staticmethod + def _has_deploy_config(model: types.PublisherModel) -> bool: + """Checks whether a model has verified deployment configurations. + + Args: + model: The PublisherModel to check. + + Returns: + True if the model has at least one entry in + ``supported_actions.multi_deploy_vertex``. + """ + return bool( + model.supported_actions + and model.supported_actions.multi_deploy_vertex + and model.supported_actions.multi_deploy_vertex.multi_deploy_vertex + ) + + def _list_all_publisher_models( + self, + api_config: types.ListPublisherModelsConfig, + ) -> list[types.PublisherModel]: + """Fetches all pages of publisher models from the API. + + Args: + api_config: The configuration for the ListPublisherModels API call, + including filter and version settings. + + Returns: + A list of all ``PublisherModel`` objects across all pages. + """ + all_models = [] + page_token = None + while True: + if page_token: + api_config = types.ListPublisherModelsConfig( + filter=api_config.filter, + list_all_versions=api_config.list_all_versions, + page_token=page_token, + ) + response = self._list_publisher_models( + parent="publishers/*", + config=api_config, + ) + all_models.extend(response.publisher_models or []) + page_token = response.next_page_token + if not page_token: + break + return all_models + + def list_deployable_models( + self, + config: Optional[types.ListDeployableModelsConfigOrDict] = None, + ) -> list[str]: + """Lists models in Model Garden that support deployment. + + Returns models that have at least one verified deployment configuration. + When ``include_hugging_face_models`` is False, HuggingFace models are + excluded from the results. + + Args: + config: Optional configuration for filtering results. 
Accepts a + ``ListDeployableModelsConfig`` instance or an equivalent dict. + + Returns: + A list of model name strings in the format + ``'{publisher}/{model}@{version}'`` (e.g. ``'google/gemma2@gemma-2-2b-it'``) + or ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True + (e.g. ``'meta-llama/Llama-3.3-70B-Instruct'``). + """ + if config is None: + config = types.ListDeployableModelsConfig() + if isinstance(config, dict): + config = types.ListDeployableModelsConfig.model_validate(config) + + filter_str = self._build_filter_str( + config.model_filter, + config.include_hugging_face_models, + deployable_only=True, + ) + + api_config = types.ListPublisherModelsConfig( + filter=filter_str, + list_all_versions=True, + ) + + models = self._list_all_publisher_models(api_config) + + if not config.include_hugging_face_models: + models = [ + m for m in models if m.name and not m.name.startswith("publishers/hf-") + ] + + models = [m for m in models if self._has_deploy_config(m)] + + include_hf = bool(config.include_hugging_face_models) + return [self._format_model_name(m, include_hf) for m in models] + + def list_models( + self, + config: Optional[types.ListModelGardenModelsConfigOrDict] = None, + ) -> list[str]: + """Lists all models available in Model Garden. + + Returns all models regardless of deployment support. When + ``include_hugging_face_models`` is False, HuggingFace models are excluded + from the results. + + Args: + config: Optional configuration for filtering results. Accepts a + ``ListModelGardenModelsConfig`` instance or an equivalent dict. + + Returns: + A list of model name strings in the format + ``'{publisher}/{model}@{version}'`` (e.g. ``'google/gemma2@gemma-2-2b-it'``) + or ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True + (e.g. ``'meta-llama/Llama-3.3-70B-Instruct'``). 
+ """ + if config is None: + config = types.ListModelGardenModelsConfig() + if isinstance(config, dict): + config = types.ListModelGardenModelsConfig.model_validate(config) + + filter_str = self._build_filter_str( + config.model_filter, + config.include_hugging_face_models, + deployable_only=False, + ) + + api_config = types.ListPublisherModelsConfig( + filter=filter_str, + list_all_versions=True, + ) + + models = self._list_all_publisher_models(api_config) + + if not config.include_hugging_face_models: + models = [ + m for m in models if m.name and not m.name.startswith("publishers/hf-") + ] + + include_hf = bool(config.include_hugging_face_models) + return [self._format_model_name(m, include_hf) for m in models] + + +class AsyncModelGarden(_api_module.BaseModule): + """Model Garden module.""" + + async def _list_publisher_models( + self, + *, + parent: Optional[str] = None, + config: Optional[types.ListPublisherModelsConfigOrDict] = None, + ) -> types.ListPublisherModelsResponse: + """ + Lists publisher models (internal). + """ + + parameter_model = types._ListPublisherModelsRequestParameters( + parent=parent, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError( + "This method is only supported in Gemini Enterprise Agent Platform mode, not in Gemini Developer API mode." + ) + else: + request_dict = _ListPublisherModelsRequestParameters_to_vertex( + parameter_model + ) + request_url_dict = request_dict.get("_url") + if request_url_dict: + path = "{parent}/models".format_map(request_url_dict) + else: + path = "{parent}/models" + + query_params = request_dict.get("_query") + if query_params: + path = f"{path}?{urlencode(query_params)}" + # TODO: remove the hack that pops config. 
+ request_dict.pop("config", None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = await self._api_client.async_request( + "get", path, request_dict, http_options + ) + + response_dict = {} if not response.body else json.loads(response.body) + + return_value = types.ListPublisherModelsResponse._from_response( + response=response_dict, + kwargs=( + { + "config": { + "response_schema": getattr( + parameter_model.config, "response_schema", None + ), + "response_json_schema": getattr( + parameter_model.config, "response_json_schema", None + ), + "include_all_fields": getattr( + parameter_model.config, "include_all_fields", None + ), + } + } + if getattr(parameter_model, "config", None) + else {} + ), + ) + + self._api_client._verify_response(return_value) + return return_value + + @staticmethod + def _build_filter_str( + model_filter: Optional[str], + include_hugging_face_models: Optional[bool], + deployable_only: bool, + ) -> str: + """Builds the filter string for the ListPublisherModels API. + + Args: + model_filter: Optional substring to match against model IDs and display + names (case-insensitive). + include_hugging_face_models: Whether to include HuggingFace models. If + True, uses ``is_hf_wildcard(true)``; otherwise ``is_hf_wildcard(false)``. + deployable_only: Whether to restrict to models with verified deployment + configurations via the ``VERIFIED_DEPLOYMENT_SUCCEED`` label. + + Returns: + A filter string suitable for the ``filter`` parameter of the + ListPublisherModels API. 
+ """ + import re + + if include_hugging_face_models: + filter_str = "is_hf_wildcard(true)" + if deployable_only: + filter_str += ( + " AND labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED" + ) + else: + filter_str = "is_hf_wildcard(false)" + + if model_filter: + escaped = re.escape(model_filter) + filter_str = ( + f'{filter_str} AND (model_user_id=~"(?i).*{escaped}.*"' + f' OR display_name=~"(?i).*{escaped}.*")' + ) + + return filter_str + + @staticmethod + def _format_model_name( + model: types.PublisherModel, + include_hugging_face_models: bool, + ) -> str: + """Formats a PublisherModel into a human-readable model name string. + + Args: + model: The PublisherModel to format. + include_hugging_face_models: Whether HuggingFace models are included in + the listing. Controls whether the ``@version`` suffix is appended. + + Returns: + A formatted model name string in one of the following formats: + + - ``'{publisher}/{model}@{version}'`` when + ``include_hugging_face_models`` is False. + - ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True. + """ + import re + + name = model.name or "" + formatted = re.sub(r"publishers/(hf-|)|models/", "", name) + if include_hugging_face_models: + return formatted + return formatted + "@" + (model.version_id or "") + + @staticmethod + def _has_deploy_config(model: types.PublisherModel) -> bool: + """Checks whether a model has verified deployment configurations. + + Args: + model: The PublisherModel to check. + + Returns: + True if the model has at least one entry in + ``supported_actions.multi_deploy_vertex``. + """ + return bool( + model.supported_actions + and model.supported_actions.multi_deploy_vertex + and model.supported_actions.multi_deploy_vertex.multi_deploy_vertex + ) + + async def _list_all_publisher_models( + self, + api_config: types.ListPublisherModelsConfig, + ) -> list[types.PublisherModel]: + """Fetches all pages of publisher models from the API. 
+ + Args: + api_config: The configuration for the ListPublisherModels API call, + including filter and version settings. + + Returns: + A list of all ``PublisherModel`` objects across all pages. + """ + all_models = [] + page_token = None + while True: + if page_token: + api_config = types.ListPublisherModelsConfig( + filter=api_config.filter, + list_all_versions=api_config.list_all_versions, + page_token=page_token, + ) + response = await self._list_publisher_models( + parent="publishers/*", + config=api_config, + ) + all_models.extend(response.publisher_models or []) + page_token = response.next_page_token + if not page_token: + break + return all_models + + async def list_deployable_models( + self, + config: Optional[types.ListDeployableModelsConfigOrDict] = None, + ) -> list[str]: + """Lists models in Model Garden that support deployment. + + Returns models that have at least one verified deployment configuration. + When ``include_hugging_face_models`` is False, HuggingFace models are + excluded from the results. + + Args: + config: Optional configuration for filtering results. Accepts a + ``ListDeployableModelsConfig`` instance or an equivalent dict. + + Returns: + A list of model name strings in the format + ``'{publisher}/{model}@{version}'`` (e.g. ``'google/gemma2@gemma-2-2b-it'``) + or ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True + (e.g. ``'meta-llama/Llama-3.3-70B-Instruct'``). 
+ """ + if config is None: + config = types.ListDeployableModelsConfig() + if isinstance(config, dict): + config = types.ListDeployableModelsConfig.model_validate(config) + + filter_str = self._build_filter_str( + config.model_filter, + config.include_hugging_face_models, + deployable_only=True, + ) + + api_config = types.ListPublisherModelsConfig( + filter=filter_str, + list_all_versions=True, + ) + + models = await self._list_all_publisher_models(api_config) + + if not config.include_hugging_face_models: + models = [ + m for m in models if m.name and not m.name.startswith("publishers/hf-") + ] + + models = [m for m in models if self._has_deploy_config(m)] + + include_hf = bool(config.include_hugging_face_models) + return [self._format_model_name(m, include_hf) for m in models] + + async def list_models( + self, + config: Optional[types.ListModelGardenModelsConfigOrDict] = None, + ) -> list[str]: + """Lists all models available in Model Garden. + + Returns all models regardless of deployment support. When + ``include_hugging_face_models`` is False, HuggingFace models are excluded + from the results. + + Args: + config: Optional configuration for filtering results. Accepts a + ``ListModelGardenModelsConfig`` instance or an equivalent dict. + + Returns: + A list of model name strings in the format + ``'{publisher}/{model}@{version}'`` (e.g. ``'google/gemma2@gemma-2-2b-it'``) + or ``'{publisher}/{model}'`` when ``include_hugging_face_models`` is True + (e.g. ``'meta-llama/Llama-3.3-70B-Instruct'``). 
+ """ + if config is None: + config = types.ListModelGardenModelsConfig() + if isinstance(config, dict): + config = types.ListModelGardenModelsConfig.model_validate(config) + + filter_str = self._build_filter_str( + config.model_filter, + config.include_hugging_face_models, + deployable_only=False, + ) + + api_config = types.ListPublisherModelsConfig( + filter=filter_str, + list_all_versions=True, + ) + + models = await self._list_all_publisher_models(api_config) + + if not config.include_hugging_face_models: + models = [ + m for m in models if m.name and not m.name.startswith("publishers/hf-") + ] + + include_hf = bool(config.include_hugging_face_models) + return [self._format_model_name(m, include_hf) for m in models] diff --git a/agentplatform/_genai/types/__init__.py b/agentplatform/_genai/types/__init__.py index 999aed44fb..1e44334f8a 100644 --- a/agentplatform/_genai/types/__init__.py +++ b/agentplatform/_genai/types/__init__.py @@ -120,6 +120,7 @@ from .common import _ListDatasetVersionsRequestParameters from .common import _ListEvaluationMetricsParameters from .common import _ListMultimodalDatasetsRequestParameters +from .common import _ListPublisherModelsRequestParameters from .common import _ListRagCorporaRequestParameters from .common import _ListRagFilesRequestParameters from .common import _ListSandboxEnvironmentSnapshotsRequestParameters @@ -237,6 +238,12 @@ from .common import AttackCategoryResult from .common import AttackCategoryResultDict from .common import AttackCategoryResultOrDict +from .common import AutomaticResources +from .common import AutomaticResourcesDict +from .common import AutomaticResourcesOrDict +from .common import AutoscalingMetricSpec +from .common import AutoscalingMetricSpecDict +from .common import AutoscalingMetricSpecOrDict from .common import BatchPredictionResourceUsageAssessmentConfig from .common import BatchPredictionResourceUsageAssessmentConfigDict from .common import 
BatchPredictionResourceUsageAssessmentConfigOrDict @@ -378,6 +385,12 @@ from .common import DatasetVersion from .common import DatasetVersionDict from .common import DatasetVersionOrDict +from .common import DedicatedResources +from .common import DedicatedResourcesDict +from .common import DedicatedResourcesOrDict +from .common import DedicatedResourcesScaleToZeroSpec +from .common import DedicatedResourcesScaleToZeroSpecDict +from .common import DedicatedResourcesScaleToZeroSpecOrDict from .common import DefaultContainerCategory from .common import DeleteAgentEngineConfig from .common import DeleteAgentEngineConfigDict @@ -590,6 +603,9 @@ from .common import FileStatus from .common import FileStatusDict from .common import FileStatusOrDict +from .common import FlexStart +from .common import FlexStartDict +from .common import FlexStartOrDict from .common import Framework from .common import GcsDestination from .common import GcsDestinationDict @@ -782,6 +798,10 @@ from .common import KeepAliveProbeHttpGetOrDict from .common import KeepAliveProbeOrDict from .common import Language +from .common import LargeModelReference +from .common import LargeModelReferenceDict +from .common import LargeModelReferenceOrDict +from .common import LaunchStage from .common import ListAgentEngineConfig from .common import ListAgentEngineConfigDict from .common import ListAgentEngineConfigOrDict @@ -830,12 +850,18 @@ from .common import ListDatasetVersionsResponse from .common import ListDatasetVersionsResponseDict from .common import ListDatasetVersionsResponseOrDict +from .common import ListDeployableModelsConfig +from .common import ListDeployableModelsConfigDict +from .common import ListDeployableModelsConfigOrDict from .common import ListEvaluationMetricsConfig from .common import ListEvaluationMetricsConfigDict from .common import ListEvaluationMetricsConfigOrDict from .common import ListEvaluationMetricsResponse from .common import ListEvaluationMetricsResponseDict from 
.common import ListEvaluationMetricsResponseOrDict +from .common import ListModelGardenModelsConfig +from .common import ListModelGardenModelsConfigDict +from .common import ListModelGardenModelsConfigOrDict from .common import ListMultimodalDatasetsConfig from .common import ListMultimodalDatasetsConfigDict from .common import ListMultimodalDatasetsConfigOrDict @@ -845,6 +871,12 @@ from .common import ListPromptsConfig from .common import ListPromptsConfigDict from .common import ListPromptsConfigOrDict +from .common import ListPublisherModelsConfig +from .common import ListPublisherModelsConfigDict +from .common import ListPublisherModelsConfigOrDict +from .common import ListPublisherModelsResponse +from .common import ListPublisherModelsResponseDict +from .common import ListPublisherModelsResponseOrDict from .common import ListRagCorporaConfig from .common import ListRagCorporaConfigDict from .common import ListRagCorporaConfigOrDict @@ -1016,6 +1048,9 @@ from .common import MetricxResult from .common import MetricxResultDict from .common import MetricxResultOrDict +from .common import ModelContainerSpec +from .common import ModelContainerSpecDict +from .common import ModelContainerSpecOrDict from .common import MultimodalDataset from .common import MultimodalDatasetDict from .common import MultimodalDatasetOperation @@ -1028,6 +1063,7 @@ from .common import ObservabilityEvalCase from .common import ObservabilityEvalCaseDict from .common import ObservabilityEvalCaseOrDict +from .common import OpenSourceCategory from .common import Operator from .common import OptimizationMethod from .common import OptimizeConfig @@ -1056,7 +1092,31 @@ from .common import PointwiseMetricInstance from .common import PointwiseMetricInstanceDict from .common import PointwiseMetricInstanceOrDict +from .common import Port +from .common import PortDict +from .common import PortOrDict from .common import PostSnapshotAction +from .common import PredictSchemata +from .common import 
PredictSchemataDict +from .common import PredictSchemataOrDict +from .common import Probe +from .common import ProbeDict +from .common import ProbeExecAction +from .common import ProbeExecActionDict +from .common import ProbeExecActionOrDict +from .common import ProbeGrpcAction +from .common import ProbeGrpcActionDict +from .common import ProbeGrpcActionOrDict +from .common import ProbeHttpGetAction +from .common import ProbeHttpGetActionDict +from .common import ProbeHttpGetActionOrDict +from .common import ProbeHttpHeader +from .common import ProbeHttpHeaderDict +from .common import ProbeHttpHeaderOrDict +from .common import ProbeOrDict +from .common import ProbeTcpSocketAction +from .common import ProbeTcpSocketActionDict +from .common import ProbeTcpSocketActionOrDict from .common import Prompt from .common import PromptData from .common import PromptDataDict @@ -1083,6 +1143,45 @@ from .common import PscInterfaceConfig from .common import PscInterfaceConfigDict from .common import PscInterfaceConfigOrDict +from .common import PublisherModel +from .common import PublisherModelCallToAction +from .common import PublisherModelCallToActionDeploy +from .common import PublisherModelCallToActionDeployDeployMetadata +from .common import PublisherModelCallToActionDeployDeployMetadataDict +from .common import PublisherModelCallToActionDeployDeployMetadataOrDict +from .common import PublisherModelCallToActionDeployDict +from .common import PublisherModelCallToActionDeployGke +from .common import PublisherModelCallToActionDeployGkeDict +from .common import PublisherModelCallToActionDeployGkeOrDict +from .common import PublisherModelCallToActionDeployOrDict +from .common import PublisherModelCallToActionDeployVertex +from .common import PublisherModelCallToActionDeployVertexDict +from .common import PublisherModelCallToActionDeployVertexOrDict +from .common import PublisherModelCallToActionDict +from .common import PublisherModelCallToActionOpenFineTuningPipelines +from 
.common import PublisherModelCallToActionOpenFineTuningPipelinesDict +from .common import PublisherModelCallToActionOpenFineTuningPipelinesOrDict +from .common import PublisherModelCallToActionOpenNotebooks +from .common import PublisherModelCallToActionOpenNotebooksDict +from .common import PublisherModelCallToActionOpenNotebooksOrDict +from .common import PublisherModelCallToActionOrDict +from .common import PublisherModelCallToActionRegionalResourceReferences +from .common import PublisherModelCallToActionRegionalResourceReferencesDict +from .common import PublisherModelCallToActionRegionalResourceReferencesOrDict +from .common import PublisherModelCallToActionViewRestApi +from .common import PublisherModelCallToActionViewRestApiDict +from .common import PublisherModelCallToActionViewRestApiOrDict +from .common import PublisherModelDict +from .common import PublisherModelDocumentation +from .common import PublisherModelDocumentationDict +from .common import PublisherModelDocumentationOrDict +from .common import PublisherModelOrDict +from .common import PublisherModelParent +from .common import PublisherModelParentDict +from .common import PublisherModelParentOrDict +from .common import PublisherModelResourceReference +from .common import PublisherModelResourceReferenceDict +from .common import PublisherModelResourceReferenceOrDict from .common import PurgeAgentEngineMemoriesConfig from .common import PurgeAgentEngineMemoriesConfigDict from .common import PurgeAgentEngineMemoriesConfigOrDict @@ -1770,6 +1869,7 @@ from .common import UploadRagFileResponse from .common import UploadRagFileResponseDict from .common import UploadRagFileResponseOrDict +from .common import VersionState from .common import VertexAiSearchConfig from .common import VertexAiSearchConfigDict from .common import VertexAiSearchConfigOrDict @@ -3260,6 +3360,96 @@ "ListSkillRevisionsResponse", "ListSkillRevisionsResponseDict", "ListSkillRevisionsResponseOrDict", + "ListPublisherModelsConfig", + 
"ListPublisherModelsConfigDict", + "ListPublisherModelsConfigOrDict", + "PublisherModelResourceReference", + "PublisherModelResourceReferenceDict", + "PublisherModelResourceReferenceOrDict", + "PublisherModelParent", + "PublisherModelParentDict", + "PublisherModelParentOrDict", + "PredictSchemata", + "PredictSchemataDict", + "PredictSchemataOrDict", + "PublisherModelCallToActionRegionalResourceReferences", + "PublisherModelCallToActionRegionalResourceReferencesDict", + "PublisherModelCallToActionRegionalResourceReferencesOrDict", + "AutomaticResources", + "AutomaticResourcesDict", + "AutomaticResourcesOrDict", + "Port", + "PortDict", + "PortOrDict", + "ProbeExecAction", + "ProbeExecActionDict", + "ProbeExecActionOrDict", + "ProbeGrpcAction", + "ProbeGrpcActionDict", + "ProbeGrpcActionOrDict", + "ProbeHttpHeader", + "ProbeHttpHeaderDict", + "ProbeHttpHeaderOrDict", + "ProbeHttpGetAction", + "ProbeHttpGetActionDict", + "ProbeHttpGetActionOrDict", + "ProbeTcpSocketAction", + "ProbeTcpSocketActionDict", + "ProbeTcpSocketActionOrDict", + "Probe", + "ProbeDict", + "ProbeOrDict", + "ModelContainerSpec", + "ModelContainerSpecDict", + "ModelContainerSpecOrDict", + "AutoscalingMetricSpec", + "AutoscalingMetricSpecDict", + "AutoscalingMetricSpecOrDict", + "FlexStart", + "FlexStartDict", + "FlexStartOrDict", + "DedicatedResourcesScaleToZeroSpec", + "DedicatedResourcesScaleToZeroSpecDict", + "DedicatedResourcesScaleToZeroSpecOrDict", + "DedicatedResources", + "DedicatedResourcesDict", + "DedicatedResourcesOrDict", + "PublisherModelCallToActionDeployDeployMetadata", + "PublisherModelCallToActionDeployDeployMetadataDict", + "PublisherModelCallToActionDeployDeployMetadataOrDict", + "LargeModelReference", + "LargeModelReferenceDict", + "LargeModelReferenceOrDict", + "PublisherModelCallToActionDeploy", + "PublisherModelCallToActionDeployDict", + "PublisherModelCallToActionDeployOrDict", + "PublisherModelCallToActionDeployGke", + "PublisherModelCallToActionDeployGkeDict", + 
"PublisherModelCallToActionDeployGkeOrDict", + "PublisherModelCallToActionDeployVertex", + "PublisherModelCallToActionDeployVertexDict", + "PublisherModelCallToActionDeployVertexOrDict", + "PublisherModelCallToActionOpenFineTuningPipelines", + "PublisherModelCallToActionOpenFineTuningPipelinesDict", + "PublisherModelCallToActionOpenFineTuningPipelinesOrDict", + "PublisherModelCallToActionOpenNotebooks", + "PublisherModelCallToActionOpenNotebooksDict", + "PublisherModelCallToActionOpenNotebooksOrDict", + "PublisherModelDocumentation", + "PublisherModelDocumentationDict", + "PublisherModelDocumentationOrDict", + "PublisherModelCallToActionViewRestApi", + "PublisherModelCallToActionViewRestApiDict", + "PublisherModelCallToActionViewRestApiOrDict", + "PublisherModelCallToAction", + "PublisherModelCallToActionDict", + "PublisherModelCallToActionOrDict", + "PublisherModel", + "PublisherModelDict", + "PublisherModelOrDict", + "ListPublisherModelsResponse", + "ListPublisherModelsResponseDict", + "ListPublisherModelsResponseOrDict", "PromptOptimizerConfig", "PromptOptimizerConfigDict", "PromptOptimizerConfigOrDict", @@ -3350,6 +3540,12 @@ "AgentEngineRuntimeRevision", "AgentEngineRuntimeRevisionDict", "AgentEngineRuntimeRevisionOrDict", + "ListDeployableModelsConfig", + "ListDeployableModelsConfigDict", + "ListDeployableModelsConfigOrDict", + "ListModelGardenModelsConfig", + "ListModelGardenModelsConfigDict", + "ListModelGardenModelsConfigOrDict", "A2aTaskState", "State", "Strategy", @@ -3372,6 +3568,9 @@ "Framework", "SkillState", "SkillSource", + "LaunchStage", + "OpenSourceCategory", + "VersionState", "EvaluationItemType", "SamplingMethod", "EvaluationRunState", @@ -3528,6 +3727,7 @@ "_GetSkillOperationParameters", "_GetSkillRevisionRequestParameters", "_ListSkillRevisionsRequestParameters", + "_ListPublisherModelsRequestParameters", "evals", "agent_engines", "prompts", diff --git a/agentplatform/_genai/types/common.py b/agentplatform/_genai/types/common.py index 
6db6c8249b..da247bddf1 100644 --- a/agentplatform/_genai/types/common.py +++ b/agentplatform/_genai/types/common.py @@ -431,6 +431,51 @@ class SkillSource(_common.CaseInSensitiveEnum): """The skill is a system skill.""" +class LaunchStage(_common.CaseInSensitiveEnum): + """Indicates the launch stage of the model.""" + + LAUNCH_STAGE_UNSPECIFIED = "LAUNCH_STAGE_UNSPECIFIED" + """The model launch stage is unspecified.""" + EXPERIMENTAL = "EXPERIMENTAL" + """Used to indicate the PublisherModel is at Experimental launch stage, available to a small set of customers.""" + PRIVATE_PREVIEW = "PRIVATE_PREVIEW" + """Used to indicate the PublisherModel is at Private Preview launch stage, only available to a small set of customers, although a larger set of customers than an Experimental launch. Previews are the first launch stage used to get feedback from customers.""" + PUBLIC_PREVIEW = "PUBLIC_PREVIEW" + """Used to indicate the PublisherModel is at Public Preview launch stage, available to all customers, although not supported for production workloads.""" + GA = "GA" + """Used to indicate the PublisherModel is at GA launch stage, available to all customers and ready for production workload.""" + + +class OpenSourceCategory(_common.CaseInSensitiveEnum): + """Indicates the open source category of the publisher model.""" + + OPEN_SOURCE_CATEGORY_UNSPECIFIED = "OPEN_SOURCE_CATEGORY_UNSPECIFIED" + """The open source category is unspecified, which should not be used.""" + PROPRIETARY = "PROPRIETARY" + """Used to indicate the PublisherModel is not open sourced.""" + GOOGLE_OWNED_OSS_WITH_GOOGLE_CHECKPOINT = "GOOGLE_OWNED_OSS_WITH_GOOGLE_CHECKPOINT" + """Used to indicate the PublisherModel is a Google-owned open source model w/ Google checkpoint.""" + THIRD_PARTY_OWNED_OSS_WITH_GOOGLE_CHECKPOINT = ( + "THIRD_PARTY_OWNED_OSS_WITH_GOOGLE_CHECKPOINT" + ) + """Used to indicate the PublisherModel is a 3p-owned open source model w/ Google checkpoint.""" + GOOGLE_OWNED_OSS = 
"GOOGLE_OWNED_OSS" + """Used to indicate the PublisherModel is a Google-owned pure open source model.""" + THIRD_PARTY_OWNED_OSS = "THIRD_PARTY_OWNED_OSS" + """Used to indicate the PublisherModel is a 3p-owned pure open source model.""" + + +class VersionState(_common.CaseInSensitiveEnum): + """Indicates the state of the model version.""" + + VERSION_STATE_UNSPECIFIED = "VERSION_STATE_UNSPECIFIED" + """The version state is unspecified.""" + VERSION_STATE_STABLE = "VERSION_STATE_STABLE" + """Used to indicate the version is stable.""" + VERSION_STATE_UNSTABLE = "VERSION_STATE_UNSTABLE" + """Used to indicate the version is unstable.""" + + class EvaluationItemType(_common.CaseInSensitiveEnum): """The type of the EvaluationItem.""" @@ -22474,317 +22519,1584 @@ class ListSkillRevisionsResponseDict(TypedDict, total=False): ] -class PromptOptimizerConfig(_common.BaseModel): - """VAPO Prompt Optimizer Config.""" +class ListPublisherModelsConfig(_common.BaseModel): + """Config for listing publisher models.""" - config_path: Optional[str] = Field( - default=None, - description="""The gcs path to the config file, e.g. gs://bucket/config.json.""", - ) - service_account: Optional[str] = Field( - default=None, - description="""The service account to use for the custom job. Cannot be provided at the same time as service_account_project_number.""", - ) - service_account_project_number: Optional[Union[int, str]] = Field( - default=None, - description="""The project number used to construct the default service account:{service_account_project_number}-compute@developer.gserviceaccount.comCannot be provided at the same time as "service_account".""", + http_options: Optional[genai_types.HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" ) - wait_for_completion: Optional[bool] = Field( - default=True, - description="""Whether to wait for the job tocomplete. 
Ignored for async jobs.""", + page_size: Optional[int] = Field(default=None, description="""""") + page_token: Optional[str] = Field(default=None, description="""""") + filter: Optional[str] = Field( + default=None, description="""Filter string for publisher models.""" ) - optimizer_job_display_name: Optional[str] = Field( - default=None, - description="""The display name of the optimization job. If not provided, a display name in the format of "vapo-optimizer-{timestamp}" will be used.""", + list_all_versions: Optional[bool] = Field( + default=None, description="""Whether to list all versions.""" ) -class PromptOptimizerConfigDict(TypedDict, total=False): - """VAPO Prompt Optimizer Config.""" +class ListPublisherModelsConfigDict(TypedDict, total=False): + """Config for listing publisher models.""" - config_path: Optional[str] - """The gcs path to the config file, e.g. gs://bucket/config.json.""" + http_options: Optional[genai_types.HttpOptionsDict] + """Used to override HTTP request options.""" - service_account: Optional[str] - """The service account to use for the custom job. Cannot be provided at the same time as service_account_project_number.""" + page_size: Optional[int] + """""" - service_account_project_number: Optional[Union[int, str]] - """The project number used to construct the default service account:{service_account_project_number}-compute@developer.gserviceaccount.comCannot be provided at the same time as "service_account".""" + page_token: Optional[str] + """""" - wait_for_completion: Optional[bool] - """Whether to wait for the job tocomplete. Ignored for async jobs.""" + filter: Optional[str] + """Filter string for publisher models.""" - optimizer_job_display_name: Optional[str] - """The display name of the optimization job. 
If not provided, a display name in the format of "vapo-optimizer-{timestamp}" will be used.""" + list_all_versions: Optional[bool] + """Whether to list all versions.""" -PromptOptimizerConfigOrDict = Union[PromptOptimizerConfig, PromptOptimizerConfigDict] +ListPublisherModelsConfigOrDict = Union[ + ListPublisherModelsConfig, ListPublisherModelsConfigDict +] -class OptimizeResponse(_common.BaseModel): - """Response for the optimize_prompt method.""" +class _ListPublisherModelsRequestParameters(_common.BaseModel): + """Parameters for listing publisher models.""" - raw_text_response: Optional[str] = Field(default=None, description="""""") - parsed_response: Optional["ParsedResponseUnion"] = Field( + parent: Optional[str] = Field(default=None, description="""""") + config: Optional[ListPublisherModelsConfig] = Field( default=None, description="""""" ) -class OptimizeResponseDict(TypedDict, total=False): - """Response for the optimize_prompt method.""" +class _ListPublisherModelsRequestParametersDict(TypedDict, total=False): + """Parameters for listing publisher models.""" - raw_text_response: Optional[str] + parent: Optional[str] """""" - parsed_response: Optional["ParsedResponseUnionDict"] + config: Optional[ListPublisherModelsConfigDict] """""" -OptimizeResponseOrDict = Union[OptimizeResponse, OptimizeResponseDict] +_ListPublisherModelsRequestParametersOrDict = Union[ + _ListPublisherModelsRequestParameters, _ListPublisherModelsRequestParametersDict +] -class ContentMapContents(_common.BaseModel): - """Map of placeholder in metric prompt template to contents of model input.""" +class PublisherModelResourceReference(_common.BaseModel): + """Reference to a resource.""" - contents: Optional[list[genai_types.Content]] = Field( - default=None, description="""Contents of the model input.""" + description: Optional[str] = Field( + default=None, description="""Description of the resource.""" + ) + resource_name: Optional[str] = Field( + default=None, description="""The 
resource name of the Google Cloud resource.""" + ) + uri: Optional[str] = Field(default=None, description="""The URI of the resource.""") + use_case: Optional[str] = Field( + default=None, description="""Use case (CUJ) of the resource.""" ) -class ContentMapContentsDict(TypedDict, total=False): - """Map of placeholder in metric prompt template to contents of model input.""" +class PublisherModelResourceReferenceDict(TypedDict, total=False): + """Reference to a resource.""" - contents: Optional[list[genai_types.ContentDict]] - """Contents of the model input.""" + description: Optional[str] + """Description of the resource.""" + resource_name: Optional[str] + """The resource name of the Google Cloud resource.""" -ContentMapContentsOrDict = Union[ContentMapContents, ContentMapContentsDict] + uri: Optional[str] + """The URI of the resource.""" + use_case: Optional[str] + """Use case (CUJ) of the resource.""" -class EvaluateMethodConfig(_common.BaseModel): - """Optional parameters for the evaluate method.""" - http_options: Optional[genai_types.HttpOptions] = Field( - default=None, description="""Used to override HTTP request options.""" - ) - dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]] = Field( +PublisherModelResourceReferenceOrDict = Union[ + PublisherModelResourceReference, PublisherModelResourceReferenceDict +] + + +class PublisherModelParent(_common.BaseModel): + """The information about the parent of a model.""" + + display_name: Optional[str] = Field( default=None, - description="""The schema to use for the dataset. - If not specified, the dataset schema will be inferred from the first - example in the dataset.""", + description="""Required. The display name of the parent. 
E.g., LaMDA, T5, Vision API, Natural Language API.""", ) - dest: Optional[str] = Field( - default=None, description="""The destination path for the evaluation results.""" - ) - evaluation_service_qps: Optional[float] = Field( + reference: Optional[PublisherModelResourceReference] = Field( default=None, - description="""The rate limit (queries per second) for calls to the - evaluation service. Defaults to 10. Increase this value if your - project has a higher EvaluateInstances API quota.""", + description="""Optional. The Google Cloud resource name or the URI reference.""", ) -class EvaluateMethodConfigDict(TypedDict, total=False): - """Optional parameters for the evaluate method.""" +class PublisherModelParentDict(TypedDict, total=False): + """The information about the parent of a model.""" - http_options: Optional[genai_types.HttpOptionsDict] - """Used to override HTTP request options.""" + display_name: Optional[str] + """Required. The display name of the parent. E.g., LaMDA, T5, Vision API, Natural Language API.""" - dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]] - """The schema to use for the dataset. - If not specified, the dataset schema will be inferred from the first - example in the dataset.""" + reference: Optional[PublisherModelResourceReferenceDict] + """Optional. The Google Cloud resource name or the URI reference.""" - dest: Optional[str] - """The destination path for the evaluation results.""" - evaluation_service_qps: Optional[float] - """The rate limit (queries per second) for calls to the - evaluation service. Defaults to 10. 
Increase this value if your - project has a higher EvaluateInstances API quota.""" +PublisherModelParentOrDict = Union[PublisherModelParent, PublisherModelParentDict] -EvaluateMethodConfigOrDict = Union[EvaluateMethodConfig, EvaluateMethodConfigDict] +class PredictSchemata(_common.BaseModel): + """Contains the schemata used in Model's predictions and explanations via PredictionService.Predict, PredictionService.Explain and BatchPredictionJob.""" + instance_schema_uri: Optional[str] = Field( + default=None, + description="""Immutable. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in PredictRequest.instances, ExplainRequest.instances and BatchPredictionJob.input_config. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access.""", + ) + parameters_schema_uri: Optional[str] = Field( + default=None, + description="""Immutable. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via PredictRequest.parameters, ExplainRequest.parameters and BatchPredictionJob.model_parameters. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI, if no parameters are supported, then it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. 
The output URI will point to a location where the user only has a read access.""", + ) + prediction_schema_uri: Optional[str] = Field( + default=None, + description="""Immutable. Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via PredictResponse.predictions, ExplainResponse.explanations, and BatchPredictionJob.output_config. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access.""", + ) -class EvaluateDatasetConfig(_common.BaseModel): - """Config for evaluate instances.""" - http_options: Optional[genai_types.HttpOptions] = Field( - default=None, description="""Used to override HTTP request options.""" - ) +class PredictSchemataDict(TypedDict, total=False): + """Contains the schemata used in Model's predictions and explanations via PredictionService.Predict, PredictionService.Explain and BatchPredictionJob.""" + instance_schema_uri: Optional[str] + """Immutable. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in PredictRequest.instances, ExplainRequest.instances and BatchPredictionJob.input_config. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. 
The output URI will point to a location where the user only has a read access.""" -class EvaluateDatasetConfigDict(TypedDict, total=False): - """Config for evaluate instances.""" + parameters_schema_uri: Optional[str] + """Immutable. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via PredictRequest.parameters, ExplainRequest.parameters and BatchPredictionJob.model_parameters. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI, if no parameters are supported, then it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access.""" - http_options: Optional[genai_types.HttpOptionsDict] - """Used to override HTTP request options.""" + prediction_schema_uri: Optional[str] + """Immutable. Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via PredictResponse.predictions, ExplainResponse.explanations, and BatchPredictionJob.output_config. The schema is defined as an OpenAPI 3.0.2 [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schemaObject). AutoML Models always have this field populated by Vertex AI. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. 
The output URI will point to a location where the user only has a read access.""" -EvaluateDatasetConfigOrDict = Union[EvaluateDatasetConfig, EvaluateDatasetConfigDict] +PredictSchemataOrDict = Union[PredictSchemata, PredictSchemataDict] -class EvaluateDatasetOperation(_common.BaseModel): +class PublisherModelCallToActionRegionalResourceReferences(_common.BaseModel): + """The regional resource name or the URI. Key is region, e.g., us-central1, europe-west2, global, etc..""" - name: Optional[str] = Field( + colab_notebook_disabled: Optional[bool] = Field( default=None, - description="""The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""", + description="""Optional. For notebook resource. When set to true, the Colab Enterprise link will be disabled in the "open notebook" dialog in UI.""", ) - metadata: Optional[dict[str, Any]] = Field( - default=None, - description="""Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""", + references: Optional[dict[str, PublisherModelResourceReference]] = Field( + default=None, description="""Required.""" ) - done: Optional[bool] = Field( - default=None, - description="""If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.""", + resource_description: Optional[str] = Field( + default=None, description="""Optional. Description of the resource.""" ) - error: Optional[dict[str, Any]] = Field( + resource_title: Optional[str] = Field( + default=None, description="""Optional. 
Title of the resource.""" + ) + resource_use_case: Optional[str] = Field( + default=None, description="""Optional. Use case (CUJ) of the resource.""" + ) + supports_workbench: Optional[bool] = Field( default=None, - description="""The error result of the operation in case of failure or cancellation.""", + description="""Optional. For notebook resource, whether the notebook supports Workbench.""", ) - response: Optional[EvaluationDataset] = Field(default=None, description="""""") + title: Optional[str] = Field(default=None, description="""Required. """) -class EvaluateDatasetOperationDict(TypedDict, total=False): +class PublisherModelCallToActionRegionalResourceReferencesDict(TypedDict, total=False): + """The regional resource name or the URI. Key is region, e.g., us-central1, europe-west2, global, etc..""" - name: Optional[str] - """The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""" + colab_notebook_disabled: Optional[bool] + """Optional. For notebook resource. When set to true, the Colab Enterprise link will be disabled in the "open notebook" dialog in UI.""" - metadata: Optional[dict[str, Any]] - """Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""" + references: Optional[dict[str, PublisherModelResourceReferenceDict]] + """Required.""" - done: Optional[bool] - """If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.""" + resource_description: Optional[str] + """Optional. 
Description of the resource.""" - error: Optional[dict[str, Any]] - """The error result of the operation in case of failure or cancellation.""" + resource_title: Optional[str] + """Optional. Title of the resource.""" - response: Optional[EvaluationDatasetDict] - """""" + resource_use_case: Optional[str] + """Optional. Use case (CUJ) of the resource.""" + supports_workbench: Optional[bool] + """Optional. For notebook resource, whether the notebook supports Workbench.""" -EvaluateDatasetOperationOrDict = Union[ - EvaluateDatasetOperation, EvaluateDatasetOperationDict + title: Optional[str] + """Required. """ + + +PublisherModelCallToActionRegionalResourceReferencesOrDict = Union[ + PublisherModelCallToActionRegionalResourceReferences, + PublisherModelCallToActionRegionalResourceReferencesDict, ] -class EvaluateDatasetRequestParameters(_common.BaseModel): - """Parameters for batch dataset evaluation.""" +class AutomaticResources(_common.BaseModel): + """A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration. Each Model supporting these resources documents its specific guidelines.""" - dataset: Optional[EvaluationDataset] = Field(default=None, description="""""") - metrics: Optional[list[Metric]] = Field(default=None, description="""""") - output_config: Optional[genai_types.OutputConfig] = Field( - default=None, description="""""" + max_replica_count: Optional[int] = Field( + default=None, + description="""Immutable. The maximum number of replicas that may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. 
If this value is not provided, a no upper bound for scaling under heavy traffic will be assume, though Vertex AI may be unable to scale beyond certain replica number.""", ) - autorater_config: Optional[genai_types.AutoraterConfig] = Field( + min_replica_count: Optional[int] = Field( default=None, - description="""Autorater config used for evaluation. Not applicable for predefined metrics (PredefinedMetricSpec); the server uses its own model configuration for predefined metrics and this field is ignored.""", + description="""Immutable. The minimum number of replicas that will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.""", ) - config: Optional[EvaluateDatasetConfig] = Field(default=None, description="""""") -class EvaluateDatasetRequestParametersDict(TypedDict, total=False): - """Parameters for batch dataset evaluation.""" +class AutomaticResourcesDict(TypedDict, total=False): + """A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration. Each Model supporting these resources documents its specific guidelines.""" - dataset: Optional[EvaluationDatasetDict] - """""" + max_replica_count: Optional[int] + """Immutable. The maximum number of replicas that may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. 
If this value is not provided, a no upper bound for scaling under heavy traffic will be assume, though Vertex AI may be unable to scale beyond certain replica number.""" - metrics: Optional[list[MetricDict]] - """""" + min_replica_count: Optional[int] + """Immutable. The minimum number of replicas that will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.""" - output_config: Optional[genai_types.OutputConfigDict] - """""" - autorater_config: Optional[genai_types.AutoraterConfigDict] - """Autorater config used for evaluation. Not applicable for predefined metrics (PredefinedMetricSpec); the server uses its own model configuration for predefined metrics and this field is ignored.""" +AutomaticResourcesOrDict = Union[AutomaticResources, AutomaticResourcesDict] - config: Optional[EvaluateDatasetConfigDict] - """""" +class Port(_common.BaseModel): + """Represents a network port in a container.""" -EvaluateDatasetRequestParametersOrDict = Union[ - EvaluateDatasetRequestParameters, EvaluateDatasetRequestParametersDict -] + container_port: Optional[int] = Field( + default=None, + description="""The number of the port to expose on the pod's IP address. 
Must be a valid port number, between 1 and 65535 inclusive.""", + ) -class ObservabilityEvalCase(_common.BaseModel): - """A single evaluation case instance for data stored in GCP Observability.""" +class PortDict(TypedDict, total=False): + """Represents a network port in a container.""" - input_src: Optional[str] = Field( - default=None, - description="""String containing the GCS reference to the GenAI input content.""", - ) - output_src: Optional[str] = Field( - default=None, - description="""String containing the GCS reference to the GenAI response content.""", - ) - system_instruction_src: Optional[str] = Field( - default=None, - description="""An optional string containing the GCS reference to the GenAI system instruction.""", - ) - api_client: Optional[Any] = Field( - default=None, description="""The underlying API client.""" - ) + container_port: Optional[int] + """The number of the port to expose on the pod's IP address. Must be a valid port number, between 1 and 65535 inclusive.""" -class ObservabilityEvalCaseDict(TypedDict, total=False): - """A single evaluation case instance for data stored in GCP Observability.""" +PortOrDict = Union[Port, PortDict] - input_src: Optional[str] - """String containing the GCS reference to the GenAI input content.""" - output_src: Optional[str] - """String containing the GCS reference to the GenAI response content.""" +class ProbeExecAction(_common.BaseModel): + """ExecAction specifies a command to execute.""" - system_instruction_src: Optional[str] - """An optional string containing the GCS reference to the GenAI system instruction.""" + command: Optional[list[str]] = Field( + default=None, + description="""Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.""", + ) - api_client: Optional[Any] - """The underlying API client.""" +class ProbeExecActionDict(TypedDict, total=False): + """ExecAction specifies a command to execute.""" -ObservabilityEvalCaseOrDict = Union[ObservabilityEvalCase, ObservabilityEvalCaseDict] + command: Optional[list[str]] + """Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.""" -class RubricGroup(_common.BaseModel): - """A group of rubrics. +ProbeExecActionOrDict = Union[ProbeExecAction, ProbeExecActionDict] - Used for grouping rubrics based on a metric or a version. - """ - group_id: Optional[str] = Field( - default=None, description="""Unique identifier for the group.""" - ) - display_name: Optional[str] = Field( +class ProbeGrpcAction(_common.BaseModel): + """GrpcAction checks the health of a container using a gRPC service.""" + + port: Optional[int] = Field( default=None, - description="""Human-readable name for the group. This should be unique - within a given context if used for display or selection. - Example: "Instruction Following V1", "Content Quality - Summarization - Task".""", + description="""Port number of the gRPC service. Number must be in the range 1 to 65535.""", ) - rubrics: Optional[list[evals_types.Rubric]] = Field( - default=None, description="""Rubrics that are part of this group.""" + service: Optional[str] = Field( + default=None, + description="""Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. 
If this is not specified, the default behavior is defined by gRPC.""", ) -class RubricGroupDict(TypedDict, total=False): - """A group of rubrics. +class ProbeGrpcActionDict(TypedDict, total=False): + """GrpcAction checks the health of a container using a gRPC service.""" - Used for grouping rubrics based on a metric or a version. - """ + port: Optional[int] + """Port number of the gRPC service. Number must be in the range 1 to 65535.""" - group_id: Optional[str] - """Unique identifier for the group.""" + service: Optional[str] + """Service is the name of the service to place in the gRPC HealthCheckRequest. See https://github.com/grpc/grpc/blob/master/doc/health-checking.md. If this is not specified, the default behavior is defined by gRPC.""" - display_name: Optional[str] - """Human-readable name for the group. This should be unique - within a given context if used for display or selection. - Example: "Instruction Following V1", "Content Quality - Summarization - Task".""" - rubrics: Optional[list[evals_types.Rubric]] - """Rubrics that are part of this group.""" +ProbeGrpcActionOrDict = Union[ProbeGrpcAction, ProbeGrpcActionDict] + + +class ProbeHttpHeader(_common.BaseModel): + """HttpHeader describes a custom header to be used in HTTP probes""" + + name: Optional[str] = Field( + default=None, + description="""The header field name. This will be canonicalized upon output, so case-variant names will be understood as the same header.""", + ) + value: Optional[str] = Field(default=None, description="""The header field value""") + + +class ProbeHttpHeaderDict(TypedDict, total=False): + """HttpHeader describes a custom header to be used in HTTP probes""" + + name: Optional[str] + """The header field name. 
This will be canonicalized upon output, so case-variant names will be understood as the same header.""" + + value: Optional[str] + """The header field value""" + + +ProbeHttpHeaderOrDict = Union[ProbeHttpHeader, ProbeHttpHeaderDict] + + +class ProbeHttpGetAction(_common.BaseModel): + """HttpGetAction describes an action based on HTTP Get requests.""" + + host: Optional[str] = Field( + default=None, + description="""Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.""", + ) + http_headers: Optional[list[ProbeHttpHeader]] = Field( + default=None, + description="""Custom headers to set in the request. HTTP allows repeated headers.""", + ) + path: Optional[str] = Field( + default=None, description="""Path to access on the HTTP server.""" + ) + port: Optional[int] = Field( + default=None, + description="""Number of the port to access on the container. Number must be in the range 1 to 65535.""", + ) + scheme: Optional[str] = Field( + default=None, + description="""Scheme to use for connecting to the host. Defaults to HTTP. Acceptable values are "HTTP" or "HTTPS".""", + ) + + +class ProbeHttpGetActionDict(TypedDict, total=False): + """HttpGetAction describes an action based on HTTP Get requests.""" + + host: Optional[str] + """Host name to connect to, defaults to the model serving container's IP. You probably want to set "Host" in httpHeaders instead.""" + + http_headers: Optional[list[ProbeHttpHeaderDict]] + """Custom headers to set in the request. HTTP allows repeated headers.""" + + path: Optional[str] + """Path to access on the HTTP server.""" + + port: Optional[int] + """Number of the port to access on the container. Number must be in the range 1 to 65535.""" + + scheme: Optional[str] + """Scheme to use for connecting to the host. Defaults to HTTP. 
Acceptable values are "HTTP" or "HTTPS".""" + + +ProbeHttpGetActionOrDict = Union[ProbeHttpGetAction, ProbeHttpGetActionDict] + + +class ProbeTcpSocketAction(_common.BaseModel): + """TcpSocketAction probes the health of a container by opening a TCP socket connection.""" + + host: Optional[str] = Field( + default=None, + description="""Optional: Host name to connect to, defaults to the model serving container's IP.""", + ) + port: Optional[int] = Field( + default=None, + description="""Number of the port to access on the container. Number must be in the range 1 to 65535.""", + ) + + +class ProbeTcpSocketActionDict(TypedDict, total=False): + """TcpSocketAction probes the health of a container by opening a TCP socket connection.""" + + host: Optional[str] + """Optional: Host name to connect to, defaults to the model serving container's IP.""" + + port: Optional[int] + """Number of the port to access on the container. Number must be in the range 1 to 65535.""" + + +ProbeTcpSocketActionOrDict = Union[ProbeTcpSocketAction, ProbeTcpSocketActionDict] + + +class Probe(_common.BaseModel): + """Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic.""" + + exec: Optional[ProbeExecAction] = Field( + default=None, + description="""ExecAction probes the health of a container by executing a command.""", + ) + failure_threshold: Optional[int] = Field( + default=None, + description="""Number of consecutive failures before the probe is considered failed. Defaults to 3. Minimum value is 1. 
Maps to Kubernetes probe argument 'failureThreshold'.""", + ) + grpc: Optional[ProbeGrpcAction] = Field( + default=None, + description="""GrpcAction probes the health of a container by sending a gRPC request.""", + ) + http_get: Optional[ProbeHttpGetAction] = Field( + default=None, + description="""HttpGetAction probes the health of a container by sending an HTTP GET request.""", + ) + initial_delay_seconds: Optional[int] = Field( + default=None, + description="""Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.""", + ) + period_seconds: Optional[int] = Field( + default=None, + description="""How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.""", + ) + success_threshold: Optional[int] = Field( + default=None, + description="""Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.""", + ) + tcp_socket: Optional[ProbeTcpSocketAction] = Field( + default=None, + description="""TcpSocketAction probes the health of a container by opening a TCP socket connection.""", + ) + timeout_seconds: Optional[int] = Field( + default=None, + description="""Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.""", + ) + + +class ProbeDict(TypedDict, total=False): + """Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic.""" + + exec: Optional[ProbeExecActionDict] + """ExecAction probes the health of a container by executing a command.""" + + failure_threshold: Optional[int] + """Number of consecutive failures before the probe is considered failed. 
Defaults to 3. Minimum value is 1. Maps to Kubernetes probe argument 'failureThreshold'.""" + + grpc: Optional[ProbeGrpcActionDict] + """GrpcAction probes the health of a container by sending a gRPC request.""" + + http_get: Optional[ProbeHttpGetActionDict] + """HttpGetAction probes the health of a container by sending an HTTP GET request.""" + + initial_delay_seconds: Optional[int] + """Number of seconds to wait before starting the probe. Defaults to 0. Minimum value is 0. Maps to Kubernetes probe argument 'initialDelaySeconds'.""" + + period_seconds: Optional[int] + """How often (in seconds) to perform the probe. Defaults to 10 seconds. Minimum value is 1. Must be less than timeout_seconds. Maps to Kubernetes probe argument 'periodSeconds'.""" + + success_threshold: Optional[int] + """Number of consecutive successes before the probe is considered successful. Defaults to 1. Minimum value is 1. Maps to Kubernetes probe argument 'successThreshold'.""" + + tcp_socket: Optional[ProbeTcpSocketActionDict] + """TcpSocketAction probes the health of a container by opening a TCP socket connection.""" + + timeout_seconds: Optional[int] + """Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. Must be greater than or equal to period_seconds. Maps to Kubernetes probe argument 'timeoutSeconds'.""" + + +ProbeOrDict = Union[Probe, ProbeDict] + + +class ModelContainerSpec(_common.BaseModel): + """Specification of a container for serving predictions. Some fields in this message correspond to fields in the [Kubernetes Container v1 core specification](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + args: Optional[list[str]] = Field( + default=None, + description="""Immutable. Specifies arguments for the command that runs when the container starts. This overrides the container's [`CMD`](https://docs.docker.com/engine/reference/builder/#cmd). 
Specify this field as an array of executable and arguments, similar to a Docker `CMD`'s "default parameters" form. If you don't specify this field but do specify the command field, then the command from the `command` field runs without any additional arguments. See the [Kubernetes documentation about how the `command` and `args` fields interact with a container's `ENTRYPOINT` and `CMD`](https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes). If you don't specify this field and don't specify the `command` field, then the container's [`ENTRYPOINT`](https://docs.docker.com/engine/reference/builder/#entrypoint) and `CMD` determine what runs based on their default behavior. See the Docker documentation about [how `CMD` and `ENTRYPOINT` interact](https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact). In this field, you can reference [environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field corresponds to the `args` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""", + ) + command: Optional[list[str]] = Field( + default=None, + description="""Immutable. Specifies the command that runs when the container starts. This overrides the container's [ENTRYPOINT](https://docs.docker.com/engine/reference/builder/#entrypoint). 
Specify this field as an array of executable and arguments, similar to a Docker `ENTRYPOINT`'s "exec" form, not its "shell" form. If you do not specify this field, then the container's `ENTRYPOINT` runs, in conjunction with the args field or the container's [`CMD`](https://docs.docker.com/engine/reference/builder/#cmd), if either exists. If this field is not specified and the container does not have an `ENTRYPOINT`, then refer to the Docker documentation about [how `CMD` and `ENTRYPOINT` interact](https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact). If you specify this field, then you can also specify the `args` field to provide additional arguments for this command. However, if you specify this field, then the container's `CMD` is ignored. See the [Kubernetes documentation about how the `command` and `args` fields interact with a container's `ENTRYPOINT` and `CMD`](https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes). In this field, you can reference [environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field corresponds to the `command` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""", + ) + deployment_timeout: Optional[str] = Field( + default=None, + description="""Immutable. Deployment timeout. 
Limit for deployment timeout is 2 hours.""", + ) + env: Optional[list[EnvVar]] = Field( + default=None, + description="""Immutable. List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable `VAR_2` to have the value `foo bar`: ```json [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] ``` If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the `env` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""", + ) + grpc_ports: Optional[list[Port]] = Field( + default=None, + description="""Immutable. List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the `ports` field of the Kubernetes Containers v1 core API.""", + ) + health_probe: Optional[Probe] = Field( + default=None, + description="""Immutable. Specification for Kubernetes readiness probe.""", + ) + health_route: Optional[str] = Field( + default=None, + description="""Immutable. HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about [health checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#health). 
For example, if you set this field to `/bar`, then Vertex AI intermittently sends a GET request to the `/bar` path on the port of your container specified by the first value of this `ModelContainerSpec`'s ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows: * ENDPOINT: The last segment (following `endpoints/`) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the [`AIP_ENDPOINT_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).) * DEPLOYED_MODEL: DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value available to your container code as the [`AIP_DEPLOYED_MODEL_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)""", + ) + image_uri: Optional[str] = Field( + default=None, + description="""Required. Immutable. URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the [container publishing requirements](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#publishing), including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see [Custom container requirements](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#). 
You can use the URI to one of Vertex AI's [pre-built container images for prediction](https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers) in this field.""", + ) + invoke_route_prefix: Optional[str] = Field( + default=None, + description="""Immutable. Invoke route prefix for the custom container. "/*" is the only supported value right now. By setting this field, any non-root route on this model will be accessible with invoke http call eg: "/invoke/foo/bar", however the [PredictionService.Invoke] RPC is not supported yet. Only one of `predict_route` or `invoke_route_prefix` can be set, and we default to using `predict_route` if this field is not set. If this field is set, the Model can only be deployed to dedicated endpoint.""", + ) + liveness_probe: Optional[Probe] = Field( + default=None, + description="""Immutable. Specification for Kubernetes liveness probe.""", + ) + ports: Optional[list[Port]] = Field( + default=None, + description="""Immutable. List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends [liveness and health checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#liveness) to this port. If you do not specify this field, it defaults to following value: ```json [ { "containerPort": 8080 } ] ``` Vertex AI does not use ports other than the first one listed. This field corresponds to the `ports` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""", + ) + predict_route: Optional[str] = Field( + default=None, + description="""Immutable. HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. 
For example, if you set this field to `/foo`, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the `/foo` path on the port of your container specified by the first value of this `ModelContainerSpec`'s ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows: * ENDPOINT: The last segment (following `endpoints/`) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the [`AIP_ENDPOINT_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).) * DEPLOYED_MODEL: DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value available to your container code as the [`AIP_DEPLOYED_MODEL_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)""", + ) + shared_memory_size_mb: Optional[int] = Field( + default=None, + description="""Immutable. The amount of the VM memory to reserve as the shared memory for the model in megabytes.""", + ) + startup_probe: Optional[Probe] = Field( + default=None, + description="""Immutable. Specification for Kubernetes startup probe.""", + ) + + +class ModelContainerSpecDict(TypedDict, total=False): + """Specification of a container for serving predictions. Some fields in this message correspond to fields in the [Kubernetes Container v1 core specification](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + args: Optional[list[str]] + """Immutable. Specifies arguments for the command that runs when the container starts. This overrides the container's [`CMD`](https://docs.docker.com/engine/reference/builder/#cmd). 
Specify this field as an array of executable and arguments, similar to a Docker `CMD`'s "default parameters" form. If you don't specify this field but do specify the command field, then the command from the `command` field runs without any additional arguments. See the [Kubernetes documentation about how the `command` and `args` fields interact with a container's `ENTRYPOINT` and `CMD`](https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes). If you don't specify this field and don't specify the `command` field, then the container's [`ENTRYPOINT`](https://docs.docker.com/engine/reference/builder/#entrypoint) and `CMD` determine what runs based on their default behavior. See the Docker documentation about [how `CMD` and `ENTRYPOINT` interact](https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact). In this field, you can reference [environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field corresponds to the `args` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + command: Optional[list[str]] + """Immutable. Specifies the command that runs when the container starts. This overrides the container's [ENTRYPOINT](https://docs.docker.com/engine/reference/builder/#entrypoint). 
Specify this field as an array of executable and arguments, similar to a Docker `ENTRYPOINT`'s "exec" form, not its "shell" form. If you do not specify this field, then the container's `ENTRYPOINT` runs, in conjunction with the args field or the container's [`CMD`](https://docs.docker.com/engine/reference/builder/#cmd), if either exists. If this field is not specified and the container does not have an `ENTRYPOINT`, then refer to the Docker documentation about [how `CMD` and `ENTRYPOINT` interact](https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact). If you specify this field, then you can also specify the `args` field to provide additional arguments for this command. However, if you specify this field, then the container's `CMD` is ignored. See the [Kubernetes documentation about how the `command` and `args` fields interact with a container's `ENTRYPOINT` and `CMD`](https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes). In this field, you can reference [environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) and environment variables set in the env field. You cannot reference environment variables set in the Docker image. In order for environment variables to be expanded, reference them by using the following syntax: $(VARIABLE_NAME) Note that this differs from Bash variable expansion, which does not use parentheses. If a variable cannot be resolved, the reference in the input string is used unchanged. To avoid variable expansion, you can escape this syntax with `$$`; for example: $$(VARIABLE_NAME) This field corresponds to the `command` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + deployment_timeout: Optional[str] + """Immutable. Deployment timeout. 
Limit for deployment timeout is 2 hours.""" + + env: Optional[list[EnvVarDict]] + """Immutable. List of environment variables to set in the container. After the container starts running, code running in the container can read these environment variables. Additionally, the command and args fields can reference these variables. Later entries in this list can also reference earlier entries. For example, the following example sets the variable `VAR_2` to have the value `foo bar`: ```json [ { "name": "VAR_1", "value": "foo" }, { "name": "VAR_2", "value": "$(VAR_1) bar" } ] ``` If you switch the order of the variables in the example, then the expansion does not occur. This field corresponds to the `env` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + grpc_ports: Optional[list[PortDict]] + """Immutable. List of ports to expose from the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the `ports` field of the Kubernetes Containers v1 core API.""" + + health_probe: Optional[ProbeDict] + """Immutable. Specification for Kubernetes readiness probe.""" + + health_route: Optional[str] + """Immutable. HTTP path on the container to send health checks to. Vertex AI intermittently sends GET requests to this path on the container's IP address and port to check that the container is healthy. Read more about [health checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#health). 
For example, if you set this field to `/bar`, then Vertex AI intermittently sends a GET request to the `/bar` path on the port of your container specified by the first value of this `ModelContainerSpec`'s ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows: * ENDPOINT: The last segment (following `endpoints/`) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the [`AIP_ENDPOINT_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).) * DEPLOYED_MODEL: DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value available to your container code as the [`AIP_DEPLOYED_MODEL_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)""" + + image_uri: Optional[str] + """Required. Immutable. URI of the Docker image to be used as the custom container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Learn more about the [container publishing requirements](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#publishing), including permissions requirements for the Vertex AI Service Agent. The container image is ingested upon ModelService.UploadModel, stored internally, and this original path is afterwards not used. To learn about the requirements for the Docker image itself, see [Custom container requirements](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#). 
You can use the URI to one of Vertex AI's [pre-built container images for prediction](https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers) in this field.""" + + invoke_route_prefix: Optional[str] + """Immutable. Invoke route prefix for the custom container. "/*" is the only supported value right now. By setting this field, any non-root route on this model will be accessible with invoke http call eg: "/invoke/foo/bar", however the [PredictionService.Invoke] RPC is not supported yet. Only one of `predict_route` or `invoke_route_prefix` can be set, and we default to using `predict_route` if this field is not set. If this field is set, the Model can only be deployed to dedicated endpoint.""" + + liveness_probe: Optional[ProbeDict] + """Immutable. Specification for Kubernetes liveness probe.""" + + ports: Optional[list[PortDict]] + """Immutable. List of ports to expose from the container. Vertex AI sends any prediction requests that it receives to the first port on this list. Vertex AI also sends [liveness and health checks](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#liveness) to this port. If you do not specify this field, it defaults to following value: ```json [ { "containerPort": 8080 } ] ``` Vertex AI does not use ports other than the first one listed. This field corresponds to the `ports` field of the Kubernetes Containers [v1 core API](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core).""" + + predict_route: Optional[str] + """Immutable. HTTP path on the container to send prediction requests to. Vertex AI forwards requests sent using projects.locations.endpoints.predict to this path on the container's IP address and port. Vertex AI then returns the container's response in the API response. 
For example, if you set this field to `/foo`, then when Vertex AI receives a prediction request, it forwards the request body in a POST request to the `/foo` path on the port of your container specified by the first value of this `ModelContainerSpec`'s ports field. If you don't specify this field, it defaults to the following value when you deploy this Model to an Endpoint: /v1/endpoints/ENDPOINT/deployedModels/DEPLOYED_MODEL:predict The placeholders in this value are replaced as follows: * ENDPOINT: The last segment (following `endpoints/`) of the Endpoint.name field of the Endpoint where this Model has been deployed. (Vertex AI makes this value available to your container code as the [`AIP_ENDPOINT_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).) * DEPLOYED_MODEL: DeployedModel.id of the `DeployedModel`. (Vertex AI makes this value available to your container code as the [`AIP_DEPLOYED_MODEL_ID` environment variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)""" + + shared_memory_size_mb: Optional[int] + """Immutable. The amount of the VM memory to reserve as the shared memory for the model in megabytes.""" + + startup_probe: Optional[ProbeDict] + """Immutable. Specification for Kubernetes startup probe.""" + + +ModelContainerSpecOrDict = Union[ModelContainerSpec, ModelContainerSpecDict] + + +class AutoscalingMetricSpec(_common.BaseModel): + """The metric specification that defines the target resource utilization (CPU utilization, accelerator's duty cycle, and so on) for calculating the desired replica count.""" + + metric_name: Optional[str] = Field( + default=None, + description="""Required. The resource metric name. 
Supported metrics: * For Online Prediction: * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle` * `aiplatform.googleapis.com/prediction/online/cpu/utilization` * `aiplatform.googleapis.com/prediction/online/request_count` * `pubsub.googleapis.com/subscription/num_undelivered_messages` * `prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util` * `prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc` * `prometheus.googleapis.com/vertex_vllm_num_requests_waiting`""", + ) + monitored_resource_labels: Optional[dict[str, str]] = Field( + default=None, + description="""Optional. The Cloud Monitoring monitored resource labels as key value pairs used for metrics filtering. See Cloud Monitoring Labels https://cloud.google.com/monitoring/api/v3/metric-model#generic-label-info""", + ) + target: Optional[int] = Field( + default=None, + description="""The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.""", + ) + + +class AutoscalingMetricSpecDict(TypedDict, total=False): + """The metric specification that defines the target resource utilization (CPU utilization, accelerator's duty cycle, and so on) for calculating the desired replica count.""" + + metric_name: Optional[str] + """Required. The resource metric name. Supported metrics: * For Online Prediction: * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle` * `aiplatform.googleapis.com/prediction/online/cpu/utilization` * `aiplatform.googleapis.com/prediction/online/request_count` * `pubsub.googleapis.com/subscription/num_undelivered_messages` * `prometheus.googleapis.com/vertex_dcgm_fi_dev_gpu_util` * `prometheus.googleapis.com/vertex_vllm_gpu_cache_usage_perc` * `prometheus.googleapis.com/vertex_vllm_num_requests_waiting`""" + + monitored_resource_labels: Optional[dict[str, str]] + """Optional. 
The Cloud Monitoring monitored resource labels as key value pairs used for metrics filtering. See Cloud Monitoring Labels https://cloud.google.com/monitoring/api/v3/metric-model#generic-label-info""" + + target: Optional[int] + """The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.""" + + +AutoscalingMetricSpecOrDict = Union[AutoscalingMetricSpec, AutoscalingMetricSpecDict] + + +class FlexStart(_common.BaseModel): + """FlexStart is used to schedule the deployment workload on DWS resource. It contains the max duration of the deployment.""" + + max_runtime_duration: Optional[str] = Field( + default=None, + description="""The max duration of the deployment is max_runtime_duration. The deployment will be terminated after the duration. The max_runtime_duration can be set up to 7 days.""", + ) + + +class FlexStartDict(TypedDict, total=False): + """FlexStart is used to schedule the deployment workload on DWS resource. It contains the max duration of the deployment.""" + + max_runtime_duration: Optional[str] + """The max duration of the deployment is max_runtime_duration. The deployment will be terminated after the duration. The max_runtime_duration can be set up to 7 days.""" + + +FlexStartOrDict = Union[FlexStart, FlexStartDict] + + +class DedicatedResourcesScaleToZeroSpec(_common.BaseModel): + """Specification for scale-to-zero feature.""" + + idle_scaledown_period: Optional[str] = Field( + default=None, + description="""Optional. Duration of no traffic before scaling to zero. [MinValue=300] (5 minutes) [MaxValue=28800] (8 hours)""", + ) + min_scaleup_period: Optional[str] = Field( + default=None, + description="""Optional. Minimum duration that a deployment will be scaled up before traffic is evaluated for potential scale-down. 
[MinValue=300] (5 minutes) [MaxValue=28800] (8 hours)""", + ) + + +class DedicatedResourcesScaleToZeroSpecDict(TypedDict, total=False): + """Specification for scale-to-zero feature.""" + + idle_scaledown_period: Optional[str] + """Optional. Duration of no traffic before scaling to zero. [MinValue=300] (5 minutes) [MaxValue=28800] (8 hours)""" + + min_scaleup_period: Optional[str] + """Optional. Minimum duration that a deployment will be scaled up before traffic is evaluated for potential scale-down. [MinValue=300] (5 minutes) [MaxValue=28800] (8 hours)""" + + +DedicatedResourcesScaleToZeroSpecOrDict = Union[ + DedicatedResourcesScaleToZeroSpec, DedicatedResourcesScaleToZeroSpecDict +] + + +class DedicatedResources(_common.BaseModel): + """A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration.""" + + autoscaling_metric_specs: Optional[list[AutoscalingMetricSpec]] = Field( + default=None, + description="""Immutable. The metric specifications that overrides a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (default to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics and scale up when either metrics exceeds its target value while scale down if both metrics are under their target value. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on CPU utilization metric only with default target value 60 if not explicitly set. 
For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and autoscaling_metric_specs.target to `80`.""", + ) + flex_start: Optional[FlexStart] = Field( + default=None, + description="""Optional. Immutable. If set, use DWS resource to schedule the deployment workload. reference: (https://cloud.google.com/blog/products/compute/introducing-dynamic-workload-scheduler)""", + ) + initial_replica_count: Optional[int] = Field( + default=None, + description="""Immutable. Number of initial replicas being deployed on when scaling the workload up from zero or when creating the workload in case min_replica_count = 0. When min_replica_count > 0 (meaning that the scale-to-zero feature is not enabled), initial_replica_count should not be set. When min_replica_count = 0 (meaning that the scale-to-zero feature is enabled), initial_replica_count should be larger than zero, but no greater than max_replica_count.""", + ) + machine_spec: Optional[MachineSpec] = Field( + default=None, + description="""Required. Immutable. The specification of a single machine being used.""", + ) + max_replica_count: Optional[int] = Field( + default=None, + description="""Immutable. The maximum number of replicas that may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). If traffic increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, will use min_replica_count as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. 
Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).""", + ) + min_replica_count: Optional[int] = Field( + default=None, + description="""Required. Immutable. The minimum number of machine replicas that will be always deployed on. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.""", + ) + required_replica_count: Optional[int] = Field( + default=None, + description="""Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.""", + ) + scale_to_zero_spec: Optional[DedicatedResourcesScaleToZeroSpec] = Field( + default=None, + description="""Optional. Specification for scale-to-zero feature.""", + ) + spot: Optional[bool] = Field( + default=None, + description="""Optional. If true, schedule the deployment workload on [spot VMs](https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms).""", + ) + + +class DedicatedResourcesDict(TypedDict, total=False): + """A description of resources that are dedicated to a DeployedModel or DeployedIndex, and that need a higher degree of manual configuration.""" + + autoscaling_metric_specs: Optional[list[AutoscalingMetricSpecDict]] + """Immutable. The metric specifications that overrides a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (default to 60 if not set). At most one entry is allowed per metric. 
If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics and scale up when either metrics exceeds its target value while scale down if both metrics are under their target value. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on CPU utilization metric only with default target value 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and autoscaling_metric_specs.target to `80`.""" + + flex_start: Optional[FlexStartDict] + """Optional. Immutable. If set, use DWS resource to schedule the deployment workload. reference: (https://cloud.google.com/blog/products/compute/introducing-dynamic-workload-scheduler)""" + + initial_replica_count: Optional[int] + """Immutable. Number of initial replicas being deployed on when scaling the workload up from zero or when creating the workload in case min_replica_count = 0. When min_replica_count > 0 (meaning that the scale-to-zero feature is not enabled), initial_replica_count should not be set. When min_replica_count = 0 (meaning that the scale-to-zero feature is enabled), initial_replica_count should be larger than zero, but no greater than max_replica_count.""" + + machine_spec: Optional[MachineSpecDict] + """Required. Immutable. The specification of a single machine being used.""" + + max_replica_count: Optional[int] + """Immutable. The maximum number of replicas that may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale to that many replicas is guaranteed (barring service outages). 
If traffic increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, will use min_replica_count as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).""" + + min_replica_count: Optional[int] + """Required. Immutable. The minimum number of machine replicas that will be always deployed on. This value must be greater than or equal to 1. If traffic increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.""" + + required_replica_count: Optional[int] + """Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial deployment/mutation is desired. If set, the deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. If not set, the default required_replica_count will be min_replica_count.""" + + scale_to_zero_spec: Optional[DedicatedResourcesScaleToZeroSpecDict] + """Optional. Specification for scale-to-zero feature.""" + + spot: Optional[bool] + """Optional. If true, schedule the deployment workload on [spot VMs](https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms).""" + + +DedicatedResourcesOrDict = Union[DedicatedResources, DedicatedResourcesDict] + + +class PublisherModelCallToActionDeployDeployMetadata(_common.BaseModel): + """Metadata information about the deployment for managing deployment config.""" + + labels: Optional[dict[str, str]] = Field( + default=None, + description="""Optional. Labels for the deployment config. 
For managing deployment config like verifying, source of deployment config, etc.""", + ) + sample_request: Optional[str] = Field( + default=None, description="""Optional. Sample request for deployed endpoint.""" + ) + + +class PublisherModelCallToActionDeployDeployMetadataDict(TypedDict, total=False): + """Metadata information about the deployment for managing deployment config.""" + + labels: Optional[dict[str, str]] + """Optional. Labels for the deployment config. For managing deployment config like verifying, source of deployment config, etc.""" + + sample_request: Optional[str] + """Optional. Sample request for deployed endpoint.""" + + +PublisherModelCallToActionDeployDeployMetadataOrDict = Union[ + PublisherModelCallToActionDeployDeployMetadata, + PublisherModelCallToActionDeployDeployMetadataDict, +] + + +class LargeModelReference(_common.BaseModel): + """Contains information about the Large Model.""" + + name: Optional[str] = Field( + default=None, + description="""Required. The unique name of the large Foundation or pre-built model. Like "chat-bison", "text-bison". Or model name with version ID, like "chat-bison@001", "text-bison@005", etc.""", + ) + + +class LargeModelReferenceDict(TypedDict, total=False): + """Contains information about the Large Model.""" + + name: Optional[str] + """Required. The unique name of the large Foundation or pre-built model. Like "chat-bison", "text-bison". Or model name with version ID, like "chat-bison@001", "text-bison@005", etc.""" + + +LargeModelReferenceOrDict = Union[LargeModelReference, LargeModelReferenceDict] + + +class PublisherModelCallToActionDeploy(_common.BaseModel): + """Model metadata that is needed for UploadModel or DeployModel/CreateEndpoint requests.""" + + artifact_uri: Optional[str] = Field( + default=None, + description="""Optional. 
The path to the directory containing the Model artifact and any of its supporting files.""", + ) + automatic_resources: Optional[AutomaticResources] = Field( + default=None, + description="""A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration.""", + ) + container_spec: Optional[ModelContainerSpec] = Field( + default=None, + description="""Optional. The specification of the container that is to be used when deploying this Model in Vertex AI. Not present for Large Models.""", + ) + dedicated_resources: Optional[DedicatedResources] = Field( + default=None, + description="""A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.""", + ) + deploy_metadata: Optional[PublisherModelCallToActionDeployDeployMetadata] = Field( + default=None, + description="""Optional. Metadata information about this deployment config.""", + ) + deploy_task_name: Optional[str] = Field( + default=None, + description="""Optional. The name of the deploy task (e.g., "text to image generation").""", + ) + large_model_reference: Optional[LargeModelReference] = Field( + default=None, + description="""Optional. Large model reference. When this is set, model_artifact_spec is not needed.""", + ) + model_display_name: Optional[str] = Field( + default=None, description="""Optional. Default model display name.""" + ) + public_artifact_uri: Optional[str] = Field( + default=None, + description="""Optional. The signed URI for ephemeral Cloud Storage access to model artifact.""", + ) + shared_resources: Optional[str] = Field( + default=None, + description="""The resource name of the shared DeploymentResourcePool to deploy on. Format: `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`""", + ) + title: Optional[str] = Field( + default=None, + description="""Required. 
The title of the regional resource reference.""", + ) + + +class PublisherModelCallToActionDeployDict(TypedDict, total=False): + """Model metadata that is needed for UploadModel or DeployModel/CreateEndpoint requests.""" + + artifact_uri: Optional[str] + """Optional. The path to the directory containing the Model artifact and any of its supporting files.""" + + automatic_resources: Optional[AutomaticResourcesDict] + """A description of resources that to large degree are decided by Vertex AI, and require only a modest additional configuration.""" + + container_spec: Optional[ModelContainerSpecDict] + """Optional. The specification of the container that is to be used when deploying this Model in Vertex AI. Not present for Large Models.""" + + dedicated_resources: Optional[DedicatedResourcesDict] + """A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.""" + + deploy_metadata: Optional[PublisherModelCallToActionDeployDeployMetadataDict] + """Optional. Metadata information about this deployment config.""" + + deploy_task_name: Optional[str] + """Optional. The name of the deploy task (e.g., "text to image generation").""" + + large_model_reference: Optional[LargeModelReferenceDict] + """Optional. Large model reference. When this is set, model_artifact_spec is not needed.""" + + model_display_name: Optional[str] + """Optional. Default model display name.""" + + public_artifact_uri: Optional[str] + """Optional. The signed URI for ephemeral Cloud Storage access to model artifact.""" + + shared_resources: Optional[str] + """The resource name of the shared DeploymentResourcePool to deploy on. Format: `projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}`""" + + title: Optional[str] + """Required. 
The title of the regional resource reference.""" + + +PublisherModelCallToActionDeployOrDict = Union[ + PublisherModelCallToActionDeploy, PublisherModelCallToActionDeployDict +] + + +class PublisherModelCallToActionDeployGke(_common.BaseModel): + """Configurations for PublisherModel GKE deployment""" + + gke_yaml_configs: Optional[list[str]] = Field( + default=None, + description="""Optional. GKE deployment configuration in yaml format.""", + ) + + +class PublisherModelCallToActionDeployGkeDict(TypedDict, total=False): + """Configurations for PublisherModel GKE deployment""" + + gke_yaml_configs: Optional[list[str]] + """Optional. GKE deployment configuration in yaml format.""" + + +PublisherModelCallToActionDeployGkeOrDict = Union[ + PublisherModelCallToActionDeployGke, PublisherModelCallToActionDeployGkeDict +] + + +class PublisherModelCallToActionDeployVertex(_common.BaseModel): + """Multiple setups to deploy the PublisherModel.""" + + multi_deploy_vertex: Optional[list[PublisherModelCallToActionDeploy]] = Field( + default=None, description="""Optional. One click deployment configurations.""" + ) + + +class PublisherModelCallToActionDeployVertexDict(TypedDict, total=False): + """Multiple setups to deploy the PublisherModel.""" + + multi_deploy_vertex: Optional[list[PublisherModelCallToActionDeployDict]] + """Optional. One click deployment configurations.""" + + +PublisherModelCallToActionDeployVertexOrDict = Union[ + PublisherModelCallToActionDeployVertex, PublisherModelCallToActionDeployVertexDict +] + + +class PublisherModelCallToActionOpenFineTuningPipelines(_common.BaseModel): + """Open fine tuning pipelines.""" + + fine_tuning_pipelines: Optional[ + list[PublisherModelCallToActionRegionalResourceReferences] + ] = Field( + default=None, + description="""Required. 
Regional resource references to fine tuning pipelines.""", + ) + + +class PublisherModelCallToActionOpenFineTuningPipelinesDict(TypedDict, total=False): + """Open fine tuning pipelines.""" + + fine_tuning_pipelines: Optional[ + list[PublisherModelCallToActionRegionalResourceReferencesDict] + ] + """Required. Regional resource references to fine tuning pipelines.""" + + +PublisherModelCallToActionOpenFineTuningPipelinesOrDict = Union[ + PublisherModelCallToActionOpenFineTuningPipelines, + PublisherModelCallToActionOpenFineTuningPipelinesDict, +] + + +class PublisherModelCallToActionOpenNotebooks(_common.BaseModel): + """Open notebooks.""" + + notebooks: Optional[list[PublisherModelCallToActionRegionalResourceReferences]] = ( + Field( + default=None, + description="""Required. Regional resource references to notebooks.""", + ) + ) + + +class PublisherModelCallToActionOpenNotebooksDict(TypedDict, total=False): + """Open notebooks.""" + + notebooks: Optional[list[PublisherModelCallToActionRegionalResourceReferencesDict]] + """Required. Regional resource references to notebooks.""" + + +PublisherModelCallToActionOpenNotebooksOrDict = Union[ + PublisherModelCallToActionOpenNotebooks, PublisherModelCallToActionOpenNotebooksDict +] + + +class PublisherModelDocumentation(_common.BaseModel): + """A named piece of documentation.""" + + content: Optional[str] = Field( + default=None, + description="""Required. Content of this piece of document (in Markdown format).""", + ) + title: Optional[str] = Field( + default=None, + description="""Required. E.g., OVERVIEW, USE CASES, DOCUMENTATION, SDK & SAMPLES, JAVA, NODE.JS, etc..""", + ) + + +class PublisherModelDocumentationDict(TypedDict, total=False): + """A named piece of documentation.""" + + content: Optional[str] + """Required. Content of this piece of document (in Markdown format).""" + + title: Optional[str] + """Required. 
E.g., OVERVIEW, USE CASES, DOCUMENTATION, SDK & SAMPLES, JAVA, NODE.JS, etc..""" + + +PublisherModelDocumentationOrDict = Union[ + PublisherModelDocumentation, PublisherModelDocumentationDict +] + + +class PublisherModelCallToActionViewRestApi(_common.BaseModel): + """Rest API docs.""" + + documentations: Optional[list[PublisherModelDocumentation]] = Field( + default=None, description="""Required.""" + ) + title: Optional[str] = Field( + default=None, description="""Required. The title of the view rest API.""" + ) + + +class PublisherModelCallToActionViewRestApiDict(TypedDict, total=False): + """Rest API docs.""" + + documentations: Optional[list[PublisherModelDocumentationDict]] + """Required.""" + + title: Optional[str] + """Required. The title of the view rest API.""" + + +PublisherModelCallToActionViewRestApiOrDict = Union[ + PublisherModelCallToActionViewRestApi, PublisherModelCallToActionViewRestApiDict +] + + +class PublisherModelCallToAction(_common.BaseModel): + """Actions could take on this Publisher Model.""" + + create_application: Optional[ + PublisherModelCallToActionRegionalResourceReferences + ] = Field( + default=None, + description="""Optional. Create application using the PublisherModel.""", + ) + deploy: Optional[PublisherModelCallToActionDeploy] = Field( + default=None, + description="""Optional. Deploy the PublisherModel to Vertex Endpoint.""", + ) + deploy_gke: Optional[PublisherModelCallToActionDeployGke] = Field( + default=None, + description="""Optional. Deploy PublisherModel to Google Kubernetes Engine.""", + ) + multi_deploy_vertex: Optional[PublisherModelCallToActionDeployVertex] = Field( + default=None, + description="""Optional. Multiple setups to deploy the PublisherModel to Vertex Endpoint.""", + ) + open_evaluation_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferences + ] = Field( + default=None, + description="""Optional. 
Open evaluation pipeline of the PublisherModel.""", + ) + open_fine_tuning_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferences + ] = Field( + default=None, + description="""Optional. Open fine-tuning pipeline of the PublisherModel.""", + ) + open_fine_tuning_pipelines: Optional[ + PublisherModelCallToActionOpenFineTuningPipelines + ] = Field( + default=None, + description="""Optional. Open fine-tuning pipelines of the PublisherModel.""", + ) + open_generation_ai_studio: Optional[ + PublisherModelCallToActionRegionalResourceReferences + ] = Field(default=None, description="""Optional. Open in Generation AI Studio.""") + open_genie: Optional[PublisherModelCallToActionRegionalResourceReferences] = Field( + default=None, description="""Optional. Open Genie / Playground.""" + ) + open_notebook: Optional[PublisherModelCallToActionRegionalResourceReferences] = ( + Field( + default=None, + description="""Optional. Open notebook of the PublisherModel.""", + ) + ) + open_notebooks: Optional[PublisherModelCallToActionOpenNotebooks] = Field( + default=None, description="""Optional. Open notebooks of the PublisherModel.""" + ) + open_prompt_tuning_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferences + ] = Field( + default=None, + description="""Optional. Open prompt-tuning pipeline of the PublisherModel.""", + ) + request_access: Optional[PublisherModelCallToActionRegionalResourceReferences] = ( + Field(default=None, description="""Optional. Request for access.""") + ) + view_rest_api: Optional[PublisherModelCallToActionViewRestApi] = Field( + default=None, description="""Optional. To view Rest API docs.""" + ) + + +class PublisherModelCallToActionDict(TypedDict, total=False): + """Actions could take on this Publisher Model.""" + + create_application: Optional[ + PublisherModelCallToActionRegionalResourceReferencesDict + ] + """Optional. 
Create application using the PublisherModel.""" + + deploy: Optional[PublisherModelCallToActionDeployDict] + """Optional. Deploy the PublisherModel to Vertex Endpoint.""" + + deploy_gke: Optional[PublisherModelCallToActionDeployGkeDict] + """Optional. Deploy PublisherModel to Google Kubernetes Engine.""" + + multi_deploy_vertex: Optional[PublisherModelCallToActionDeployVertexDict] + """Optional. Multiple setups to deploy the PublisherModel to Vertex Endpoint.""" + + open_evaluation_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferencesDict + ] + """Optional. Open evaluation pipeline of the PublisherModel.""" + + open_fine_tuning_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferencesDict + ] + """Optional. Open fine-tuning pipeline of the PublisherModel.""" + + open_fine_tuning_pipelines: Optional[ + PublisherModelCallToActionOpenFineTuningPipelinesDict + ] + """Optional. Open fine-tuning pipelines of the PublisherModel.""" + + open_generation_ai_studio: Optional[ + PublisherModelCallToActionRegionalResourceReferencesDict + ] + """Optional. Open in Generation AI Studio.""" + + open_genie: Optional[PublisherModelCallToActionRegionalResourceReferencesDict] + """Optional. Open Genie / Playground.""" + + open_notebook: Optional[PublisherModelCallToActionRegionalResourceReferencesDict] + """Optional. Open notebook of the PublisherModel.""" + + open_notebooks: Optional[PublisherModelCallToActionOpenNotebooksDict] + """Optional. Open notebooks of the PublisherModel.""" + + open_prompt_tuning_pipeline: Optional[ + PublisherModelCallToActionRegionalResourceReferencesDict + ] + """Optional. Open prompt-tuning pipeline of the PublisherModel.""" + + request_access: Optional[PublisherModelCallToActionRegionalResourceReferencesDict] + """Optional. Request for access.""" + + view_rest_api: Optional[PublisherModelCallToActionViewRestApiDict] + """Optional. 
To view Rest API docs.""" + + +PublisherModelCallToActionOrDict = Union[ + PublisherModelCallToAction, PublisherModelCallToActionDict +] + + +class PublisherModel(_common.BaseModel): + """Publisher model from Model Garden.""" + + frameworks: Optional[list[str]] = Field( + default=None, + description="""Optional. Additional information about the model's Frameworks.""", + ) + launch_stage: Optional[LaunchStage] = Field( + default=None, + description="""Optional. Indicates the launch stage of the model.""", + ) + name: Optional[str] = Field( + default=None, + description="""Output only. Identifier. The resource name of the PublisherModel.""", + ) + open_source_category: Optional[OpenSourceCategory] = Field( + default=None, + description="""Required. Indicates the open source category of the publisher model.""", + ) + parent: Optional[PublisherModelParent] = Field( + default=None, + description="""Optional. The parent that this model was customized from. E.g., Vision API, Natural Language API, LaMDA, T5, etc. Foundation models don't have parents.""", + ) + predict_schemata: Optional[PredictSchemata] = Field( + default=None, + description="""Optional. The schemata that describes formats of the PublisherModel's predictions and explanations as given and returned via PredictionService.Predict.""", + ) + publisher_model_template: Optional[str] = Field( + default=None, + description="""Optional. Output only. Immutable. Used to indicate this model has a publisher model and provide the template of the publisher model resource name.""", + ) + supported_actions: Optional[PublisherModelCallToAction] = Field( + default=None, description="""Optional. Supported call-to-action options.""" + ) + version_id: Optional[str] = Field( + default=None, + description="""Output only. Immutable. The version ID of the PublisherModel. A new version is committed when a new model version is uploaded under an existing model id. 
It is an auto-incrementing decimal number in string representation.""", + ) + version_state: Optional[VersionState] = Field( + default=None, + description="""Optional. Indicates the state of the model version.""", + ) + + +class PublisherModelDict(TypedDict, total=False): + """Publisher model from Model Garden.""" + + frameworks: Optional[list[str]] + """Optional. Additional information about the model's Frameworks.""" + + launch_stage: Optional[LaunchStage] + """Optional. Indicates the launch stage of the model.""" + + name: Optional[str] + """Output only. Identifier. The resource name of the PublisherModel.""" + + open_source_category: Optional[OpenSourceCategory] + """Required. Indicates the open source category of the publisher model.""" + + parent: Optional[PublisherModelParentDict] + """Optional. The parent that this model was customized from. E.g., Vision API, Natural Language API, LaMDA, T5, etc. Foundation models don't have parents.""" + + predict_schemata: Optional[PredictSchemataDict] + """Optional. The schemata that describes formats of the PublisherModel's predictions and explanations as given and returned via PredictionService.Predict.""" + + publisher_model_template: Optional[str] + """Optional. Output only. Immutable. Used to indicate this model has a publisher model and provide the template of the publisher model resource name.""" + + supported_actions: Optional[PublisherModelCallToActionDict] + """Optional. Supported call-to-action options.""" + + version_id: Optional[str] + """Output only. Immutable. The version ID of the PublisherModel. A new version is committed when a new model version is uploaded under an existing model id. It is an auto-incrementing decimal number in string representation.""" + + version_state: Optional[VersionState] + """Optional. 
Indicates the state of the model version.""" + + +PublisherModelOrDict = Union[PublisherModel, PublisherModelDict] + + +class ListPublisherModelsResponse(_common.BaseModel): + """Response for listing publisher models.""" + + sdk_http_response: Optional[genai_types.HttpResponse] = Field( + default=None, description="""Used to retain the full HTTP response.""" + ) + next_page_token: Optional[str] = Field( + default=None, + description="""A token to retrieve next page of results. Pass to ListPublisherModels.page_token to obtain that page.""", + ) + publisher_models: Optional[list[PublisherModel]] = Field( + default=None, description="""List of PublisherModels in the requested page.""" + ) + + +class ListPublisherModelsResponseDict(TypedDict, total=False): + """Response for listing publisher models.""" + + sdk_http_response: Optional[genai_types.HttpResponseDict] + """Used to retain the full HTTP response.""" + + next_page_token: Optional[str] + """A token to retrieve next page of results. Pass to ListPublisherModels.page_token to obtain that page.""" + + publisher_models: Optional[list[PublisherModelDict]] + """List of PublisherModels in the requested page.""" + + +ListPublisherModelsResponseOrDict = Union[ + ListPublisherModelsResponse, ListPublisherModelsResponseDict +] + + +class PromptOptimizerConfig(_common.BaseModel): + """VAPO Prompt Optimizer Config.""" + + config_path: Optional[str] = Field( + default=None, + description="""The gcs path to the config file, e.g. gs://bucket/config.json.""", + ) + service_account: Optional[str] = Field( + default=None, + description="""The service account to use for the custom job. 
Cannot be provided at the same time as service_account_project_number.""", + ) + service_account_project_number: Optional[Union[int, str]] = Field( + default=None, + description="""The project number used to construct the default service account:{service_account_project_number}-compute@developer.gserviceaccount.com. Cannot be provided at the same time as "service_account".""", + ) + wait_for_completion: Optional[bool] = Field( + default=True, + description="""Whether to wait for the job to complete. Ignored for async jobs.""", + ) + optimizer_job_display_name: Optional[str] = Field( + default=None, + description="""The display name of the optimization job. If not provided, a display name in the format of "vapo-optimizer-{timestamp}" will be used.""", + ) + + +class PromptOptimizerConfigDict(TypedDict, total=False): + """VAPO Prompt Optimizer Config.""" + + config_path: Optional[str] + """The gcs path to the config file, e.g. gs://bucket/config.json.""" + + service_account: Optional[str] + """The service account to use for the custom job. Cannot be provided at the same time as service_account_project_number.""" + + service_account_project_number: Optional[Union[int, str]] + """The project number used to construct the default service account:{service_account_project_number}-compute@developer.gserviceaccount.com. Cannot be provided at the same time as "service_account".""" + + wait_for_completion: Optional[bool] + """Whether to wait for the job to complete. Ignored for async jobs.""" + + optimizer_job_display_name: Optional[str] + """The display name of the optimization job. 
If not provided, a display name in the format of "vapo-optimizer-{timestamp}" will be used.""" + + +PromptOptimizerConfigOrDict = Union[PromptOptimizerConfig, PromptOptimizerConfigDict] + + +class OptimizeResponse(_common.BaseModel): + """Response for the optimize_prompt method.""" + + raw_text_response: Optional[str] = Field(default=None, description="""""") + parsed_response: Optional["ParsedResponseUnion"] = Field( + default=None, description="""""" + ) + + +class OptimizeResponseDict(TypedDict, total=False): + """Response for the optimize_prompt method.""" + + raw_text_response: Optional[str] + """""" + + parsed_response: Optional["ParsedResponseUnionDict"] + """""" + + +OptimizeResponseOrDict = Union[OptimizeResponse, OptimizeResponseDict] + + +class ContentMapContents(_common.BaseModel): + """Map of placeholder in metric prompt template to contents of model input.""" + + contents: Optional[list[genai_types.Content]] = Field( + default=None, description="""Contents of the model input.""" + ) + + +class ContentMapContentsDict(TypedDict, total=False): + """Map of placeholder in metric prompt template to contents of model input.""" + + contents: Optional[list[genai_types.ContentDict]] + """Contents of the model input.""" + + +ContentMapContentsOrDict = Union[ContentMapContents, ContentMapContentsDict] + + +class EvaluateMethodConfig(_common.BaseModel): + """Optional parameters for the evaluate method.""" + + http_options: Optional[genai_types.HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]] = Field( + default=None, + description="""The schema to use for the dataset. 
+ If not specified, the dataset schema will be inferred from the first + example in the dataset.""", + ) + dest: Optional[str] = Field( + default=None, description="""The destination path for the evaluation results.""" + ) + evaluation_service_qps: Optional[float] = Field( + default=None, + description="""The rate limit (queries per second) for calls to the + evaluation service. Defaults to 10. Increase this value if your + project has a higher EvaluateInstances API quota.""", + ) + + +class EvaluateMethodConfigDict(TypedDict, total=False): + """Optional parameters for the evaluate method.""" + + http_options: Optional[genai_types.HttpOptionsDict] + """Used to override HTTP request options.""" + + dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]] + """The schema to use for the dataset. + If not specified, the dataset schema will be inferred from the first + example in the dataset.""" + + dest: Optional[str] + """The destination path for the evaluation results.""" + + evaluation_service_qps: Optional[float] + """The rate limit (queries per second) for calls to the + evaluation service. Defaults to 10. 
Increase this value if your + project has a higher EvaluateInstances API quota.""" + + +EvaluateMethodConfigOrDict = Union[EvaluateMethodConfig, EvaluateMethodConfigDict] + + +class EvaluateDatasetConfig(_common.BaseModel): + """Config for evaluate instances.""" + + http_options: Optional[genai_types.HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + + +class EvaluateDatasetConfigDict(TypedDict, total=False): + """Config for evaluate instances.""" + + http_options: Optional[genai_types.HttpOptionsDict] + """Used to override HTTP request options.""" + + +EvaluateDatasetConfigOrDict = Union[EvaluateDatasetConfig, EvaluateDatasetConfigDict] + + +class EvaluateDatasetOperation(_common.BaseModel): + + name: Optional[str] = Field( + default=None, + description="""The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""", + ) + metadata: Optional[dict[str, Any]] = Field( + default=None, + description="""Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""", + ) + done: Optional[bool] = Field( + default=None, + description="""If the value is `false`, it means the operation is still in progress. 
If `true`, the operation is completed, and either `error` or `response` is available.""", + ) + error: Optional[dict[str, Any]] = Field( + default=None, + description="""The error result of the operation in case of failure or cancellation.""", + ) + response: Optional[EvaluationDataset] = Field(default=None, description="""""") + + +class EvaluateDatasetOperationDict(TypedDict, total=False): + + name: Optional[str] + """The server-assigned name, which is only unique within the same service that originally returns it. If you use the default HTTP mapping, the `name` should be a resource name ending with `operations/{unique_id}`.""" + + metadata: Optional[dict[str, Any]] + """Service-specific metadata associated with the operation. It typically contains progress information and common metadata such as create time. Some services might not provide such metadata. Any method that returns a long-running operation should document the metadata type, if any.""" + + done: Optional[bool] + """If the value is `false`, it means the operation is still in progress. If `true`, the operation is completed, and either `error` or `response` is available.""" + + error: Optional[dict[str, Any]] + """The error result of the operation in case of failure or cancellation.""" + + response: Optional[EvaluationDatasetDict] + """""" + + +EvaluateDatasetOperationOrDict = Union[ + EvaluateDatasetOperation, EvaluateDatasetOperationDict +] + + +class EvaluateDatasetRequestParameters(_common.BaseModel): + """Parameters for batch dataset evaluation.""" + + dataset: Optional[EvaluationDataset] = Field(default=None, description="""""") + metrics: Optional[list[Metric]] = Field(default=None, description="""""") + output_config: Optional[genai_types.OutputConfig] = Field( + default=None, description="""""" + ) + autorater_config: Optional[genai_types.AutoraterConfig] = Field( + default=None, + description="""Autorater config used for evaluation. 
Not applicable for predefined metrics (PredefinedMetricSpec); the server uses its own model configuration for predefined metrics and this field is ignored.""", + ) + config: Optional[EvaluateDatasetConfig] = Field(default=None, description="""""") + + +class EvaluateDatasetRequestParametersDict(TypedDict, total=False): + """Parameters for batch dataset evaluation.""" + + dataset: Optional[EvaluationDatasetDict] + """""" + + metrics: Optional[list[MetricDict]] + """""" + + output_config: Optional[genai_types.OutputConfigDict] + """""" + + autorater_config: Optional[genai_types.AutoraterConfigDict] + """Autorater config used for evaluation. Not applicable for predefined metrics (PredefinedMetricSpec); the server uses its own model configuration for predefined metrics and this field is ignored.""" + + config: Optional[EvaluateDatasetConfigDict] + """""" + + +EvaluateDatasetRequestParametersOrDict = Union[ + EvaluateDatasetRequestParameters, EvaluateDatasetRequestParametersDict +] + + +class ObservabilityEvalCase(_common.BaseModel): + """A single evaluation case instance for data stored in GCP Observability.""" + + input_src: Optional[str] = Field( + default=None, + description="""String containing the GCS reference to the GenAI input content.""", + ) + output_src: Optional[str] = Field( + default=None, + description="""String containing the GCS reference to the GenAI response content.""", + ) + system_instruction_src: Optional[str] = Field( + default=None, + description="""An optional string containing the GCS reference to the GenAI system instruction.""", + ) + api_client: Optional[Any] = Field( + default=None, description="""The underlying API client.""" + ) + + +class ObservabilityEvalCaseDict(TypedDict, total=False): + """A single evaluation case instance for data stored in GCP Observability.""" + + input_src: Optional[str] + """String containing the GCS reference to the GenAI input content.""" + + output_src: Optional[str] + """String containing the GCS reference 
to the GenAI response content.""" + + system_instruction_src: Optional[str] + """An optional string containing the GCS reference to the GenAI system instruction.""" + + api_client: Optional[Any] + """The underlying API client.""" + + +ObservabilityEvalCaseOrDict = Union[ObservabilityEvalCase, ObservabilityEvalCaseDict] + + +class RubricGroup(_common.BaseModel): + """A group of rubrics. + + Used for grouping rubrics based on a metric or a version. + """ + + group_id: Optional[str] = Field( + default=None, description="""Unique identifier for the group.""" + ) + display_name: Optional[str] = Field( + default=None, + description="""Human-readable name for the group. This should be unique + within a given context if used for display or selection. + Example: "Instruction Following V1", "Content Quality - Summarization + Task".""", + ) + rubrics: Optional[list[evals_types.Rubric]] = Field( + default=None, description="""Rubrics that are part of this group.""" + ) + + +class RubricGroupDict(TypedDict, total=False): + """A group of rubrics. + + Used for grouping rubrics based on a metric or a version. + """ + + group_id: Optional[str] + """Unique identifier for the group.""" + + display_name: Optional[str] + """Human-readable name for the group. This should be unique + within a given context if used for display or selection. 
+ Example: "Instruction Following V1", "Content Quality - Summarization + Task".""" + + rubrics: Optional[list[evals_types.Rubric]] + """Rubrics that are part of this group.""" RubricGroupOrDict = Union[RubricGroup, RubricGroupDict] @@ -24401,3 +25713,55 @@ class AgentEngineRuntimeRevisionDict(TypedDict, total=False): AgentEngineRuntimeRevisionOrDict = Union[ AgentEngineRuntimeRevision, AgentEngineRuntimeRevisionDict ] + + +class ListDeployableModelsConfig(_common.BaseModel): + """Config for listing deployable models.""" + + include_hugging_face_models: Optional[bool] = Field( + default=None, description="""Whether to list Hugging Face models.""" + ) + model_filter: Optional[str] = Field( + default=None, description="""Optional. A string to filter the models by.""" + ) + + +class ListDeployableModelsConfigDict(TypedDict, total=False): + """Config for listing deployable models.""" + + include_hugging_face_models: Optional[bool] + """Whether to list Hugging Face models.""" + + model_filter: Optional[str] + """Optional. A string to filter the models by.""" + + +ListDeployableModelsConfigOrDict = Union[ + ListDeployableModelsConfig, ListDeployableModelsConfigDict +] + + +class ListModelGardenModelsConfig(_common.BaseModel): + """Config for listing Model Garden models.""" + + include_hugging_face_models: Optional[bool] = Field( + default=None, description="""Whether to list Hugging Face models.""" + ) + model_filter: Optional[str] = Field( + default=None, description="""Optional. A string to filter the models by.""" + ) + + +class ListModelGardenModelsConfigDict(TypedDict, total=False): + """Config for listing Model Garden models.""" + + include_hugging_face_models: Optional[bool] + """Whether to list Hugging Face models.""" + + model_filter: Optional[str] + """Optional. 
A string to filter the models by.""" + + +ListModelGardenModelsConfigOrDict = Union[ + ListModelGardenModelsConfig, ListModelGardenModelsConfigDict +] diff --git a/tests/unit/agentplatform/genai/replays/conftest.py b/tests/unit/agentplatform/genai/replays/conftest.py index b3adc265cd..1e63b63864 100644 --- a/tests/unit/agentplatform/genai/replays/conftest.py +++ b/tests/unit/agentplatform/genai/replays/conftest.py @@ -272,10 +272,15 @@ def client(use_vertex, replays_prefix, http_options, request): if http_options.headers is None: http_options.headers = {} + project = os.environ.get("GOOGLE_CLOUD_PROJECT") + location = os.environ.get("GOOGLE_CLOUD_LOCATION") + replay_client = _replay_api_client.ReplayApiClient( mode=mode, replay_id=replay_id, vertexai=use_vertex, + project=project, + location=location, http_options=http_options, ) diff --git a/tests/unit/agentplatform/genai/replays/test_model_garden.py b/tests/unit/agentplatform/genai/replays/test_model_garden.py new file mode 100644 index 0000000000..b0f5b97958 --- /dev/null +++ b/tests/unit/agentplatform/genai/replays/test_model_garden.py @@ -0,0 +1,82 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + +from agentplatform._genai import types +from tests.unit.agentplatform.genai.replays import pytest_helper +import pytest + + +def test_list_deployable_models(client): + """Tests listing deployable models in Model Garden.""" + models = client.model_garden.list_deployable_models( + config=types.ListDeployableModelsConfig( + include_hugging_face_models=False, + model_filter="timesfm", + ) + ) + assert len(models) > 0 + # Returns formatted strings like 'google/timesfm@timesfm-2.0' + assert isinstance(models[0], str) + assert "timesfm" in models[0].lower() + + +def test_list_models(client): + """Tests listing all baseline models in Model Garden.""" + models = client.model_garden.list_models( + config=types.ListModelGardenModelsConfig( + include_hugging_face_models=False, + model_filter="timesfm", + ) + ) + assert len(models) > 0 + assert isinstance(models[0], str) + assert "timesfm" in models[0].lower() + + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), +) + +pytest_plugins = ("pytest_asyncio",) + + +@pytest.mark.asyncio +async def test_list_deployable_models_async(client): + """Tests listing deployable models asynchronously.""" + models = await client.aio.model_garden.list_deployable_models( + config=types.ListDeployableModelsConfig( + include_hugging_face_models=False, + model_filter="timesfm", + ) + ) + assert len(models) > 0 + assert isinstance(models[0], str) + assert "timesfm" in models[0].lower() + + +@pytest.mark.asyncio +async def test_list_models_async(client): + """Tests listing all baseline models asynchronously.""" + models = await client.aio.model_garden.list_models( + config=types.ListModelGardenModelsConfig( + include_hugging_face_models=False, + model_filter="timesfm", + ) + ) + assert len(models) > 0 + assert isinstance(models[0], str) + assert "timesfm" in models[0].lower() diff --git 
a/tests/unit/agentplatform/genai/test_model_garden.py b/tests/unit/agentplatform/genai/test_model_garden.py new file mode 100644 index 0000000000..01d4b31bff --- /dev/null +++ b/tests/unit/agentplatform/genai/test_model_garden.py @@ -0,0 +1,409 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# pylint: disable=protected-access,bad-continuation,missing-function-docstring + +from unittest import mock +from agentplatform._genai import model_garden +from agentplatform._genai import types +from google.genai import client +import pytest + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" + + +@pytest.fixture +def mock_client(): + mock_api_client = mock.Mock(spec=client.Client) + mock_api_client.project = _TEST_PROJECT + mock_api_client.location = _TEST_LOCATION + mock_api_client.vertexai = True + + return model_garden.ModelGarden(mock_api_client) + + +def _make_deployable_model(name, version_id="001"): + """Helper to create a PublisherModel with multi_deploy_vertex support.""" + return types.PublisherModel( + name=name, + version_id=version_id, + supported_actions={ + "multi_deploy_vertex": { + "multi_deploy_vertex": [{"deploy_task_name": "test-deploy"}] + } + }, + ) + + +def test_list_deployable_models_filters_hf(mock_client): + """Tests that HF models are filtered out when include_hugging_face_models=False.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + 
_make_deployable_model("publishers/google/models/gemma-2b"), + _make_deployable_model("publishers/hf-google/models/gemma-2b"), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ) as mock_list: + models = mock_client.list_deployable_models( + config=types.ListDeployableModelsConfig(include_hugging_face_models=False) + ) + + mock_list.assert_called_once() + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_deployable_models_filters_non_deployable(mock_client): + """Tests that models without multi_deploy_vertex are filtered out.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + _make_deployable_model("publishers/google/models/gemma-2b"), + # This model has no deploy config + types.PublisherModel( + name="publishers/google/models/no-deploy", version_id="001" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_deployable_models( + config=types.ListDeployableModelsConfig(include_hugging_face_models=False) + ) + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_deployable_models_with_hf(mock_client): + """Tests format when include_hugging_face_models=True: no @version suffix.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + _make_deployable_model("publishers/google/models/gemma-2b"), + _make_deployable_model( + "publishers/hf-meta/models/llama-3", version_id="llama-3" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_deployable_models( + config=types.ListDeployableModelsConfig(include_hugging_face_models=True) + ) + + assert len(models) == 2 + # When include_hugging_face_models=True, no @version for any model + assert models[0] == "google/gemma-2b" + assert models[1] == "meta/llama-3" + + +def 
test_list_deployable_models_default_config(mock_client): + """Tests list_deployable_models with config=None uses defaults.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + _make_deployable_model("publishers/google/models/gemma-2b"), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_deployable_models() + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_deployable_models_dict_config(mock_client): + """Tests list_deployable_models with config passed as a dict.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + _make_deployable_model("publishers/google/models/gemma-2b"), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_deployable_models( + config={"include_hugging_face_models": False} + ) + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_deployable_models_empty_response(mock_client): + """Tests list_deployable_models with no models returned.""" + dummy_response = types.ListPublisherModelsResponse(publisher_models=[]) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_deployable_models() + + assert models == [] + + +def test_list_deployable_models_filter_string(mock_client): + """Tests that model_filter is passed to the API filter.""" + dummy_response = types.ListPublisherModelsResponse(publisher_models=[]) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ) as mock_list: + mock_client.list_deployable_models( + config=types.ListDeployableModelsConfig(model_filter="gemma") + ) + + call_args = mock_list.call_args + api_config = call_args.kwargs.get("config") or call_args[1].get("config") + assert "gemma" in api_config.filter + assert 
"is_hf_wildcard(false)" in api_config.filter + # VERIFIED_DEPLOYMENT_SUCCEED only added when include_hf=True + # When include_hf=False, deploy filtering is done client-side + # via _has_deploy_config + + +def test_list_deployable_models_hf_filter_string(mock_client): + """Tests that HF deployable filter includes VERIFIED_DEPLOYMENT_SUCCEED.""" + dummy_response = types.ListPublisherModelsResponse(publisher_models=[]) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ) as mock_list: + mock_client.list_deployable_models( + config=types.ListDeployableModelsConfig( + include_hugging_face_models=True, model_filter="gemma" + ) + ) + + call_args = mock_list.call_args + api_config = call_args.kwargs.get("config") or call_args[1].get("config") + assert "gemma" in api_config.filter + assert "is_hf_wildcard(true)" in api_config.filter + assert "VERIFIED_DEPLOYMENT_SUCCEED" in api_config.filter + + +# ---- list_models tests ---- + + +def test_list_models_filters_hf(mock_client): + """Tests that HF models are filtered out when include_hugging_face_models=False.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name="publishers/google/models/gemma-2b", version_id="001" + ), + types.PublisherModel( + name="publishers/hf-meta/models/llama-3", version_id="llama-3" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models( + config=types.ListModelGardenModelsConfig( + include_hugging_face_models=False + ) + ) + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_models_with_hf(mock_client): + """Tests list_models with include_hugging_face_models=True.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name="publishers/google/models/gemma-2b", version_id="001" + ), + types.PublisherModel( + 
name="publishers/hf-meta/models/llama-3", version_id="llama-3" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models( + config=types.ListModelGardenModelsConfig( + include_hugging_face_models=True + ) + ) + + assert len(models) == 2 + assert models[0] == "google/gemma-2b" + assert models[1] == "meta/llama-3" + + +def test_list_models_includes_non_deployable(mock_client): + """Tests that list_models includes models without deploy configs. + + Unlike list_deployable_models, list_models should return ALL models + regardless of whether they have multi_deploy_vertex configurations. + """ + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + _make_deployable_model("publishers/google/models/gemma-2b"), + # This model has no deploy config -- should still be included + types.PublisherModel( + name="publishers/google/models/bert-base", version_id="001" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models( + config=types.ListModelGardenModelsConfig( + include_hugging_face_models=False + ) + ) + + assert len(models) == 2 + assert models[0] == "google/gemma-2b@001" + assert models[1] == "google/bert-base@001" + + +def test_list_models_default_config(mock_client): + """Tests list_models with config=None uses defaults.""" + dummy_response = types.ListPublisherModelsResponse( + publisher_models=[ + types.PublisherModel( + name="publishers/google/models/gemma-2b", version_id="001" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models() + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_models_dict_config(mock_client): + """Tests list_models with config passed as a dict.""" + dummy_response = types.ListPublisherModelsResponse( + 
publisher_models=[ + types.PublisherModel( + name="publishers/google/models/gemma-2b", version_id="001" + ), + ] + ) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models( + config={"include_hugging_face_models": False, "model_filter": "gemma"} + ) + + assert len(models) == 1 + assert models[0] == "google/gemma-2b@001" + + +def test_list_models_empty_response(mock_client): + """Tests list_models with no models returned.""" + dummy_response = types.ListPublisherModelsResponse(publisher_models=[]) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ): + models = mock_client.list_models() + + assert models == [] + + +def test_list_models_filter_string_no_deploy_filter(mock_client): + """Tests that list_models does NOT add VERIFIED_DEPLOYMENT filter.""" + dummy_response = types.ListPublisherModelsResponse(publisher_models=[]) + + with mock.patch.object( + mock_client, "_list_publisher_models", return_value=dummy_response + ) as mock_list: + mock_client.list_models( + config=types.ListModelGardenModelsConfig(model_filter="llama") + ) + + call_args = mock_list.call_args + api_config = call_args.kwargs.get("config") or call_args[1].get("config") + assert "llama" in api_config.filter + assert "is_hf_wildcard(false)" in api_config.filter + # list_models should NOT have the deployable-only filter + assert "VERIFIED_DEPLOYMENT_SUCCEED" not in api_config.filter + + +# ---- _build_filter_str tests ---- + + +def test_build_filter_str_native_deployable(): + """Tests filter string for native deployable models.""" + result = model_garden.ModelGarden._build_filter_str( + model_filter=None, include_hugging_face_models=False, deployable_only=True + ) + assert result == "is_hf_wildcard(false)" + + +def test_build_filter_str_hf_deployable(): + """Tests filter string for HF deployable models.""" + result = model_garden.ModelGarden._build_filter_str( + 
model_filter=None, include_hugging_face_models=True, deployable_only=True + ) + assert "is_hf_wildcard(true)" in result + assert "VERIFIED_DEPLOYMENT_SUCCEED" in result + + +def test_build_filter_str_hf_all(): + """Tests filter string for HF all models (not deployable only).""" + result = model_garden.ModelGarden._build_filter_str( + model_filter=None, include_hugging_face_models=True, deployable_only=False + ) + assert result == "is_hf_wildcard(true)" + assert "VERIFIED_DEPLOYMENT_SUCCEED" not in result + + +def test_build_filter_str_with_model_filter(): + """Tests filter string includes model_filter substring.""" + result = model_garden.ModelGarden._build_filter_str( + model_filter="gemma", include_hugging_face_models=False, deployable_only=False + ) + assert "is_hf_wildcard(false)" in result + assert "gemma" in result + assert "model_user_id" in result + assert "display_name" in result + + +def test_build_filter_str_escapes_special_chars(): + """Tests that special regex characters in model_filter are escaped.""" + result = model_garden.ModelGarden._build_filter_str( + model_filter="model.v2+", include_hugging_face_models=False, deployable_only=False + ) + # re.escape turns '.' into '\\.' and '+' into '\\+' + assert r"model\.v2\+" in result