Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
311 changes: 0 additions & 311 deletions tests/unit/aiplatform/test_language_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4883,317 +4883,6 @@ def teardown_method(self):
self._job_wait_patcher.stop()
self._log_wait_patcher.stop()

@pytest.mark.usefixtures(
    "get_model_with_tuned_version_label_mock",
    "get_endpoint_with_models_mock",
)
@pytest.mark.parametrize(
    "job_spec",
    [_TEST_EVAL_PIPELINE_SPEC_JSON, _TEST_EVAL_PIPELINE_JOB],
)
@pytest.mark.parametrize(
    "mock_request_urlopen_eval",
    ["https://us-kfp.pkg.dev/proj/repo/pack/latest"],
    indirect=True,
)
def test_model_evaluation_text_generation_task_with_gcs_input(
    self,
    job_spec,
    mock_pipeline_service_create_eval,
    mock_pipeline_job_get_eval,
    mock_successfully_completed_eval_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
    mock_request_urlopen_eval,
):
    """Evaluate a tuned text model with ground truth given as a GCS URI.

    The model is obtained through ``get_tuned_model`` while
    ``ModelGardenServiceClient.get_publisher_model`` is patched to return the
    text-bison publisher model. The test then checks that ``evaluate``
    returns an ``EvaluationMetric`` whose ``bleu`` value equals the one in
    ``_TEST_TEXT_GENERATION_METRICS``.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    publisher_model = gca_publisher_model.PublisherModel(
        _TEXT_BISON_PUBLISHER_MODEL_DICT
    )
    with mock.patch.object(
        model_garden_service_client.ModelGardenServiceClient,
        "get_publisher_model",
        return_value=publisher_model,
    ):
        tuned_model = preview_language_models.TextGenerationModel.get_tuned_model(
            test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
        )

        generation_spec = preview_language_models.EvaluationTextGenerationSpec(
            ground_truth_data="gs://my-bucket/ground-truth.jsonl",
        )
        metrics = tuned_model.evaluate(task_spec=generation_spec)

        assert isinstance(metrics, preview_language_models.EvaluationMetric)
        expected_bleu = _TEST_TEXT_GENERATION_METRICS["bleu"]
        assert metrics.bleu == expected_bleu

@pytest.mark.usefixtures(
    "get_model_with_tuned_version_label_mock",
    "get_endpoint_with_models_mock",
)
@pytest.mark.parametrize(
    "job_spec",
    [_TEST_EVAL_PIPELINE_SPEC_JSON, _TEST_EVAL_PIPELINE_JOB],
)
def test_populate_eval_template_params(
    self,
    job_spec,
    mock_pipeline_service_create,
    mock_model_evaluate,
    mock_pipeline_job_get,
    mock_successfully_completed_eval_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
):
    """Check the template params built for a text-generation evaluation spec.

    Calls ``_populate_eval_template_params`` with an
    ``EvaluationTextGenerationSpec`` whose ground truth is a GCS URI and
    verifies that the generation keys are present while the
    classification-only keys are absent.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    with mock.patch.object(
        target=model_garden_service_client.ModelGardenServiceClient,
        attribute="get_publisher_model",
        return_value=gca_publisher_model.PublisherModel(
            _TEXT_BISON_PUBLISHER_MODEL_DICT
        ),
    ):
        my_model = preview_language_models.TextGenerationModel.get_tuned_model(
            test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
        )

        task_spec = preview_language_models.EvaluationTextGenerationSpec(
            ground_truth_data="gs://my-bucket/ground-truth.jsonl",
        )

        formatted_template_params = (
            _evaluatable_language_models._populate_eval_template_params(
                task_spec=task_spec, model_name=my_model._model_resource_name
            )
        )

        assert (
            "batch_predict_gcs_destination_output_uri" in formatted_template_params
        )
        assert "model_name" in formatted_template_params
        assert "evaluation_task" in formatted_template_params

        # These should only be in the classification task pipeline template.
        assert "evaluation_class_labels" not in formatted_template_params
        assert "target_column_name" not in formatted_template_params
        # "target_field_name" is the key the classification test in this file
        # expects in the params, so it is the one that must not leak into the
        # generation params; the "target_column_name" check above is the spec
        # attribute name and would pass regardless.
        assert "target_field_name" not in formatted_template_params

@pytest.mark.usefixtures(
    "get_model_with_tuned_version_label_mock",
    "get_endpoint_with_models_mock",
)
@pytest.mark.parametrize(
    "job_spec",
    [_TEST_EVAL_PIPELINE_SPEC_JSON, _TEST_EVAL_PIPELINE_JOB],
)
def test_populate_template_params_for_classification_task(
    self,
    job_spec,
    mock_pipeline_service_create,
    mock_model_evaluate,
    mock_pipeline_job_get,
    mock_successfully_completed_eval_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
):
    """Check the template params built for a text-classification spec.

    Builds an ``EvaluationTextClassificationSpec`` (GCS ground truth, a
    target column name and two class names), passes it to
    ``_populate_eval_template_params`` and verifies that the resulting
    params contain ``evaluation_class_labels`` and ``target_field_name``.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    publisher_model = gca_publisher_model.PublisherModel(
        _TEXT_BISON_PUBLISHER_MODEL_DICT
    )
    with mock.patch.object(
        model_garden_service_client.ModelGardenServiceClient,
        "get_publisher_model",
        return_value=publisher_model,
    ):
        tuned_model = preview_language_models.TextGenerationModel.get_tuned_model(
            test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
        )

        classification_spec = (
            preview_language_models.EvaluationTextClassificationSpec(
                ground_truth_data="gs://my-bucket/ground-truth.jsonl",
                target_column_name="test_targ_name",
                class_names=["test_class_name_1", "test_class_name_2"],
            )
        )

        populate = _evaluatable_language_models._populate_eval_template_params
        params = populate(
            task_spec=classification_spec,
            model_name=tuned_model._model_resource_name,
        )

        assert "evaluation_class_labels" in params
        assert "target_field_name" in params

@pytest.mark.usefixtures(
    "get_model_with_tuned_version_label_mock",
    "get_endpoint_with_models_mock",
    "mock_storage_blob_upload_from_filename",
)
@pytest.mark.parametrize(
    "job_spec",
    [_TEST_EVAL_PIPELINE_SPEC_JSON, _TEST_EVAL_PIPELINE_JOB],
)
def test_populate_template_params_with_dataframe_input(
    self,
    job_spec,
    mock_pipeline_service_create,
    mock_pipeline_job_get,
    mock_successfully_completed_eval_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
):
    """Check the template params when ground truth is a pandas DataFrame.

    Calls ``_populate_eval_template_params`` with an
    ``EvaluationTextGenerationSpec`` whose ``ground_truth_data`` is
    ``_TEST_EVAL_DATA_DF``. Verifies that the spec still holds a DataFrame
    afterwards, that the generation keys are present, and that the
    classification-only keys are absent.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    with mock.patch.object(
        target=model_garden_service_client.ModelGardenServiceClient,
        attribute="get_publisher_model",
        return_value=gca_publisher_model.PublisherModel(
            _TEXT_BISON_PUBLISHER_MODEL_DICT
        ),
    ):
        my_model = preview_language_models.TextGenerationModel.get_tuned_model(
            test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
        )

        task_spec = preview_language_models.EvaluationTextGenerationSpec(
            ground_truth_data=_TEST_EVAL_DATA_DF,
        )

        formatted_template_params = (
            _evaluatable_language_models._populate_eval_template_params(
                task_spec=task_spec, model_name=my_model._model_resource_name
            )
        )

        # The utility method should not modify task_spec
        assert isinstance(task_spec.ground_truth_data, pd.DataFrame)

        assert (
            "batch_predict_gcs_destination_output_uri" in formatted_template_params
        )
        assert "model_name" in formatted_template_params
        assert "evaluation_task" in formatted_template_params

        # These should only be in the classification task pipeline template.
        assert "evaluation_class_labels" not in formatted_template_params
        assert "target_column_name" not in formatted_template_params
        # "target_field_name" is the key the classification test in this file
        # expects in the params, so it is the one that must not leak into the
        # generation params; the "target_column_name" check above is the spec
        # attribute name and would pass regardless.
        assert "target_field_name" not in formatted_template_params

def test_evaluate_raises_on_ga_language_model(self):
    """Calling ``evaluate`` on the GA ``TextGenerationModel`` must fail.

    The model is loaded from ``language_models`` (not the preview module)
    via ``from_pretrained("text-bison@001")`` with
    ``ModelGardenServiceClient.get_publisher_model`` patched; invoking
    ``evaluate()`` on it is expected to raise ``AttributeError``.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    publisher_model = gca_publisher_model.PublisherModel(
        _TEXT_BISON_PUBLISHER_MODEL_DICT
    )
    with mock.patch.object(
        model_garden_service_client.ModelGardenServiceClient,
        "get_publisher_model",
        return_value=publisher_model,
    ):
        ga_model = language_models.TextGenerationModel.from_pretrained(
            "text-bison@001"
        )

        with pytest.raises(AttributeError):
            ga_model.evaluate()

@pytest.mark.usefixtures(
    "get_endpoint_with_models_mock",
)
@pytest.mark.parametrize(
    "job_spec",
    [_TEST_EVAL_PIPELINE_SPEC_JSON, _TEST_EVAL_PIPELINE_JOB],
)
@pytest.mark.parametrize(
    "mock_request_urlopen_eval",
    ["https://us-kfp.pkg.dev/proj/repo/pack/latest"],
    indirect=True,
)
def test_model_evaluation_text_generation_task_on_base_model(
    self,
    job_spec,
    mock_pipeline_service_create_eval,
    mock_pipeline_job_get_eval,
    mock_successfully_completed_eval_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
    mock_request_urlopen_eval,
):
    """Evaluate a base (non-tuned) preview text model on GCS ground truth.

    The model comes from ``from_pretrained("text-bison@001")`` with
    ``ModelGardenServiceClient.get_publisher_model`` patched. The test only
    checks that ``evaluate`` returns an ``EvaluationMetric`` instance.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    publisher_model = gca_publisher_model.PublisherModel(
        _TEXT_BISON_PUBLISHER_MODEL_DICT
    )
    with mock.patch.object(
        model_garden_service_client.ModelGardenServiceClient,
        "get_publisher_model",
        return_value=publisher_model,
    ):
        base_model = preview_language_models.TextGenerationModel.from_pretrained(
            "text-bison@001"
        )

        generation_spec = preview_language_models.EvaluationTextGenerationSpec(
            ground_truth_data="gs://my-bucket/ground-truth.jsonl",
        )
        metrics = base_model.evaluate(task_spec=generation_spec)

        assert isinstance(metrics, preview_language_models.EvaluationMetric)

@pytest.mark.usefixtures(
    "get_endpoint_with_models_mock",
)
@pytest.mark.parametrize(
    "job_spec",
    [
        _TEST_EVAL_CLASSIFICATION_PIPELINE_SPEC_JSON,
        _TEST_EVAL_CLASSIFICATION_PIPELINE_JOB,
    ],
)
@pytest.mark.parametrize(
    "mock_request_urlopen_eval_classification",
    ["https://us-central1-kfp.pkg.dev/proj/repo/pack/latest"],
    indirect=True,
)
def test_model_evaluation_text_classification_base_model_only_summary_metrics(
    self,
    job_spec,
    mock_pipeline_service_create_eval_classification,
    mock_pipeline_job_get_eval_classification,
    mock_successfully_completed_eval_classification_job,
    mock_pipeline_bucket_exists,
    mock_load_yaml_and_json,
    mock_request_urlopen_eval_classification,
):
    """Evaluate a base preview text model on a classification task.

    The model comes from ``from_pretrained("text-bison@001")`` with
    ``ModelGardenServiceClient.get_publisher_model`` patched. The test checks
    that ``evaluate`` returns an ``EvaluationClassificationMetric`` whose
    ``confidenceMetrics`` is ``None`` and whose ``auPrc`` equals the value
    in ``_TEST_TEXT_CLASSIFICATION_METRICS``.

    NOTE(review): the fixture arguments are not referenced in the body; they
    are presumably requested only for their patching side effects.
    """
    aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    publisher_model = gca_publisher_model.PublisherModel(
        _TEXT_BISON_PUBLISHER_MODEL_DICT
    )
    with mock.patch.object(
        model_garden_service_client.ModelGardenServiceClient,
        "get_publisher_model",
        return_value=publisher_model,
    ):
        base_model = preview_language_models.TextGenerationModel.from_pretrained(
            "text-bison@001"
        )

        classification_spec = (
            preview_language_models.EvaluationTextClassificationSpec(
                ground_truth_data="gs://my-bucket/ground-truth.jsonl",
                target_column_name="test_targ_name",
                class_names=["test_class_name_1", "test_class_name_2"],
            )
        )
        metrics = base_model.evaluate(task_spec=classification_spec)

        metric_cls = preview_language_models.EvaluationClassificationMetric
        assert isinstance(metrics, metric_cls)
        assert metrics.confidenceMetrics is None
        expected_au_prc = _TEST_TEXT_CLASSIFICATION_METRICS["auPrc"]
        assert metrics.auPrc == expected_au_prc

@pytest.mark.parametrize(
"job_spec",
[
Expand Down
Loading