Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions tests/unit/vertexai/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ def test_compute_pointwise_metrics(self, api_transport):
scores = list(
test_result.metrics_table["test_pointwise_metric/score"].to_list()
)
assert scores == [5, 4] or scores == [4, 5]
assert sorted(scores) == [4, 5]
assert list(
test_result.metrics_table["test_pointwise_metric/explanation"].to_list()
) == [
Expand Down Expand Up @@ -994,9 +994,9 @@ def _pointwise_side_effect(**kwargs):
assert test_result.metrics_table["prompt"].equals(
_TEST_EVAL_DATASET_ALL_INCLUDED["prompt"]
)
assert list(
test_result.metrics_table["test_pointwise_metric_str/score"].to_list()
) == [5, 4]
assert sorted(
list(test_result.metrics_table["test_pointwise_metric_str/score"].to_list())
) == [4, 5]
assert list(
test_result.metrics_table["test_pointwise_metric_str/explanation"].to_list()
) == [
Expand Down Expand Up @@ -1049,9 +1049,9 @@ def test_compute_pointwise_metrics_metric_prompt_template_example(
"summarization_quality/explanation",
]
)
assert list(
test_result.metrics_table["summarization_quality/score"].to_list()
) == [5, 4]
assert sorted(
list(test_result.metrics_table["summarization_quality/score"].to_list())
) == [4, 5]
assert list(
test_result.metrics_table["summarization_quality/explanation"].to_list()
) == [
Expand Down Expand Up @@ -1166,9 +1166,9 @@ def _summarization_side_effect(**kwargs):
"source",
]
)
assert list(
test_result.metrics_table["summarization_quality/score"].to_list()
) == [5, 4]
assert sorted(
list(test_result.metrics_table["summarization_quality/score"].to_list())
) == [4, 5]
assert list(
test_result.metrics_table["summarization_quality/explanation"].to_list()
) == [
Expand Down Expand Up @@ -1595,9 +1595,9 @@ def test_compute_multiple_metrics(self, api_transport):
== 0.5
)

assert list(
test_result.metrics_table["summarization_quality/score"].to_list()
) == [5, 4]
assert sorted(
list(test_result.metrics_table["summarization_quality/score"].to_list())
) == [4, 5]
assert list(
test_result.metrics_table["summarization_quality/explanation"].to_list()
) == [
Expand Down Expand Up @@ -1884,7 +1884,9 @@ def test_runnable_response_eval_with_runnable_inference(self, api_transport):
"coherence/explanation",
]
)
assert list(test_result.metrics_table["coherence/score"].to_list()) == [5, 4]
assert sorted(
list(test_result.metrics_table["coherence/score"].to_list())
) == [4, 5]
assert list(test_result.metrics_table["coherence/explanation"].to_list()) == [
"explanation",
"explanation",
Expand Down
Loading