Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions vero/src/vero/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,10 +532,13 @@ def _ensure_budgeted_splits_tiered(self) -> None:

def _validate_budget_splits(self) -> None:
"""Warn if any budget splits do not exist in the dataset, and verify
that every agent-budgeted split is tiered ``non_viewable``.
that every agent-budgeted split is agent-evaluable.

``viewable`` would leak per-sample labels to the agent; ``no_access``
would make the budget unusable. Both are misconfigurations and raise.
``no_access`` would make the budget unusable and raises. ``viewable``
is allowed: a budget meters *compute* (each eval can be a real, paid
benchmark run), which is orthogonal to result visibility; it does mean
per-sample results reach the agent, so it is logged loudly rather than
rejected.
"""
if not self.budget:
return
Expand All @@ -558,12 +561,19 @@ def _validate_budget_splits(self) -> None:
pass

tier = resolve_split_access(b.split, self.split_accesses)
if tier != SplitAccessLevel.non_viewable:
if tier == SplitAccessLevel.no_access:
raise ValueError(
f"Agent-budgeted split '{b.split}' is tiered '{tier}', but a "
f"budgeted split must be 'non_viewable' (agent-evaluable with "
f"per-sample labels hidden). 'viewable' would leak labels and "
f"'no_access' would make the budget unusable."
f"Agent-budgeted split '{b.split}' is tiered 'no_access', "
f"which makes the budget unusable (the engine rejects agent "
f"evals of no_access splits). Tier it 'non_viewable' or "
f"'viewable', or drop the budget."
)
if tier == SplitAccessLevel.viewable:
logger.warning(
f"Agent-budgeted split '{b.split}' is tiered 'viewable': the "
f"budget meters eval compute, but per-sample results are "
f"fully visible to the agent. Use 'non_viewable' if labels "
f"must stay hidden."
)

def _maybe_make_db(self) -> ExperimentDatabase:
Expand Down
13 changes: 9 additions & 4 deletions vero/tests/test_policy_budget_splits.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,26 @@ def test_ensure_then_validate_passes_for_train_budget_default():
p._validate_budget_splits() # no raise


def test_validate_rejects_explicit_viewable_budgeted_split():
def test_validate_allows_explicit_viewable_budgeted_split(caplog):
# A budget meters *compute* (each eval can be a paid benchmark run), which
# is orthogonal to result visibility: viewable + budget is legitimate
# (e.g. a classic ML train split with per-task feedback). It warns loudly
# instead of raising.
p = _policy(
[SplitAccess.viewable("dev")],
[SplitBudget(split="dev", dataset_id="ds1", total_sample_budget=10)],
)
with pytest.raises(ValueError, match="non_viewable"):
p._validate_budget_splits()
with caplog.at_level("WARNING"):
p._validate_budget_splits() # no raise
assert any("viewable" in r.message for r in caplog.records)


def test_validate_rejects_explicit_no_access_budgeted_split():
p = _policy(
[SplitAccess.no_access("dev")],
[SplitBudget(split="dev", dataset_id="ds1", total_sample_budget=10)],
)
with pytest.raises(ValueError, match="non_viewable"):
with pytest.raises(ValueError, match="no_access"):
p._validate_budget_splits()


Expand Down