scaleapi · shehabyasser-scale · Jul 3, 2026
diff --git a/vero/src/vero/policy.py b/vero/src/vero/policy.py
@@ -532,10 +532,13 @@ def _ensure_budgeted_splits_tiered(self) -> None:
 
     def _validate_budget_splits(self) -> None:
         """Warn if any budget splits do not exist in the dataset, and verify
-        that every agent-budgeted split is tiered ``non_viewable``.
+        that every agent-budgeted split is agent-evaluable.
 
-        ``viewable`` would leak per-sample labels to the agent; ``no_access``
-        would make the budget unusable. Both are misconfigurations and raise.
+        ``no_access`` would make the budget unusable and raises. ``viewable``
+        is allowed: a budget meters *compute* (each eval can be a real, paid
+        benchmark run), which is orthogonal to result visibility; it does mean
+        per-sample results reach the agent, so it is logged loudly rather than
+        rejected.
         """
         if not self.budget:
             return
@@ -558,12 +561,19 @@ def _validate_budget_splits(self) -> None:
                 pass
 
             tier = resolve_split_access(b.split, self.split_accesses)
-            if tier != SplitAccessLevel.non_viewable:
+            if tier == SplitAccessLevel.no_access:
                 raise ValueError(
-                    f"Agent-budgeted split '{b.split}' is tiered '{tier}', but a "
-                    f"budgeted split must be 'non_viewable' (agent-evaluable with "
-                    f"per-sample labels hidden). 'viewable' would leak labels and "
-                    f"'no_access' would make the budget unusable."
+                    f"Agent-budgeted split '{b.split}' is tiered 'no_access', "
+                    f"which makes the budget unusable (the engine rejects agent "
+                    f"evals of no_access splits). Tier it 'non_viewable' or "
+                    f"'viewable', or drop the budget."
+                )
+            if tier == SplitAccessLevel.viewable:
+                logger.warning(
+                    f"Agent-budgeted split '{b.split}' is tiered 'viewable': the "
+                    f"budget meters eval compute, but per-sample results are "
+                    f"fully visible to the agent. Use 'non_viewable' if labels "
+                    f"must stay hidden."
                 )
 
     def _maybe_make_db(self) -> ExperimentDatabase:

diff --git a/vero/tests/test_policy_budget_splits.py b/vero/tests/test_policy_budget_splits.py
@@ -44,21 +44,26 @@ def test_ensure_then_validate_passes_for_train_budget_default():
     p._validate_budget_splits()  # no raise
 
 
-def test_validate_rejects_explicit_viewable_budgeted_split():
+def test_validate_allows_explicit_viewable_budgeted_split(caplog):
+    # A budget meters *compute* (each eval can be a paid benchmark run), which
+    # is orthogonal to result visibility: viewable + budget is legitimate
+    # (e.g. a classic ML train split with per-task feedback). It warns loudly
+    # instead of raising.
     p = _policy(
         [SplitAccess.viewable("dev")],
         [SplitBudget(split="dev", dataset_id="ds1", total_sample_budget=10)],
     )
-    with pytest.raises(ValueError, match="non_viewable"):
-        p._validate_budget_splits()
+    with caplog.at_level("WARNING"):
+        p._validate_budget_splits()  # no raise
+    assert any("tiered 'viewable'" in r.message for r in caplog.records)
 
 
 def test_validate_rejects_explicit_no_access_budgeted_split():
     p = _policy(
         [SplitAccess.no_access("dev")],
         [SplitBudget(split="dev", dataset_id="ds1", total_sample_budget=10)],
     )
-    with pytest.raises(ValueError, match="non_viewable"):
+    with pytest.raises(ValueError, match="no_access"):
         p._validate_budget_splits()