Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/bosh-lite-files/create-director-override.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/sh
# Overrides bbl's generated create-director.sh so we can apply extra ops files
# to the BOSH Lite director. Mirrors the stock bosh-lite-gcp plan-patch override
# and adds director-inotify.yml (raises fs.inotify limits on the director host
# so Noble app Envoy sidecars don't crash with "inotify_fd_ >= 0").
#
# Reads from the environment:
#   BBL_STATE_DIR                     root directory of every path used below
#   BBL_GCP_SERVICE_ACCOUNT_KEY_PATH  file passed as gcp_credentials_json
#   BBL_GCP_PROJECT_ID                value passed as project_id
#   BBL_GCP_ZONE                      value passed as zone
#
# The script's exit status is that of `bosh create-env`.

# Every path below is rooted at BBL_STATE_DIR. Abort with a clear message when
# it is unset or empty instead of handing bosh paths such as
# "/bosh-deployment/bosh.yml".
: "${BBL_STATE_DIR:?BBL_STATE_DIR must be set}"

# All expansions are quoted so a state directory containing whitespace or glob
# characters is passed to bosh as a single argument.
# Keep director-inotify.yml after bosh-lite.yml: it adds a job from the os-conf
# release that bosh-lite.yml declares.
bosh create-env \
  "${BBL_STATE_DIR}/bosh-deployment/bosh.yml" \
  --state "${BBL_STATE_DIR}/vars/bosh-state.json" \
  --vars-store "${BBL_STATE_DIR}/vars/director-vars-store.yml" \
  --vars-file "${BBL_STATE_DIR}/vars/director-vars-file.yml" \
  --var-file gcp_credentials_json="${BBL_GCP_SERVICE_ACCOUNT_KEY_PATH}" \
  -v project_id="${BBL_GCP_PROJECT_ID}" \
  -v zone="${BBL_GCP_ZONE}" \
  -o "${BBL_STATE_DIR}/bosh-deployment/gcp/cpi.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/jumpbox-user.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/uaa.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/credhub.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/bosh-lite.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/bosh-lite-runc.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/gcp/bosh-lite-vm-type.yml" \
  -o "${BBL_STATE_DIR}/bosh-deployment/gcp/director-inotify.yml" \
  -o "${BBL_STATE_DIR}/external-ip-gcp.yml" \
  -o "${BBL_STATE_DIR}/ip-forwarding.yml"
15 changes: 15 additions & 0 deletions .github/ops-files/bosh-dns-noble-bosh-lite.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# BOSH DNS recursor fix for Ubuntu Noble on BOSH Lite (GCP).
# bosh-deployment's dns.yml places the Noble bosh-dns config under the
# "bosh-dns-systemd" addon (NOT the "bosh-dns" addon, which only covers
# trusty/xenial/bionic/jammy) with disable_recursors: true. That blocks
# external DNS resolution (e.g. buildpacks.cloudfoundry.org) inside diego-cell
# containers, so app staging fails with "lookup ... server misbehaving".
# Enable recursion and forward to GCP's metadata resolver.
- type: replace
path: /addons/name=bosh-dns-systemd/jobs/name=bosh-dns/properties/disable_recursors
value: false
- type: replace
path: /addons/name=bosh-dns-systemd/jobs/name=bosh-dns/properties/recursors?
value:
- 169.254.169.254
8 changes: 6 additions & 2 deletions .github/ops-files/bosh-lite-vm-type.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
---
# Configure sizes for bosh-lite on gcp
# Configure sizes for bosh-lite on gcp.
# n2-standard-16 (16 vCPU / 64 GB): the whole cf-deployment runs as warden
# containers on this single director VM; on Ubuntu Noble each container runs a
# full systemd PID 1, so 32 GB (n2-standard-8) overcommits memory and a random
# instance-group agent fails to boot ("Timed out pinging VM"). 64 GB gives headroom.
- type: replace
path: /resource_pools/name=vms/cloud_properties/machine_type
value: n2-standard-8
value: n2-standard-16
- type: replace
path: /disk_pools/name=disks/disk_size
value: 250000
Expand Down
25 changes: 25 additions & 0 deletions .github/ops-files/director-inotify.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
# Raise inotify limits on the BOSH Lite director VM (the host running every
# warden container). On Ubuntu Noble each warden container runs systemd as PID 1
# (start_containers_with_systemd), a heavy inotify consumer, so the host's
# default fs.inotify.max_user_instances (128) is exhausted. App Envoy sidecars
# then abort with "assert failure: inotify_fd_ >= 0" (Exit status 134), which
# marks every app instance CRASHED even though staging succeeds.
#
# inotify limits are enforced at the host root user namespace (a new userns
# defaults to unlimited and inc_ucount checks every ancestor up to root), so
# this MUST be set on the director VM, NOT on the diego-cell.
#
# The os-conf release is already declared by bosh-deployment's bosh-lite.yml
# (for its disable_agent job), which create-director-override.sh applies before
# this file, so we only add the sysctl job here. Re-declaring the release fails
# with "releases[N].name 'os-conf' must be unique".
- type: replace
path: /instance_groups/name=bosh/jobs/-
value:
name: sysctl
release: os-conf
properties:
sysctl:
- fs.inotify.max_user_instances=1024
- fs.inotify.max_user_watches=524288
12 changes: 10 additions & 2 deletions .github/workflows/create-bosh-lite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ jobs:
cp ${GITHUB_WORKSPACE}/cli/.github/bosh-lite-files/bosh-lite-dns.tf terraform/
cp ${GITHUB_WORKSPACE}/cli/.github/bosh-lite-files/bosh-lite.tfvars vars/
cp ${GITHUB_WORKSPACE}/cli/.github/ops-files/bosh-lite-vm-type.yml bosh-deployment/gcp/
cp ${GITHUB_WORKSPACE}/cli/.github/ops-files/director-inotify.yml bosh-deployment/gcp/
# Overwrite the plan-patch's stock create-director-override.sh with ours
# (bbl runs *-override.sh in preference to the generated create-director.sh)
# so the director gets director-inotify.yml during bosh create-env.
cp ${GITHUB_WORKSPACE}/cli/.github/bosh-lite-files/create-director-override.sh create-director-override.sh
chmod +x create-director-override.sh
bbl up

- name: Authenticate to Google Cloud
Expand Down Expand Up @@ -131,7 +137,9 @@ jobs:
cd $env_name/bbl-state
eval "$(bbl print-env --shell-type posix)"

bosh update-runtime-config ${GITHUB_WORKSPACE}/bosh-deployment/runtime-configs/dns.yml --name dns
bosh update-runtime-config ${GITHUB_WORKSPACE}/bosh-deployment/runtime-configs/dns.yml \
-o ${GITHUB_WORKSPACE}/cli/.github/ops-files/bosh-dns-noble-bosh-lite.yml \
--name dns
STEMCELL_VERSION=$(bosh interpolate ${GITHUB_WORKSPACE}/cf-deployment/cf-deployment.yml --path /stemcells/alias=default/version)
bosh upload-stemcell "https://bosh.io/d/stemcells/bosh-warden-boshlite-ubuntu-noble?v=${STEMCELL_VERSION}"
bosh update-cloud-config ${GITHUB_WORKSPACE}/cf-deployment/iaas-support/bosh-lite/cloud-config.yml
Expand Down Expand Up @@ -167,7 +175,7 @@ jobs:
eval "$(bbl print-env --shell-type posix)"

echo "Deleting env ${env_name}"
bbl down --no-confirm --gcp-service-account-key=key.json
bbl down --no-confirm

echo "Deleting bbl state directory"
if gsutil ls gs://cf-cli-bosh-lites | grep -q /${env_name}/; then
Expand Down
Loading