Feature Request: Add Enhanced Deployment Error Reporting and Logging (#440)

* Enhance manifest stability with detailed error reporting and logging

* Enhance manifest stability with detailed error reporting and logging

* Refactored the getContainerErrors Function to enhance readability

* Added an early return for the getContainerErrors function

* Eliminated redundant conditionals

---------

Co-authored-by: benjamin <145829787+benjaminbob21@users.noreply.github.com>
Co-authored-by: Suneha Bose <123775811+bosesuneha@users.noreply.github.com>
Co-authored-by: David Gamero <david340804@gmail.com>
This commit is contained in:
Ogheneobukome Ejaife
2025-07-29 15:07:04 -04:00
committed by GitHub
parent c93bf5dafe
commit bf3422cff9
2 changed files with 564 additions and 23 deletions
+111 -23
View File
@@ -21,6 +21,14 @@ export async function checkManifestStability(
return
}
let rolloutStatusHasErrors = false
// Collect errors for reporting
// This will be used to throw a detailed error at the end if any rollout fails
// This is useful for debugging and understanding which resources failed
// their rollout status check
// It will also include the describe output for the resource that failed
// to provide more context on the failure
const rolloutErrors: string[] = []
for (let i = 0; i < resources.length; i++) {
const resource = resources[i]
@@ -38,20 +46,37 @@ export async function checkManifestStability(
)
checkForErrors([result])
} catch (ex) {
core.error(ex)
await kubectl.describe(
resource.type,
resource.name,
IS_SILENT,
resource.namespace
)
const errorMessage = `Rollout failed for ${resource.type}/${resource.name} in namespace ${resource.namespace}: ${ex.message || ex}`
core.error(errorMessage)
rolloutErrors.push(errorMessage)
// Get more detailed information
try {
const describeResult = await kubectl.describe(
resource.type,
resource.name,
IS_SILENT,
resource.namespace
)
core.info(
`Describe output for ${resource.type}/${resource.name}:\n${describeResult.stdout}`
)
} catch (describeEx) {
core.warning(
`Could not describe ${resource.type}/${resource.name}: ${describeEx}`
)
}
rolloutStatusHasErrors = true
}
}
if (resource.type == KubernetesConstants.KubernetesWorkload.POD) {
if (
resource.type.toLowerCase() ===
KubernetesConstants.KubernetesWorkload.POD.toLowerCase()
) {
try {
await checkPodStatus(kubectl, resource)
await exports.checkPodStatus(kubectl, resource)
} catch (ex) {
core.warning(
`Could not determine pod status: ${JSON.stringify(ex)}`
@@ -81,21 +106,31 @@ export async function checkManifestStability(
}
}
} catch (ex) {
core.warning(
`Could not determine service status of: ${resource.name} Error: ${ex}`
)
await kubectl.describe(
resource.type,
resource.name,
IS_SILENT,
resource.namespace
)
const errorMessage = `Could not determine service status of: ${resource.name} in namespace ${resource.namespace}. Error: ${ex.message || ex}`
core.warning(errorMessage)
try {
const describeResult = await kubectl.describe(
resource.type,
resource.name,
IS_SILENT,
resource.namespace
)
core.info(
`Describe output for service/${resource.name}:\n${describeResult.stdout}`
)
} catch (describeEx) {
core.warning(
`Could not describe service/${resource.name}: ${describeEx}`
)
}
}
}
}
if (rolloutStatusHasErrors) {
throw new Error('Rollout status error')
const detailedError = `Rollout status failed for the following resources:\n${rolloutErrors.join('\n')}`
throw new Error(detailedError)
}
}
@@ -108,6 +143,8 @@ export async function checkPodStatus(
let podStatus
let kubectlDescribeNeeded = false
let errorDetails = ''
for (let i = 0; i < iterations; i++) {
await sleep(sleepTimeout)
@@ -124,31 +161,56 @@ export async function checkPodStatus(
}
podStatus = await getPodStatus(kubectl, pod)
// Get container statuses for detailed error information
const containerErrors = getContainerErrors(podStatus)
switch (podStatus.phase) {
case 'Succeeded':
case 'Running':
if (isPodReady(podStatus)) {
console.log(`pod/${pod.name} is successfully rolled out`)
} else {
errorDetails = `Pod ${pod.name} is ${podStatus.phase} but not ready. ${containerErrors}`
core.error(errorDetails)
kubectlDescribeNeeded = true
}
break
case 'Pending':
if (!isPodReady(podStatus)) {
core.warning(`pod/${pod.name} rollout status check timed out`)
errorDetails = `Pod ${pod.name} rollout status check timed out (still Pending after ${(iterations * sleepTimeout) / 1000} seconds). ${containerErrors}`
core.warning(errorDetails)
kubectlDescribeNeeded = true
}
break
case 'Failed':
core.error(`pod/${pod.name} rollout failed`)
errorDetails = `Pod ${pod.name} rollout failed. ${containerErrors}`
core.error(errorDetails)
kubectlDescribeNeeded = true
break
default:
core.warning(`pod/${pod.name} rollout status: ${podStatus.phase}`)
errorDetails = `Pod ${pod.name} has unexpected status: ${podStatus.phase}. ${containerErrors}`
core.warning(errorDetails)
kubectlDescribeNeeded = true
}
if (kubectlDescribeNeeded) {
await kubectl.describe(POD, pod.name, IS_SILENT, pod.namespace)
try {
const describeResult = await kubectl.describe(
POD,
pod.name,
IS_SILENT,
pod.namespace
)
core.info(
`Describe output for pod/${pod.name}:\n${describeResult.stdout}`
)
} catch (describeEx) {
core.warning(`Could not describe pod/${pod.name}: ${describeEx}`)
}
// Throw error with detailed information
if (errorDetails) {
throw new Error(errorDetails)
}
}
}
@@ -182,6 +244,32 @@ function isPodReady(podStatus: any): boolean {
return allContainersAreReady
}
export function getContainerErrors(podStatus: any): string {
const errors: string[] = []
const collectErrors = (containers: any[], label: string) => {
containers?.forEach(({name, ready, state}) => {
if (ready) return
if (state?.waiting) {
errors.push(
`${label} '${name}' is waiting: ${state.waiting.reason} - ${state.waiting.message || 'No message'}`
)
} else if (state?.terminated) {
errors.push(
`${label} '${name}' terminated: ${state.terminated.reason} - ${state.terminated.message || 'No message'}`
)
} else {
errors.push(
`${label} '${name}' is not ready: ${JSON.stringify(state)}`
)
}
})
}
collectErrors(podStatus.containerStatuses, 'Container')
collectErrors(podStatus.initContainerStatuses, 'Init container')
return errors.length ? `Container issues: ${errors.join('; ')}` : ''
}
async function getService(kubectl: Kubectl, service: Resource) {
const serviceResult = await kubectl.getResource(
KubernetesConstants.DiscoveryAndLoadBalancerResource.SERVICE,