feat: handle job run errors and set life-cycle status to failed

This commit is contained in:
2024-12-18 10:08:30 +03:30
parent 03fe9910a3
commit 5e3f093f08
6 changed files with 15 additions and 5 deletions

2
.vscode/launch.json vendored
View File

@@ -10,7 +10,7 @@
"request": "launch", "request": "launch",
"mode": "auto", "mode": "auto",
"env": { "env": {
"FLINK_API_URL": "127.0.0.1:8081", "FLINK_API_URL": "flink.bz2:8081",
"SAVEPOINT_PATH": "/opt/flink/savepoints" "SAVEPOINT_PATH": "/opt/flink/savepoints"
}, },
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",

View File

@@ -14,6 +14,7 @@ import (
func (crd *Crd) Patch(jobUid types.UID, patchData map[string]interface{}) error { func (crd *Crd) Patch(jobUid types.UID, patchData map[string]interface{}) error {
job := GetJob(jobUid) job := GetJob(jobUid)
pkg.Logger.Debug("[patch-job]", zap.Any("jobUid", jobUid))
patchBytes, err := json.Marshal(patchData) patchBytes, err := json.Marshal(patchData)
if err != nil { if err != nil {

View File

@@ -54,6 +54,7 @@ var (
ErrNoJarId = errors.New("[managed-job] no jar id") ErrNoJarId = errors.New("[managed-job] no jar id")
ErrNoSavepointTriggerId = errors.New("[managed-job] no savepoint trigger id") ErrNoSavepointTriggerId = errors.New("[managed-job] no savepoint trigger id")
ErrNoSavepointPath = errors.New("[managed-job] no savepoint path") ErrNoSavepointPath = errors.New("[managed-job] no savepoint path")
ErrOnStartingJob = errors.New("[managed-job] error on starting job")
) )
type JobStatus string type JobStatus string

View File

@@ -26,7 +26,6 @@ func (crd Crd) watchFlinkJobs() rxgo.Observable {
} }
defer watcher.Stop() defer watcher.Stop()
for event := range watcher.ResultChan() { for event := range watcher.ResultChan() {
pkg.Logger.Debug("[crd] event received", zap.Any("type", event.Type))
unstructuredJob := event.Object.(*unstructured.Unstructured) unstructuredJob := event.Object.(*unstructured.Unstructured)
unstructuredMap, _, err := unstructured.NestedMap(unstructuredJob.Object) unstructuredMap, _, err := unstructured.NestedMap(unstructuredJob.Object)
if err != nil { if err != nil {
@@ -50,10 +49,10 @@ func (crd Crd) watchFlinkJobs() rxgo.Observable {
switch event.Type { switch event.Type {
case watch.Bookmark: case watch.Bookmark:
case watch.Modified: case watch.Modified:
pkg.Logger.Info("[crd] [watch] flink job modified", zap.String("jobName", job.GetName())) //pkg.Logger.Info("[crd] [watch] flink job modified", zap.String("jobName", job.GetName()))
crd.repsert(job) crd.repsert(job)
case watch.Added: case watch.Added:
pkg.Logger.Info("[crd] [watch] new flink job created") //pkg.Logger.Info("[crd] [watch] new flink job created")
crd.repsert(job) crd.repsert(job)
case watch.Deleted: case watch.Deleted:
} }

View File

@@ -48,10 +48,17 @@ func (job *ManagedJob) run(restoreMode bool) error {
jobId = &runJarResp.JobId jobId = &runJarResp.JobId
break break
} else { } else {
if strings.ContainsAny(err.Error(), ".jar does not exist") { if strings.Contains(err.Error(), ".jar does not exist") {
pkg.Logger.Error("[managed-job] [run] unhandled jar run Flink error", zap.Error(err))
shouldUpload = true shouldUpload = true
} else { } else {
pkg.Logger.Error("[managed-job] [run] unhandled jar run Flink error", zap.Error(err)) pkg.Logger.Error("[managed-job] [run] unhandled jar run Flink error", zap.Error(err))
stringErr := err.Error()
job.def.Status.Error = &stringErr
job.def.Status.JobStatus = ""
job.def.Status.LifeCycleStatus = v1alpha1.LifeCycleStatusFailed
job.crd.SetJobStatus(job.def.UID, job.def.Status)
return v1alpha1.ErrOnStartingJob
} }
} }
} }
@@ -66,6 +73,7 @@ func (job *ManagedJob) run(restoreMode bool) error {
}) })
return nil return nil
} }
shouldUpload = false
continue continue
} }
return nil return nil

View File

@@ -58,6 +58,7 @@ func (mgr *Manager) cycle(client *api.Client, crdInstance *crd.Crd) {
// Loop over job definitions as Kubernetes CRD // Loop over job definitions as Kubernetes CRD
for _, uid := range crd.GetAllJobKeys() { for _, uid := range crd.GetAllJobKeys() {
pkg.Logger.Debug("mgr.processingJobsIds", zap.Any("processingJobIds", mgr.processingJobsIds))
if lo.Contains(mgr.processingJobsIds, uid) { if lo.Contains(mgr.processingJobsIds, uid) {
pkg.Logger.Warn("[manager] already in process", zap.Any("uid", uid)) pkg.Logger.Warn("[manager] already in process", zap.Any("uid", uid))
continue continue