feat: handle upgrade job

This commit is contained in:
2024-12-13 15:00:43 +03:30
parent 89702d287a
commit 9b219d967e
12 changed files with 147 additions and 127 deletions

View File

@@ -1,7 +1,6 @@
package managed_job
import (
"errors"
"flink-kube-operator/internal/crd/v1alpha1"
"strings"
"time"
@@ -12,8 +11,8 @@ import (
"go.uber.org/zap"
)
// restore the job from savepoint and jarId in managedJob
func (job *ManagedJob) restore() error {
// run starts the job from the last savepoint path, using the jar ID stored in the managed job's status.
func (job *ManagedJob) run() error {
var savepointPath string
if job.def.Status.LastSavepointPath == nil {
pkg.Logger.Error("[managed-job] [restore]", zap.Error(v1alpha1.ErrNoSavepointPath))
@@ -21,39 +20,48 @@ func (job *ManagedJob) restore() error {
} else {
savepointPath = *job.def.Status.LastSavepointPath
}
if job.def.Status.JarId == nil {
err := errors.New("missing jar id")
pkg.Logger.Error("[managed-job] [restore]", zap.Error(err))
return err
}
pkg.Logger.Info("[managed-job] [restore] restoring job", zap.String("name", job.def.GetName()), zap.String("savepointPath", savepointPath))
var jobId *string
for {
runJarResp, err := job.client.RunJar(api.RunOpts{
JarID: *job.def.Status.JarId,
AllowNonRestoredState: true,
EntryClass: job.def.Spec.EntryClass,
SavepointPath: savepointPath,
})
if err != nil {
if strings.ContainsAny(err.Error(), ".jar does not exist") {
err := job.upload()
if err != nil {
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"error": "[upload-error] " + err.Error(),
},
})
return nil
shouldUpload := false
if job.def.Status.JarId == nil {
err := v1alpha1.ErrNoJarId
pkg.Logger.Error("[managed-job] [run]", zap.Error(err))
shouldUpload = true
} else {
runJarResp, err := job.client.RunJar(api.RunOpts{
JarID: *job.def.Status.JarId,
AllowNonRestoredState: true,
EntryClass: job.def.Spec.EntryClass,
SavepointPath: savepointPath,
})
if err == nil {
pkg.Logger.Info("[managed-job] [run] jar successfully ran", zap.Any("run-jar-resp", runJarResp))
jobId = &runJarResp.JobId
break
} else {
if strings.ContainsAny(err.Error(), ".jar does not exist") {
shouldUpload = true
} else {
pkg.Logger.Error("[managed-job] [run] unhandled jar run Flink error", zap.Error(err))
}
continue
}
pkg.Logger.Error("[managed-job] [restore]", zap.Error(err))
return err
}
jobId = &runJarResp.JobId
pkg.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
break
if shouldUpload {
err := job.upload()
if err != nil {
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"error": "[upload-error] " + err.Error(),
},
})
return nil
}
continue
}
return nil
}
// job.def.Status.JobId = &runJarResp.JobId
@@ -62,6 +70,7 @@ func (job *ManagedJob) restore() error {
job.crd.Patch(job.def.UID, map[string]interface{}{
"status": map[string]interface{}{
"jobId": jobId,
"runningJarURI": job.def.Spec.JarURI,
"jobStatus": v1alpha1.JobStatusCreating,
"lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring,
"lastRestoredSavepointDate": job.def.Status.LastSavepointDate,