feat: handle upgrade job
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
package managed_job
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flink-kube-operator/internal/crd/v1alpha1"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -12,8 +11,8 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// restore the job from savepoint and jarId in managedJob
|
||||
func (job *ManagedJob) restore() error {
|
||||
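// run starts the job via job.client.RunJar using the savepoint path and jar ID
// recorded in the managed job's status; when the jar ID is missing or the run
// error indicates the jar does not exist, it calls job.upload and retries.
// NOTE(review): the body checks the error with
// strings.ContainsAny(err.Error(), ".jar does not exist"), which matches any
// single character of that set rather than the substring; strings.Contains is
// presumably what was intended. Confirm and fix.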
|
||||
func (job *ManagedJob) run() error {
|
||||
var savepointPath string
|
||||
if job.def.Status.LastSavepointPath == nil {
|
||||
pkg.Logger.Error("[managed-job] [restore]", zap.Error(v1alpha1.ErrNoSavepointPath))
|
||||
@@ -21,39 +20,48 @@ func (job *ManagedJob) restore() error {
|
||||
} else {
|
||||
savepointPath = *job.def.Status.LastSavepointPath
|
||||
}
|
||||
if job.def.Status.JarId == nil {
|
||||
err := errors.New("missing jar id")
|
||||
pkg.Logger.Error("[managed-job] [restore]", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
pkg.Logger.Info("[managed-job] [restore] restoring job", zap.String("name", job.def.GetName()), zap.String("savepointPath", savepointPath))
|
||||
var jobId *string
|
||||
for {
|
||||
runJarResp, err := job.client.RunJar(api.RunOpts{
|
||||
JarID: *job.def.Status.JarId,
|
||||
AllowNonRestoredState: true,
|
||||
EntryClass: job.def.Spec.EntryClass,
|
||||
SavepointPath: savepointPath,
|
||||
})
|
||||
if err != nil {
|
||||
if strings.ContainsAny(err.Error(), ".jar does not exist") {
|
||||
err := job.upload()
|
||||
if err != nil {
|
||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||
"status": map[string]interface{}{
|
||||
"error": "[upload-error] " + err.Error(),
|
||||
},
|
||||
})
|
||||
return nil
|
||||
shouldUpload := false
|
||||
if job.def.Status.JarId == nil {
|
||||
err := v1alpha1.ErrNoJarId
|
||||
pkg.Logger.Error("[managed-job] [run]", zap.Error(err))
|
||||
shouldUpload = true
|
||||
} else {
|
||||
runJarResp, err := job.client.RunJar(api.RunOpts{
|
||||
JarID: *job.def.Status.JarId,
|
||||
AllowNonRestoredState: true,
|
||||
EntryClass: job.def.Spec.EntryClass,
|
||||
SavepointPath: savepointPath,
|
||||
})
|
||||
if err == nil {
|
||||
pkg.Logger.Info("[managed-job] [run] jar successfully ran", zap.Any("run-jar-resp", runJarResp))
|
||||
jobId = &runJarResp.JobId
|
||||
break
|
||||
} else {
|
||||
if strings.ContainsAny(err.Error(), ".jar does not exist") {
|
||||
shouldUpload = true
|
||||
} else {
|
||||
pkg.Logger.Error("[managed-job] [run] unhandled jar run Flink error", zap.Error(err))
|
||||
}
|
||||
continue
|
||||
}
|
||||
pkg.Logger.Error("[managed-job] [restore]", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
jobId = &runJarResp.JobId
|
||||
pkg.Logger.Debug("[main] after run jar", zap.Any("run-jar-resp", runJarResp))
|
||||
break
|
||||
|
||||
if shouldUpload {
|
||||
err := job.upload()
|
||||
if err != nil {
|
||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||
"status": map[string]interface{}{
|
||||
"error": "[upload-error] " + err.Error(),
|
||||
},
|
||||
})
|
||||
return nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// job.def.Status.JobId = &runJarResp.JobId
|
||||
@@ -62,6 +70,7 @@ func (job *ManagedJob) restore() error {
|
||||
job.crd.Patch(job.def.UID, map[string]interface{}{
|
||||
"status": map[string]interface{}{
|
||||
"jobId": jobId,
|
||||
"runningJarURI": job.def.Spec.JarURI,
|
||||
"jobStatus": v1alpha1.JobStatusCreating,
|
||||
"lifeCycleStatus": v1alpha1.LifeCycleStatusRestoring,
|
||||
"lastRestoredSavepointDate": job.def.Status.LastSavepointDate,
|
||||
|
||||
Reference in New Issue
Block a user