VM boot & setup timeouts

This commit is contained in:
AlexSSD7 2023-08-29 11:51:06 +01:00
commit 970664429a
6 changed files with 89 additions and 19 deletions

View file

@ -53,6 +53,7 @@ func NewBuildContext(logger *slog.Logger, baseISOPath string, outPath string, sh
Drives: []vm.DriveConfig{{ Drives: []vm.DriveConfig{{
Path: outPath, Path: outPath,
}}, }},
MemoryAlloc: 512,
UnrestrictedNetworking: true, UnrestrictedNetworking: true,
ShowDisplay: showVMDisplay, ShowDisplay: showVMDisplay,
InstallBaseUtilities: true, InstallBaseUtilities: true,

View file

@ -26,7 +26,9 @@ func Execute() {
var vmDebugFlag bool var vmDebugFlag bool
var unrestrictedNetworkingFlag bool var unrestrictedNetworkingFlag bool
var vmMemAllocFlag uint64 var vmMemAllocFlag uint32
var vmSSHSetupTimeoutFlag uint32
var vmOSUpTimeoutFlag uint32
// TODO: Version command. // TODO: Version command.
@ -39,5 +41,7 @@ func init() {
rootCmd.PersistentFlags().BoolVar(&vmDebugFlag, "vmdebug", false, "Enables the VM debug mode. This will open an accessible VM monitor. You can log in with root user and no password.") rootCmd.PersistentFlags().BoolVar(&vmDebugFlag, "vmdebug", false, "Enables the VM debug mode. This will open an accessible VM monitor. You can log in with root user and no password.")
rootCmd.PersistentFlags().BoolVar(&unrestrictedNetworkingFlag, "unrestricted-networking", false, "Enables unrestricted networking. This will allow the VM to connect to the internet.") rootCmd.PersistentFlags().BoolVar(&unrestrictedNetworkingFlag, "unrestricted-networking", false, "Enables unrestricted networking. This will allow the VM to connect to the internet.")
rootCmd.PersistentFlags().Uint64Var(&vmMemAllocFlag, "vm-mem-alloc", 512, "Specifies the VM memory allocation in KiB") rootCmd.PersistentFlags().Uint32Var(&vmMemAllocFlag, "vm-mem-alloc", 512, "Specifies the VM memory allocation in KiB")
rootCmd.PersistentFlags().Uint32Var(&vmOSUpTimeoutFlag, "vm-os-up-timeout", 30, "Specifies the VM OS-up timeout in seconds.")
rootCmd.PersistentFlags().Uint32Var(&vmSSHSetupTimeoutFlag, "vm-ssh-setup-timeout", 60, "Specifies the VM SSH server setup timeout in seconds. This cannot be lower than the OS-up timeout.")
} }

View file

@ -10,6 +10,7 @@ import (
"runtime" "runtime"
"sync" "sync"
"syscall" "syscall"
"time"
"log/slog" "log/slog"
@ -69,6 +70,9 @@ func runVM(passthroughArg string, fn func(context.Context, *vm.VM, *vm.FileManag
USBDevices: passthroughConfig, USBDevices: passthroughConfig,
ExtraPortForwardingRules: forwardPortsRules, ExtraPortForwardingRules: forwardPortsRules,
OSUpTimeout: time.Duration(vmOSUpTimeoutFlag) * time.Second,
SSHUpTimeout: time.Duration(vmSSHSetupTimeoutFlag) * time.Second,
UnrestrictedNetworking: unrestrictedNetworking, UnrestrictedNetworking: unrestrictedNetworking,
ShowDisplay: vmDebugFlag, ShowDisplay: vmDebugFlag,
} }

View file

@ -8,10 +8,10 @@ import (
) )
func WrapErrWithLog(err error, msg, log string) error { func WrapErrWithLog(err error, msg, log string) error {
return errors.Wrapf(err, "%v %v", msg, GetLogErrMsg(log)) return errors.Wrapf(err, "%v %v", msg, GetLogErrMsg(log, "log"))
} }
func GetLogErrMsg(s string) string { func GetLogErrMsg(s string, logLabel string) string {
logToInclude := strings.ReplaceAll(s, "\n", "\\n") logToInclude := strings.ReplaceAll(s, "\n", "\\n")
logToInclude = strings.TrimSuffix(logToInclude, "\\n") logToInclude = strings.TrimSuffix(logToInclude, "\\n")
logToInclude = ClearUnprintableChars(logToInclude, false) logToInclude = ClearUnprintableChars(logToInclude, false)
@ -22,5 +22,5 @@ func GetLogErrMsg(s string) string {
logToInclude = fmt.Sprintf("[%v chars trimmed]", origLogLen) + logToInclude[len(logToInclude)-maxLogLen:] logToInclude = fmt.Sprintf("[%v chars trimmed]", origLogLen) + logToInclude[len(logToInclude)-maxLogLen:]
} }
return fmt.Sprintf("(log: '%v')", logToInclude) return fmt.Sprintf("(%v: '%v')", logLabel, logToInclude)
} }

View file

@ -121,7 +121,7 @@ func (vm *VM) sshSetup() (ssh.Signer, error) {
case <-vm.ctx.Done(): case <-vm.ctx.Done():
return nil, vm.ctx.Err() return nil, vm.ctx.Err()
case <-time.After(time.Until(deadline)): case <-time.After(time.Until(deadline)):
return nil, fmt.Errorf("setup command timed out %v", utils.GetLogErrMsg(stdOutErrBuf.String())) return nil, fmt.Errorf("setup command timed out %v", utils.GetLogErrMsg(stdOutErrBuf.String(), "stdout/stderr log"))
case data := <-vm.serialStdoutCh: case data := <-vm.serialStdoutCh:
prefix := []byte("SERIAL STATUS: ") prefix := []byte("SERIAL STATUS: ")
stdOutErrBuf.WriteString(utils.ClearUnprintableChars(string(data), true)) stdOutErrBuf.WriteString(utils.ClearUnprintableChars(string(data), true))
@ -132,7 +132,7 @@ func (vm *VM) sshSetup() (ssh.Signer, error) {
if data[len(prefix)] != '0' { if data[len(prefix)] != '0' {
fmt.Fprintf(os.Stderr, "SSH SETUP FAILURE:\n%v", stdOutErrBuf.String()) fmt.Fprintf(os.Stderr, "SSH SETUP FAILURE:\n%v", stdOutErrBuf.String())
return nil, fmt.Errorf("non-zero setup command status code: '%v' %v", string(data[len(prefix)]), utils.GetLogErrMsg(stdOutErrBuf.String())) return nil, fmt.Errorf("non-zero setup command status code: '%v' %v", string(data[len(prefix)]), utils.GetLogErrMsg(stdOutErrBuf.String(), "stdout/stderr log"))
} }
return sshSigner, nil return sshSigner, nil

View file

@ -43,7 +43,10 @@ type VM struct {
serialReader *bufio.Reader serialReader *bufio.Reader
serialWrite *io.PipeWriter serialWrite *io.PipeWriter
serialWriteMu sync.Mutex serialWriteMu sync.Mutex
stderrBuf *bytes.Buffer qemuStderrBuf *bytes.Buffer
osUpTimeout time.Duration
sshUpTimeout time.Duration
serialStdoutCh chan []byte serialStdoutCh chan []byte
@ -61,11 +64,15 @@ type VMConfig struct {
CdromImagePath string CdromImagePath string
Drives []DriveConfig Drives []DriveConfig
MemoryAlloc uint64 // In KiB. MemoryAlloc uint32 // In KiB.
USBDevices []USBDevicePassthroughConfig USBDevices []USBDevicePassthroughConfig
ExtraPortForwardingRules []PortForwardingRule ExtraPortForwardingRules []PortForwardingRule
// Timeouts
OSUpTimeout time.Duration
SSHUpTimeout time.Duration
// Mostly debug-related options. // Mostly debug-related options.
UnrestrictedNetworking bool UnrestrictedNetworking bool
ShowDisplay bool ShowDisplay bool
@ -175,6 +182,24 @@ func NewVM(logger *slog.Logger, cfg VMConfig) (*VM, error) {
return nil, fmt.Errorf("cannot install base utilities with unrestricted networking disabled") return nil, fmt.Errorf("cannot install base utilities with unrestricted networking disabled")
} }
// NOTE: The default timeouts below have no relation to the default
// timeouts set by the CLI. These work only if no timeout was supplied
// in the config programmatically. Defaults set here are quite conservative.
osUpTimeout := time.Second * 60
if cfg.OSUpTimeout != 0 {
osUpTimeout = cfg.OSUpTimeout
}
sshUpTimeout := time.Second * 120
if cfg.SSHUpTimeout != 0 {
sshUpTimeout = cfg.SSHUpTimeout
}
if sshUpTimeout < osUpTimeout {
return nil, fmt.Errorf("vm ssh setup timeout cannot be lower than os up timeout")
}
// No errors beyond this point.
sysRead, userWrite := io.Pipe() sysRead, userWrite := io.Pipe()
userRead, sysWrite := io.Pipe() userRead, sysWrite := io.Pipe()
@ -207,7 +232,10 @@ func NewVM(logger *slog.Logger, cfg VMConfig) (*VM, error) {
serialRead: userRead, serialRead: userRead,
serialReader: userReader, serialReader: userReader,
serialWrite: userWrite, serialWrite: userWrite,
stderrBuf: stderrBuf, qemuStderrBuf: stderrBuf,
osUpTimeout: osUpTimeout,
sshUpTimeout: sshUpTimeout,
} }
vm.resetSerialStdout() vm.resetSerialStdout()
@ -235,6 +263,27 @@ func (vm *VM) Run() error {
globalErrs = append(globalErrs, err, errors.Wrap(vm.Cancel(), "cancel on error")) globalErrs = append(globalErrs, err, errors.Wrap(vm.Cancel(), "cancel on error"))
} }
bootReadyCh := make(chan struct{})
go func() {
select {
case <-time.After(vm.osUpTimeout):
vm.logger.Warn("A VM boot timeout detected, consider running with --vmdebug to investigate")
globalErrFn(fmt.Errorf("vm boot timeout %v", utils.GetLogErrMsg(string(vm.consumeSerialStdout()), "serial log")))
case <-bootReadyCh:
vm.logger.Info("The VM is up, setting it up")
}
}()
go func() {
select {
case <-time.After(vm.sshUpTimeout):
globalErrFn(fmt.Errorf("vm setup timeout %v", utils.GetLogErrMsg(string(vm.consumeSerialStdout()), "serial log")))
case <-vm.sshReadyCh:
vm.logger.Info("The VM is ready")
}
}()
vm.logger.Info("Booting the VM") vm.logger.Info("Booting the VM")
go func() { go func() {
@ -249,7 +298,8 @@ func (vm *VM) Run() error {
return return
} }
vm.logger.Info("Setting the VM up") // This will disable the timeout-handling goroutine.
close(bootReadyCh)
sshSigner, err := vm.sshSetup() sshSigner, err := vm.sshSetup()
if err != nil { if err != nil {
@ -284,8 +334,6 @@ func (vm *VM) Run() error {
// This is to notify everyone waiting for SSH to be up that it's ready to go. // This is to notify everyone waiting for SSH to be up that it's ready to go.
close(vm.sshReadyCh) close(vm.sshReadyCh)
vm.logger.Info("The VM is ready")
}() }()
_, err = vm.cmd.Process.Wait() _, err = vm.cmd.Process.Wait()
@ -296,14 +344,14 @@ func (vm *VM) Run() error {
errors.Wrap(cancelErr, "cancel"), errors.Wrap(cancelErr, "cancel"),
) )
return fmt.Errorf("%w %v", combinedErr, utils.GetLogErrMsg(vm.stderrBuf.String())) return fmt.Errorf("%w %v", combinedErr, utils.GetLogErrMsg(vm.qemuStderrBuf.String(), "qemu stderr log"))
} }
combinedErr := multierr.Combine( combinedErr := multierr.Combine(
append(globalErrs, errors.Wrap(cancelErr, "cancel on exit"))..., append(globalErrs, errors.Wrap(cancelErr, "cancel on exit"))...,
) )
if combinedErr != nil { if combinedErr != nil {
return fmt.Errorf("%w %v", combinedErr, utils.GetLogErrMsg(vm.stderrBuf.String())) return fmt.Errorf("%w %v", combinedErr, utils.GetLogErrMsg(vm.qemuStderrBuf.String(), "qemu stderr log"))
} }
return nil return nil
@ -389,7 +437,7 @@ func (vm *VM) runVMLoginHandler() error {
for { for {
select { select {
case <-vm.ctx.Done(): case <-vm.ctx.Done():
return nil return vm.ctx.Err()
case <-time.After(time.Second): case <-time.After(time.Second):
peek, err := vm.serialReader.Peek(vm.serialReader.Buffered()) peek, err := vm.serialReader.Peek(vm.serialReader.Buffered())
if err != nil { if err != nil {
@ -414,6 +462,19 @@ func (vm *VM) resetSerialStdout() {
vm.serialStdoutCh = make(chan []byte, 32) vm.serialStdoutCh = make(chan []byte, 32)
} }
func (vm *VM) consumeSerialStdout() []byte {
buf := bytes.NewBuffer(nil)
for {
select {
case data := <-vm.serialStdoutCh:
buf.Write(data)
default:
return buf.Bytes()
}
}
}
func (vm *VM) DialSSH() (*ssh.Client, error) { func (vm *VM) DialSSH() (*ssh.Client, error) {
if vm.sshConf == nil { if vm.sshConf == nil {
return nil, ErrSSHUnavailable return nil, ErrSSHUnavailable