2023-09-02 20:03:44 +01:00
// Linsk - A utility to access Linux-native file systems on non-Linux operating systems.
// Copyright (c) 2023 The Linsk Authors.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
2023-08-25 15:12:19 +01:00
package vm
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
2023-09-06 13:53:11 +01:00
"os"
2023-08-25 15:12:19 +01:00
"os/exec"
"sync"
"sync/atomic"
"time"
2023-08-25 16:54:58 +01:00
"log/slog"
2023-09-01 11:44:49 +01:00
"github.com/AlexSSD7/linsk/osspecifics"
2023-09-02 11:10:14 +01:00
"github.com/AlexSSD7/linsk/qemucli"
2023-08-29 14:24:18 +01:00
"github.com/AlexSSD7/linsk/sshutil"
2023-08-27 15:53:44 +01:00
"github.com/AlexSSD7/linsk/utils"
2023-08-26 16:26:35 +01:00
"github.com/bramvdbogaerde/go-scp"
2023-08-25 15:12:19 +01:00
"github.com/phayes/freeport"
"github.com/pkg/errors"
"go.uber.org/multierr"
"golang.org/x/crypto/ssh"
)
2023-08-27 13:44:57 +01:00
type VM struct {
2023-08-25 16:54:58 +01:00
logger * slog . Logger
2023-08-25 15:12:19 +01:00
ctx context . Context
ctxCancel context . CancelFunc
cmd * exec . Cmd
sshMappedPort uint16
sshConf * ssh . ClientConfig
sshReadyCh chan struct { }
2023-08-27 15:30:51 +01:00
installSSH bool
2023-08-25 15:12:19 +01:00
serialRead * io . PipeReader
serialReader * bufio . Reader
serialWrite * io . PipeWriter
serialWriteMu sync . Mutex
2023-08-29 11:51:06 +01:00
qemuStderrBuf * bytes . Buffer
osUpTimeout time . Duration
sshUpTimeout time . Duration
2023-08-25 15:12:19 +01:00
serialStdoutCh chan [ ] byte
// These are to be interacted with using `atomic` package
disposed uint32
canceled uint32
2023-08-29 15:31:17 +01:00
2023-09-02 12:07:30 +01:00
originalCfg Config
2023-08-25 15:12:19 +01:00
}
2023-08-27 15:30:51 +01:00
// DriveConfig describes a single entry of Config.Drives.
type DriveConfig struct {
	// Path locates the drive.
	// NOTE(review): presumably a host path to a disk image — confirm
	// against the drive argument builder.
	Path string
	// SnapshotMode toggles snapshot mode for this drive.
	// NOTE(review): presumably guest writes are then not persisted — confirm.
	SnapshotMode bool
}
2023-08-31 16:23:40 +01:00
// TapConfig describes a single entry of Config.Taps.
type TapConfig struct {
	// Name identifies the tap.
	// NOTE(review): presumably the host tap interface name — confirm
	// against the networking argument builder.
	Name string
}
2023-09-02 12:07:30 +01:00
type Config struct {
2023-08-27 13:44:57 +01:00
CdromImagePath string
2023-08-30 13:13:08 +01:00
BIOSPath string
2023-08-27 15:53:44 +01:00
Drives [ ] DriveConfig
2023-08-27 13:44:57 +01:00
2023-08-29 11:51:06 +01:00
MemoryAlloc uint32 // In KiB.
2023-08-29 10:59:50 +01:00
2023-08-29 15:31:17 +01:00
PassthroughConfig PassthroughConfig
2023-08-27 13:44:57 +01:00
ExtraPortForwardingRules [ ] PortForwardingRule
2023-08-31 16:23:40 +01:00
// Networking
UnrestrictedNetworking bool
Taps [ ] TapConfig
2023-08-29 11:51:06 +01:00
// Timeouts
OSUpTimeout time . Duration
SSHUpTimeout time . Duration
2023-08-27 15:30:51 +01:00
// Mostly debug-related options.
2023-09-06 13:53:11 +01:00
Debug bool // This will show the display and forward all QEMU warnings/errors to stderr.
2023-08-31 16:23:40 +01:00
InstallBaseUtilities bool
2023-08-27 13:44:57 +01:00
}
2023-09-02 12:07:30 +01:00
func NewVM ( logger * slog . Logger , cfg Config ) ( * VM , error ) {
2023-08-25 15:12:19 +01:00
sshPort , err := freeport . GetFreePort ( )
if err != nil {
return nil , errors . Wrap ( err , "get free port for ssh server" )
}
2023-09-02 11:10:14 +01:00
baseCmd , cmdArgs , err := configureBaseVMCmd ( logger , cfg )
if err != nil {
return nil , errors . Wrap ( err , "configure base vm cmd" )
2023-08-31 16:23:40 +01:00
}
2023-09-02 11:10:14 +01:00
netCmdArgs , err := configureVMCmdNetworking ( logger , cfg , uint16 ( sshPort ) )
if err != nil {
return nil , errors . Wrap ( err , "configure vm cmd networking" )
2023-08-25 15:12:19 +01:00
}
2023-09-02 11:10:14 +01:00
cmdArgs = append ( cmdArgs , netCmdArgs ... )
2023-08-30 13:32:00 +01:00
2023-09-02 11:10:14 +01:00
driveCmdArgs , err := configureVMCmdDrives ( cfg )
if err != nil {
return nil , errors . Wrap ( err , "configure vm cmd drives" )
2023-08-29 15:31:17 +01:00
}
2023-09-02 11:10:14 +01:00
cmdArgs = append ( cmdArgs , driveCmdArgs ... )
2023-08-29 15:31:17 +01:00
2023-09-02 11:10:14 +01:00
usbCmdArgs := configureVMCmdUSBPassthrough ( cfg )
2023-08-29 15:31:17 +01:00
2023-09-02 11:10:14 +01:00
cmdArgs = append ( cmdArgs , usbCmdArgs ... )
2023-08-29 15:37:09 +01:00
2023-09-02 11:10:14 +01:00
blockDevArgs , err := configureVMCmdBlockDevicePassthrough ( logger , cfg )
if err != nil {
return nil , errors . Wrap ( err , "configure vm cmd block device passthrough" )
2023-08-27 15:30:51 +01:00
}
2023-09-02 11:10:14 +01:00
cmdArgs = append ( cmdArgs , blockDevArgs ... )
2023-08-27 15:30:51 +01:00
if cfg . InstallBaseUtilities && ! cfg . UnrestrictedNetworking {
2023-09-01 16:40:13 +01:00
return nil , fmt . Errorf ( "installation of base utilities is impossible with unrestricted networking disabled" )
2023-08-27 15:30:51 +01:00
}
2023-08-29 11:51:06 +01:00
// NOTE: The default timeouts below have no relation to the default
// timeouts set by the CLI. These work only if no timeout was supplied
// in the config programmatically. Defaults set here are quite conservative.
osUpTimeout := time . Second * 60
if cfg . OSUpTimeout != 0 {
osUpTimeout = cfg . OSUpTimeout
}
sshUpTimeout := time . Second * 120
if cfg . SSHUpTimeout != 0 {
sshUpTimeout = cfg . SSHUpTimeout
}
if sshUpTimeout < osUpTimeout {
return nil , fmt . Errorf ( "vm ssh setup timeout cannot be lower than os up timeout" )
}
2023-09-02 11:10:14 +01:00
encodedCmdArgs , err := qemucli . EncodeArgs ( cmdArgs )
if err != nil {
return nil , errors . Wrap ( err , "encode qemu cli args" )
}
2023-08-29 11:51:06 +01:00
// No errors beyond this point.
2023-08-25 15:12:19 +01:00
sysRead , userWrite := io . Pipe ( )
userRead , sysWrite := io . Pipe ( )
2023-09-02 12:14:02 +01:00
cmd := exec . Command ( baseCmd , encodedCmdArgs ... ) //#nosec G204 // I know, it's generally a bad idea to include variables into shell commands, but QEMU unfortunately does not accept anything else.
2023-08-25 15:12:19 +01:00
cmd . Stdin = sysRead
cmd . Stdout = sysWrite
stderrBuf := bytes . NewBuffer ( nil )
cmd . Stderr = stderrBuf
2023-09-06 13:53:11 +01:00
if cfg . Debug {
cmd . Stderr = io . MultiWriter ( cmd . Stderr , os . Stderr )
}
2023-08-28 11:35:57 +02:00
// This function is OS-specific.
2023-09-01 11:44:49 +01:00
osspecifics . SetNewProcessGroupCmd ( cmd )
2023-08-25 19:55:11 +01:00
2023-08-25 15:12:19 +01:00
userReader := bufio . NewReader ( userRead )
ctx , ctxCancel := context . WithCancel ( context . Background ( ) )
2023-08-27 13:44:57 +01:00
vm := & VM {
2023-08-25 15:12:19 +01:00
logger : logger ,
ctx : ctx ,
ctxCancel : ctxCancel ,
cmd : cmd ,
sshMappedPort : uint16 ( sshPort ) ,
sshReadyCh : make ( chan struct { } ) ,
2023-08-27 15:30:51 +01:00
installSSH : cfg . InstallBaseUtilities ,
2023-08-25 15:12:19 +01:00
2023-08-29 11:51:06 +01:00
serialRead : userRead ,
serialReader : userReader ,
serialWrite : userWrite ,
qemuStderrBuf : stderrBuf ,
osUpTimeout : osUpTimeout ,
sshUpTimeout : sshUpTimeout ,
2023-08-29 15:31:17 +01:00
originalCfg : cfg ,
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
vm . resetSerialStdout ( )
2023-08-25 15:12:19 +01:00
2023-08-27 13:44:57 +01:00
return vm , nil
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) Run ( ) error {
if atomic . AddUint32 ( & vm . disposed , 1 ) != 1 {
2023-08-25 15:12:19 +01:00
return fmt . Errorf ( "vm disposed" )
}
2023-08-27 13:44:57 +01:00
err := vm . cmd . Start ( )
2023-08-25 15:12:19 +01:00
if err != nil {
return errors . Wrap ( err , "start qemu cmd" )
}
2023-08-29 15:31:17 +01:00
go vm . runPeriodicHostMountChecker ( )
2023-08-25 15:12:19 +01:00
var globalErrsMu sync . Mutex
var globalErrs [ ] error
globalErrFn := func ( err error ) {
globalErrsMu . Lock ( )
defer globalErrsMu . Unlock ( )
2023-08-27 13:44:57 +01:00
globalErrs = append ( globalErrs , err , errors . Wrap ( vm . Cancel ( ) , "cancel on error" ) )
2023-08-25 15:12:19 +01:00
}
2023-08-29 11:51:06 +01:00
bootReadyCh := make ( chan struct { } )
go func ( ) {
select {
case <- time . After ( vm . osUpTimeout ) :
2023-08-30 13:38:34 +01:00
vm . logger . Warn ( "A VM boot timeout detected, consider running with --vm-debug to investigate" )
2023-08-29 11:51:06 +01:00
globalErrFn ( fmt . Errorf ( "vm boot timeout %v" , utils . GetLogErrMsg ( string ( vm . consumeSerialStdout ( ) ) , "serial log" ) ) )
case <- bootReadyCh :
vm . logger . Info ( "The VM is up, setting it up" )
}
} ( )
go func ( ) {
select {
case <- time . After ( vm . sshUpTimeout ) :
globalErrFn ( fmt . Errorf ( "vm setup timeout %v" , utils . GetLogErrMsg ( string ( vm . consumeSerialStdout ( ) ) , "serial log" ) ) )
case <- vm . sshReadyCh :
vm . logger . Info ( "The VM is ready" )
}
} ( )
2023-08-27 13:44:57 +01:00
vm . logger . Info ( "Booting the VM" )
2023-08-25 15:12:19 +01:00
go func ( ) {
2023-08-27 13:44:57 +01:00
_ = vm . runSerialReader ( )
_ = vm . Cancel ( )
2023-08-25 15:12:19 +01:00
} ( )
go func ( ) {
2023-08-27 13:44:57 +01:00
err = vm . runVMLoginHandler ( )
2023-08-25 15:12:19 +01:00
if err != nil {
globalErrFn ( errors . Wrap ( err , "run vm login handler" ) )
return
}
2023-08-29 11:51:06 +01:00
// This will disable the timeout-handling goroutine.
close ( bootReadyCh )
2023-08-25 16:54:58 +01:00
2023-08-27 13:44:57 +01:00
sshSigner , err := vm . sshSetup ( )
2023-08-25 15:12:19 +01:00
if err != nil {
globalErrFn ( errors . Wrap ( err , "set up ssh" ) )
return
}
2023-08-27 13:44:57 +01:00
vm . logger . Debug ( "Set up SSH server successfully" )
2023-08-25 15:12:19 +01:00
2023-08-27 13:44:57 +01:00
sshKeyScan , err := vm . scanSSHIdentity ( )
2023-08-25 15:12:19 +01:00
if err != nil {
globalErrFn ( errors . Wrap ( err , "scan ssh identity" ) )
return
}
2023-08-27 13:44:57 +01:00
vm . logger . Debug ( "Scanned SSH identity" )
2023-08-25 15:12:19 +01:00
knownHosts , err := ParseSSHKeyScan ( sshKeyScan )
if err != nil {
globalErrFn ( errors . Wrap ( err , "parse ssh key scan" ) )
return
}
2023-08-27 13:44:57 +01:00
vm . sshConf = & ssh . ClientConfig {
2023-08-25 15:12:19 +01:00
User : "root" ,
HostKeyCallback : knownHosts ,
Auth : [ ] ssh . AuthMethod {
ssh . PublicKeys ( sshSigner ) ,
} ,
Timeout : time . Second * 5 ,
}
// This is to notify everyone waiting for SSH to be up that it's ready to go.
2023-08-27 13:44:57 +01:00
close ( vm . sshReadyCh )
2023-08-25 15:12:19 +01:00
} ( )
2023-08-27 13:44:57 +01:00
_ , err = vm . cmd . Process . Wait ( )
cancelErr := vm . Cancel ( )
2023-08-25 15:12:19 +01:00
if err != nil {
combinedErr := multierr . Combine (
errors . Wrap ( err , "wait for cmd to finish execution" ) ,
errors . Wrap ( cancelErr , "cancel" ) ,
)
2023-08-29 11:51:06 +01:00
return fmt . Errorf ( "%w %v" , combinedErr , utils . GetLogErrMsg ( vm . qemuStderrBuf . String ( ) , "qemu stderr log" ) )
2023-08-25 15:12:19 +01:00
}
combinedErr := multierr . Combine (
append ( globalErrs , errors . Wrap ( cancelErr , "cancel on exit" ) ) ... ,
)
2023-08-25 16:54:58 +01:00
if combinedErr != nil {
2023-08-29 11:51:06 +01:00
return fmt . Errorf ( "%w %v" , combinedErr , utils . GetLogErrMsg ( vm . qemuStderrBuf . String ( ) , "qemu stderr log" ) )
2023-08-25 16:54:58 +01:00
}
2023-08-25 15:12:19 +01:00
2023-08-25 16:54:58 +01:00
return nil
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) Cancel ( ) error {
if atomic . AddUint32 ( & vm . canceled , 1 ) != 1 {
2023-08-25 15:12:19 +01:00
return nil
}
2023-08-27 13:44:57 +01:00
vm . logger . Warn ( "Canceling the VM context" )
2023-08-25 19:55:11 +01:00
var gracefulOK bool
2023-08-27 13:44:57 +01:00
sc , err := vm . DialSSH ( )
2023-08-25 19:55:11 +01:00
if err != nil {
if ! errors . Is ( err , ErrSSHUnavailable ) {
2023-08-29 10:59:50 +01:00
vm . logger . Warn ( "Failed to dial VM SSH to do graceful shutdown" , "error" , err . Error ( ) )
2023-08-25 19:55:11 +01:00
}
} else {
2023-08-28 11:35:57 +02:00
vm . logger . Warn ( "Sending poweroff command to the VM" )
2023-08-29 14:24:18 +01:00
_ , err = sshutil . RunSSHCmd ( context . Background ( ) , sc , "poweroff" )
2023-08-25 19:55:11 +01:00
_ = sc . Close ( )
if err != nil {
2023-08-29 10:59:50 +01:00
vm . logger . Warn ( "Could not power off the VM safely" , "error" , err . Error ( ) )
2023-08-25 19:55:11 +01:00
} else {
2023-08-27 13:44:57 +01:00
vm . logger . Info ( "Shutting the VM down safely" )
2023-08-25 19:55:11 +01:00
}
}
var interruptErr error
if ! gracefulOK {
2023-08-28 11:35:57 +02:00
if vm . cmd . Process == nil {
interruptErr = fmt . Errorf ( "process is not started" )
} else {
2023-09-01 11:44:49 +01:00
interruptErr = osspecifics . TerminateProcess ( vm . cmd . Process . Pid )
2023-08-28 11:35:57 +02:00
}
2023-08-25 19:55:11 +01:00
}
2023-08-27 13:44:57 +01:00
vm . ctxCancel ( )
2023-08-25 15:12:19 +01:00
return multierr . Combine (
2023-08-25 19:55:11 +01:00
errors . Wrap ( interruptErr , "interrupt cmd" ) ,
2023-08-27 13:44:57 +01:00
errors . Wrap ( vm . serialRead . Close ( ) , "close serial read pipe" ) ,
errors . Wrap ( vm . serialWrite . Close ( ) , "close serial write pipe" ) ,
2023-08-25 15:12:19 +01:00
)
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) runSerialReader ( ) error {
2023-08-25 15:12:19 +01:00
for {
2023-08-27 13:44:57 +01:00
raw , err := vm . serialReader . ReadBytes ( '\n' )
2023-08-25 15:12:19 +01:00
if err != nil {
return errors . Wrap ( err , "read from serial reader" )
}
select {
2023-08-27 13:44:57 +01:00
case vm . serialStdoutCh <- raw :
2023-08-25 15:12:19 +01:00
default :
// Message gets discarded if the buffer is full.
}
}
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) writeSerial ( b [ ] byte ) error {
vm . serialWriteMu . Lock ( )
defer vm . serialWriteMu . Unlock ( )
2023-08-25 15:12:19 +01:00
2023-08-28 11:35:57 +02:00
// What do you see below is a workaround for the way how serial console
// is implemented in QEMU/Windows pair. Apparently they are using polling,
// and this will ensure that we do not write faster than the polling rate.
for i := range b {
_ , err := vm . serialWrite . Write ( [ ] byte { b [ i ] } )
2023-08-29 13:29:46 +01:00
time . Sleep ( time . Millisecond )
2023-08-28 11:35:57 +02:00
if err != nil {
return errors . Wrapf ( err , "write char #%v" , i )
}
}
return nil
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) runVMLoginHandler ( ) error {
2023-08-25 15:12:19 +01:00
for {
select {
2023-08-27 13:44:57 +01:00
case <- vm . ctx . Done ( ) :
2023-08-29 11:51:06 +01:00
return vm . ctx . Err ( )
2023-08-25 15:12:19 +01:00
case <- time . After ( time . Second ) :
2023-08-27 13:44:57 +01:00
peek , err := vm . serialReader . Peek ( vm . serialReader . Buffered ( ) )
2023-08-25 15:12:19 +01:00
if err != nil {
return errors . Wrap ( err , "peek stdout" )
}
if bytes . Contains ( peek , [ ] byte ( "login:" ) ) {
2023-08-27 13:44:57 +01:00
err = vm . writeSerial ( [ ] byte ( "root\n" ) )
2023-08-25 15:12:19 +01:00
if err != nil {
2023-09-01 16:40:13 +01:00
return errors . Wrap ( err , "stdio write login" )
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
vm . logger . Debug ( "Logged into the VM serial" )
2023-08-25 15:12:19 +01:00
return nil
}
}
}
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) resetSerialStdout ( ) {
vm . serialStdoutCh = make ( chan [ ] byte , 32 )
2023-08-25 15:12:19 +01:00
}
2023-08-29 11:51:06 +01:00
// consumeSerialStdout drains everything currently queued in the serial
// stdout backlog and returns it concatenated in arrival order. It never
// blocks: it returns as soon as the backlog is empty.
func (vm *VM) consumeSerialStdout() []byte {
	var drained []byte

	for {
		select {
		case chunk := <-vm.serialStdoutCh:
			drained = append(drained, chunk...)
		default:
			return drained
		}
	}
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) DialSSH ( ) ( * ssh . Client , error ) {
if vm . sshConf == nil {
2023-08-25 15:12:19 +01:00
return nil , ErrSSHUnavailable
}
2023-08-27 13:44:57 +01:00
return ssh . Dial ( "tcp" , "localhost:" + fmt . Sprint ( vm . sshMappedPort ) , vm . sshConf )
2023-08-25 15:12:19 +01:00
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) DialSCP ( ) ( * scp . Client , error ) {
if vm . sshConf == nil {
2023-08-26 16:26:35 +01:00
return nil , ErrSSHUnavailable
}
2023-08-27 13:44:57 +01:00
sc := scp . NewClient ( "localhost:" + fmt . Sprint ( vm . sshMappedPort ) , vm . sshConf )
2023-08-26 16:26:35 +01:00
err := sc . Connect ( )
if err != nil {
2023-09-01 16:40:13 +01:00
return nil , errors . Wrap ( err , "connect" )
2023-08-26 16:26:35 +01:00
}
return & sc , nil
}
2023-08-27 13:44:57 +01:00
func ( vm * VM ) SSHUpNotifyChan ( ) chan struct { } {
return vm . sshReadyCh
2023-08-25 15:12:19 +01:00
}
2023-08-29 15:31:17 +01:00
// It's always a user's responsibility to ensure that no drives are mounted
// in both host and guest system. This should serve as the last resort.
func ( vm * VM ) runPeriodicHostMountChecker ( ) {
if len ( vm . originalCfg . PassthroughConfig . Block ) == 0 {
return
}
for {
select {
case <- vm . ctx . Done ( ) :
return
case <- time . After ( time . Second ) :
for _ , dev := range vm . originalCfg . PassthroughConfig . Block {
2023-09-01 11:44:49 +01:00
seemsMounted , err := osspecifics . CheckDeviceSeemsMounted ( dev . Path )
2023-08-29 15:31:17 +01:00
if err != nil {
vm . logger . Warn ( "Failed to check if a passed device seems to be mounted" , "dev-path" , dev . Path )
continue
}
if seemsMounted {
2023-08-29 15:37:09 +01:00
_ = vm . cmd . Process . Kill ( )
2023-08-29 15:31:17 +01:00
panic ( fmt . Sprintf ( "CRITICAL: Passed-through device '%v' appears to have been mounted on the host OS. Forcefully exiting now to prevent data corruption." , dev . Path ) )
}
}
}
}
}