Documentation
¶
Index ¶
- func GetPodAddr() (string, error)
- func GetProcCurrentCPUMillicores(cpuTime float64, prevCPUTime float64, systemCPUTime float64, ...) float64
- func GetSystemCPU() (float64, error)
- func IsCRIURestoreError(err error) bool
- func NewBackendRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.BackendRepositoryServiceClient, error)
- func NewContainerRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.ContainerRepositoryServiceClient, error)
- func NewWorkerRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.WorkerRepositoryServiceClient, error)
- type AssignedGpuDevices
- type CRIUManager
- type CedanaCRIUManager
- type ContainerInstance
- type ContainerLogMessage
- type ContainerLogger
- type ContainerMountManager
- type ContainerNetworkManager
- func (m *ContainerNetworkManager) ExposePort(containerId string, hostPort, containerPort int) error
- func (m *ContainerNetworkManager) Setup(containerId string, spec *specs.Spec, request *types.ContainerRequest) error
- func (m *ContainerNetworkManager) TearDown(containerId string) error
- func (m *ContainerNetworkManager) UpdateNetworkPermissions(containerId string, request *types.ContainerRequest) error
- type ContainerNvidiaManager
- func (c *ContainerNvidiaManager) AssignGPUDevices(containerId string, gpuCount uint32) ([]int, error)
- func (c *ContainerNvidiaManager) GetContainerGPUDevices(containerId string) []int
- func (c *ContainerNvidiaManager) InjectEnvVars(env []string) []string
- func (c *ContainerNvidiaManager) InjectMounts(mounts []specs.Mount) []specs.Mount
- func (c *ContainerNvidiaManager) UnassignGPUDevices(containerId string)
- type ContainerOptions
- type ContainerResources
- type ContainerRuntimeServer
- func (s *ContainerRuntimeServer) ContainerArchive(req *pb.ContainerArchiveRequest, ...) error
- func (s *ContainerRuntimeServer) ContainerCheckpoint(ctx context.Context, in *pb.ContainerCheckpointRequest) (*pb.ContainerCheckpointResponse, error)
- func (s *ContainerRuntimeServer) ContainerExec(ctx context.Context, in *pb.ContainerExecRequest) (*pb.ContainerExecResponse, error)
- func (s *ContainerRuntimeServer) ContainerKill(ctx context.Context, in *pb.ContainerKillRequest) (*pb.ContainerKillResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxCreateDirectory(ctx context.Context, in *pb.ContainerSandboxCreateDirectoryRequest) (*pb.ContainerSandboxCreateDirectoryResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxDeleteDirectory(ctx context.Context, in *pb.ContainerSandboxDeleteDirectoryRequest) (*pb.ContainerSandboxDeleteDirectoryResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxDeleteFile(ctx context.Context, in *pb.ContainerSandboxDeleteFileRequest) (*pb.ContainerSandboxDeleteFileResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxDownloadFile(ctx context.Context, in *pb.ContainerSandboxDownloadFileRequest) (*pb.ContainerSandboxDownloadFileResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxExec(ctx context.Context, in *pb.ContainerSandboxExecRequest) (*pb.ContainerSandboxExecResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxExposePort(ctx context.Context, in *pb.ContainerSandboxExposePortRequest) (*pb.ContainerSandboxExposePortResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxFindInFiles(ctx context.Context, in *pb.ContainerSandboxFindInFilesRequest) (*pb.ContainerSandboxFindInFilesResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxKill(ctx context.Context, in *pb.ContainerSandboxKillRequest) (*pb.ContainerSandboxKillResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxListExposedPorts(ctx context.Context, in *pb.ContainerSandboxListExposedPortsRequest) (*pb.ContainerSandboxListExposedPortsResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxListFiles(ctx context.Context, in *pb.ContainerSandboxListFilesRequest) (*pb.ContainerSandboxListFilesResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxListProcesses(ctx context.Context, in *pb.ContainerSandboxListProcessesRequest) (*pb.ContainerSandboxListProcessesResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxReplaceInFiles(ctx context.Context, in *pb.ContainerSandboxReplaceInFilesRequest) (*pb.ContainerSandboxReplaceInFilesResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxStatFile(ctx context.Context, in *pb.ContainerSandboxStatFileRequest) (*pb.ContainerSandboxStatFileResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxStatus(ctx context.Context, in *pb.ContainerSandboxStatusRequest) (*pb.ContainerSandboxStatusResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxStderr(ctx context.Context, in *pb.ContainerSandboxStderrRequest) (*pb.ContainerSandboxStderrResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxStdout(ctx context.Context, in *pb.ContainerSandboxStdoutRequest) (*pb.ContainerSandboxStdoutResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxUpdateNetworkPermissions(ctx context.Context, in *pb.ContainerSandboxUpdateNetworkPermissionsRequest) (*pb.ContainerSandboxUpdateNetworkPermissionsResponse, error)
- func (s *ContainerRuntimeServer) ContainerSandboxUploadFile(ctx context.Context, in *pb.ContainerSandboxUploadFileRequest) (*pb.ContainerSandboxUploadFileResponse, error)
- func (s *ContainerRuntimeServer) ContainerStatus(ctx context.Context, in *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error)
- func (s *ContainerRuntimeServer) ContainerStreamLogs(req *pb.ContainerStreamLogsRequest, ...) error
- func (s *ContainerRuntimeServer) ContainerSyncWorkspace(ctx context.Context, in *pb.SyncContainerWorkspaceRequest) (*pb.SyncContainerWorkspaceResponse, error)
- func (s *ContainerRuntimeServer) Start() error
- func (s *ContainerRuntimeServer) Stop() error
- type ContainerRuntimeServerOpts
- type CreateCheckpointOpts
- type ErrCRIURestoreFailed
- type FileCacheManager
- func (cm *FileCacheManager) CacheAvailable() bool
- func (cm *FileCacheManager) CacheFilesInPath(sourcePath string)
- func (cm *FileCacheManager) EnableVolumeCaching(workspaceName string, volumeCacheMap map[string]string, spec *specs.Spec) error
- func (cm *FileCacheManager) GetClient() *blobcache.BlobCacheClient
- type FileLock
- type GPUInfoClient
- type GPUInfoStat
- type GPUManager
- type GPUMemoryUsageStats
- type GvisorResources
- type ImageClient
- func (c *ImageClient) Archive(ctx context.Context, bundlePath *PathInfo, imageId string, ...) error
- func (c *ImageClient) BuildAndArchiveImage(ctx context.Context, outputLogger *slog.Logger, ...) error
- func (c *ImageClient) Cleanup() error
- func (c *ImageClient) GetCLIPImageMetadata(imageId string) (*clipCommon.ImageMetadata, bool)
- func (c *ImageClient) GetSourceImageRef(imageId string) (string, bool)
- func (c *ImageClient) PullAndArchiveImage(ctx context.Context, outputLogger *slog.Logger, ...) error
- func (c *ImageClient) PullLazy(ctx context.Context, request *types.ContainerRequest, ...) (time.Duration, error)
- type NvidiaCRIUManager
- type NvidiaInfoClient
- type PathInfo
- type ProcUtil
- type ProcessMonitor
- type ProcessStats
- type RestoreOpts
- type RuncResources
- type StagedFile
- type StandardResources
- type Worker
- type WorkerUsageMetrics
- type WorkspaceStorageManager
- func (sm *WorkspaceStorageManager) Cleanup() error
- func (sm *WorkspaceStorageManager) Create(workspaceName string, storage storage.Storage)
- func (sm *WorkspaceStorageManager) Mount(workspaceName string, workspaceStorage *types.WorkspaceStorage) (storage.Storage, error)
- func (sm *WorkspaceStorageManager) Unmount(workspaceName string) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GetPodAddr ¶
GetPodAddr gets the IP from the POD_IP env var. Returns an error if it fails to retrieve an IP.
func GetSystemCPU ¶
func IsCRIURestoreError ¶
func NewBackendRepositoryClient ¶
func NewBackendRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.BackendRepositoryServiceClient, error)
NewBackendRepositoryClient creates a new backend repository client
func NewContainerRepositoryClient ¶
func NewContainerRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.ContainerRepositoryServiceClient, error)
NewContainerRepositoryClient creates a new container repository client
func NewWorkerRepositoryClient ¶
func NewWorkerRepositoryClient(ctx context.Context, config types.AppConfig, token string) (pb.WorkerRepositoryServiceClient, error)
NewWorkerRepositoryClient creates a new worker repository client
Types ¶
type AssignedGpuDevices ¶
type AssignedGpuDevices struct {
}
type CRIUManager ¶
type CRIUManager interface {
Available() bool
CreateCheckpoint(ctx context.Context, runtime runtime.Runtime, checkpointId string, request *types.ContainerRequest) (string, error)
RestoreCheckpoint(ctx context.Context, runtime runtime.Runtime, opts *RestoreOpts) (int, error)
}
func InitializeCRIUManager ¶
func InitializeCRIUManager(ctx context.Context, config types.CRIUConfig) (CRIUManager, error)
InitializeCRIUManager initializes a new CRIU manager that can be used to checkpoint and restore containers. Depending on the mode, it uses either Cedana or NVIDIA CUDA checkpoint under the hood.
func InitializeNvidiaCRIU ¶
func InitializeNvidiaCRIU(ctx context.Context, config types.CRIUConfig) (CRIUManager, error)
type CedanaCRIUManager ¶
type CedanaCRIUManager struct {
// contains filtered or unexported fields
}
func InitializeCedanaCRIU ¶
func (*CedanaCRIUManager) Available ¶
func (c *CedanaCRIUManager) Available() bool
func (*CedanaCRIUManager) CreateCheckpoint ¶
func (c *CedanaCRIUManager) CreateCheckpoint(ctx context.Context, rt runtime.Runtime, checkpointId string, request *types.ContainerRequest) (string, error)
func (*CedanaCRIUManager) RestoreCheckpoint ¶
func (c *CedanaCRIUManager) RestoreCheckpoint(ctx context.Context, rt runtime.Runtime, opts *RestoreOpts) (int, error)
type ContainerInstance ¶
type ContainerInstance struct {
Id string
StubId string
BundlePath string
Overlay *common.ContainerOverlay
Spec *specs.Spec
Err error
ExitCode int
Port int
OutputWriter *common.OutputWriter
LogBuffer *common.LogBuffer
Request *types.ContainerRequest
StopReason types.StopContainerReason
SandboxProcessManager *goproc.GoProcClient
SandboxProcessManagerReady bool
ContainerIp string
Runtime runtime.Runtime
OOMWatcher runtime.OOMWatcher
}
type ContainerLogMessage ¶
type ContainerLogger ¶
type ContainerLogger struct {
// contains filtered or unexported fields
}
func (*ContainerLogger) CaptureLogs ¶
func (r *ContainerLogger) CaptureLogs(request *types.ContainerRequest, logChan chan common.LogRecord) error
type ContainerMountManager ¶
type ContainerMountManager struct {
// contains filtered or unexported fields
}
func NewContainerMountManager ¶
func NewContainerMountManager(config types.AppConfig) *ContainerMountManager
func (*ContainerMountManager) RemoveContainerMounts ¶
func (c *ContainerMountManager) RemoveContainerMounts(containerId string)
RemoveContainerMounts removes all mounts for a container
func (*ContainerMountManager) SetupContainerMounts ¶
func (c *ContainerMountManager) SetupContainerMounts(ctx context.Context, request *types.ContainerRequest, outputLogger *slog.Logger) error
SetupContainerMounts initializes any external storage for a container
type ContainerNetworkManager ¶
type ContainerNetworkManager struct {
// contains filtered or unexported fields
}
func NewContainerNetworkManager ¶
func NewContainerNetworkManager(ctx context.Context, workerId string, workerRepoClient pb.WorkerRepositoryServiceClient, containerRepoClient pb.ContainerRepositoryServiceClient, config types.AppConfig, containerInstances *common.SafeMap[*ContainerInstance]) (*ContainerNetworkManager, error)
func (*ContainerNetworkManager) ExposePort ¶
func (m *ContainerNetworkManager) ExposePort(containerId string, hostPort, containerPort int) error
func (*ContainerNetworkManager) Setup ¶
func (m *ContainerNetworkManager) Setup(containerId string, spec *specs.Spec, request *types.ContainerRequest) error
func (*ContainerNetworkManager) TearDown ¶
func (m *ContainerNetworkManager) TearDown(containerId string) error
func (*ContainerNetworkManager) UpdateNetworkPermissions ¶
func (m *ContainerNetworkManager) UpdateNetworkPermissions(containerId string, request *types.ContainerRequest) error
type ContainerNvidiaManager ¶
type ContainerNvidiaManager struct {
// contains filtered or unexported fields
}
func (*ContainerNvidiaManager) AssignGPUDevices ¶
func (c *ContainerNvidiaManager) AssignGPUDevices(containerId string, gpuCount uint32) ([]int, error)
func (*ContainerNvidiaManager) GetContainerGPUDevices ¶
func (c *ContainerNvidiaManager) GetContainerGPUDevices(containerId string) []int
func (*ContainerNvidiaManager) InjectEnvVars ¶
func (c *ContainerNvidiaManager) InjectEnvVars(env []string) []string
func (*ContainerNvidiaManager) InjectMounts ¶
func (c *ContainerNvidiaManager) InjectMounts(mounts []specs.Mount) []specs.Mount
func (*ContainerNvidiaManager) UnassignGPUDevices ¶
func (c *ContainerNvidiaManager) UnassignGPUDevices(containerId string)
type ContainerOptions ¶
type ContainerResources ¶
type ContainerResources interface {
GetCPU(request *types.ContainerRequest) *specs.LinuxCPU
GetMemory(request *types.ContainerRequest) *specs.LinuxMemory
}
type ContainerRuntimeServer ¶
type ContainerRuntimeServer struct {
pb.UnimplementedContainerServiceServer
// contains filtered or unexported fields
}
ContainerRuntimeServer is a runtime-agnostic container server that works with any OCI runtime
func NewContainerRuntimeServer ¶
func NewContainerRuntimeServer(opts *ContainerRuntimeServerOpts) (*ContainerRuntimeServer, error)
NewContainerRuntimeServer creates a new runtime-agnostic container server
func (*ContainerRuntimeServer) ContainerArchive ¶
func (s *ContainerRuntimeServer) ContainerArchive(req *pb.ContainerArchiveRequest, stream pb.ContainerService_ContainerArchiveServer) error
ContainerArchive archives a container's filesystem
func (*ContainerRuntimeServer) ContainerCheckpoint ¶
func (s *ContainerRuntimeServer) ContainerCheckpoint(ctx context.Context, in *pb.ContainerCheckpointRequest) (*pb.ContainerCheckpointResponse, error)
ContainerCheckpoint creates a checkpoint of a running container
func (*ContainerRuntimeServer) ContainerExec ¶
func (s *ContainerRuntimeServer) ContainerExec(ctx context.Context, in *pb.ContainerExecRequest) (*pb.ContainerExecResponse, error)
ContainerExec executes a command inside a running container
func (*ContainerRuntimeServer) ContainerKill ¶
func (s *ContainerRuntimeServer) ContainerKill(ctx context.Context, in *pb.ContainerKillRequest) (*pb.ContainerKillResponse, error)
ContainerKill kills and removes a container using the worker's configured runtime
func (*ContainerRuntimeServer) ContainerSandboxCreateDirectory ¶
func (s *ContainerRuntimeServer) ContainerSandboxCreateDirectory(ctx context.Context, in *pb.ContainerSandboxCreateDirectoryRequest) (*pb.ContainerSandboxCreateDirectoryResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxDeleteDirectory ¶
func (s *ContainerRuntimeServer) ContainerSandboxDeleteDirectory(ctx context.Context, in *pb.ContainerSandboxDeleteDirectoryRequest) (*pb.ContainerSandboxDeleteDirectoryResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxDeleteFile ¶
func (s *ContainerRuntimeServer) ContainerSandboxDeleteFile(ctx context.Context, in *pb.ContainerSandboxDeleteFileRequest) (*pb.ContainerSandboxDeleteFileResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxDownloadFile ¶
func (s *ContainerRuntimeServer) ContainerSandboxDownloadFile(ctx context.Context, in *pb.ContainerSandboxDownloadFileRequest) (*pb.ContainerSandboxDownloadFileResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxExec ¶
func (s *ContainerRuntimeServer) ContainerSandboxExec(ctx context.Context, in *pb.ContainerSandboxExecRequest) (*pb.ContainerSandboxExecResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxExposePort ¶
func (s *ContainerRuntimeServer) ContainerSandboxExposePort(ctx context.Context, in *pb.ContainerSandboxExposePortRequest) (*pb.ContainerSandboxExposePortResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxFindInFiles ¶
func (s *ContainerRuntimeServer) ContainerSandboxFindInFiles(ctx context.Context, in *pb.ContainerSandboxFindInFilesRequest) (*pb.ContainerSandboxFindInFilesResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxKill ¶
func (s *ContainerRuntimeServer) ContainerSandboxKill(ctx context.Context, in *pb.ContainerSandboxKillRequest) (*pb.ContainerSandboxKillResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxListExposedPorts ¶
func (s *ContainerRuntimeServer) ContainerSandboxListExposedPorts(ctx context.Context, in *pb.ContainerSandboxListExposedPortsRequest) (*pb.ContainerSandboxListExposedPortsResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxListFiles ¶
func (s *ContainerRuntimeServer) ContainerSandboxListFiles(ctx context.Context, in *pb.ContainerSandboxListFilesRequest) (*pb.ContainerSandboxListFilesResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxListProcesses ¶
func (s *ContainerRuntimeServer) ContainerSandboxListProcesses(ctx context.Context, in *pb.ContainerSandboxListProcessesRequest) (*pb.ContainerSandboxListProcessesResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxReplaceInFiles ¶
func (s *ContainerRuntimeServer) ContainerSandboxReplaceInFiles(ctx context.Context, in *pb.ContainerSandboxReplaceInFilesRequest) (*pb.ContainerSandboxReplaceInFilesResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxStatFile ¶
func (s *ContainerRuntimeServer) ContainerSandboxStatFile(ctx context.Context, in *pb.ContainerSandboxStatFileRequest) (*pb.ContainerSandboxStatFileResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxStatus ¶
func (s *ContainerRuntimeServer) ContainerSandboxStatus(ctx context.Context, in *pb.ContainerSandboxStatusRequest) (*pb.ContainerSandboxStatusResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxStderr ¶
func (s *ContainerRuntimeServer) ContainerSandboxStderr(ctx context.Context, in *pb.ContainerSandboxStderrRequest) (*pb.ContainerSandboxStderrResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxStdout ¶
func (s *ContainerRuntimeServer) ContainerSandboxStdout(ctx context.Context, in *pb.ContainerSandboxStdoutRequest) (*pb.ContainerSandboxStdoutResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxUpdateNetworkPermissions ¶
func (s *ContainerRuntimeServer) ContainerSandboxUpdateNetworkPermissions(ctx context.Context, in *pb.ContainerSandboxUpdateNetworkPermissionsRequest) (*pb.ContainerSandboxUpdateNetworkPermissionsResponse, error)
func (*ContainerRuntimeServer) ContainerSandboxUploadFile ¶
func (s *ContainerRuntimeServer) ContainerSandboxUploadFile(ctx context.Context, in *pb.ContainerSandboxUploadFileRequest) (*pb.ContainerSandboxUploadFileResponse, error)
func (*ContainerRuntimeServer) ContainerStatus ¶
func (s *ContainerRuntimeServer) ContainerStatus(ctx context.Context, in *pb.ContainerStatusRequest) (*pb.ContainerStatusResponse, error)
ContainerStatus returns the status of a container
func (*ContainerRuntimeServer) ContainerStreamLogs ¶
func (s *ContainerRuntimeServer) ContainerStreamLogs(req *pb.ContainerStreamLogsRequest, stream pb.ContainerService_ContainerStreamLogsServer) error
ContainerStreamLogs streams container logs
func (*ContainerRuntimeServer) ContainerSyncWorkspace ¶
func (s *ContainerRuntimeServer) ContainerSyncWorkspace(ctx context.Context, in *pb.SyncContainerWorkspaceRequest) (*pb.SyncContainerWorkspaceResponse, error)
ContainerSyncWorkspace syncs workspace files
func (*ContainerRuntimeServer) Start ¶
func (s *ContainerRuntimeServer) Start() error
func (*ContainerRuntimeServer) Stop ¶
func (s *ContainerRuntimeServer) Stop() error
type ContainerRuntimeServerOpts ¶
type ContainerRuntimeServerOpts struct {
PodAddr string
Runtime runtime.Runtime // The runtime configured for this worker pool
ContainerInstances *common.SafeMap[*ContainerInstance]
ImageClient *ImageClient
ContainerRepoClient pb.ContainerRepositoryServiceClient
ContainerNetworkManager *ContainerNetworkManager
CreateCheckpoint func(ctx context.Context, opts *CreateCheckpointOpts) error
}
type CreateCheckpointOpts ¶
type ErrCRIURestoreFailed ¶
type ErrCRIURestoreFailed struct {
Stderr string
}
func (*ErrCRIURestoreFailed) Error ¶
func (e *ErrCRIURestoreFailed) Error() string
type FileCacheManager ¶
type FileCacheManager struct {
// contains filtered or unexported fields
}
func NewFileCacheManager ¶
func NewFileCacheManager(config types.AppConfig, client *blobcache.BlobCacheClient) *FileCacheManager
func (*FileCacheManager) CacheAvailable ¶
func (cm *FileCacheManager) CacheAvailable() bool
CacheAvailable checks if the file cache is available
func (*FileCacheManager) CacheFilesInPath ¶
func (cm *FileCacheManager) CacheFilesInPath(sourcePath string)
CacheFilesInPath caches files from a specified source path
func (*FileCacheManager) EnableVolumeCaching ¶
func (*FileCacheManager) GetClient ¶
func (cm *FileCacheManager) GetClient() *blobcache.BlobCacheClient
GetClient returns the blobcache client instance.
type FileLock ¶
type FileLock struct {
// contains filtered or unexported fields
}
func NewFileLock ¶
type GPUInfoClient ¶
type GPUInfoClient interface {
AvailableGPUDevices() ([]int, error)
GetGPUMemoryUsage(deviceIndex int) (GPUMemoryUsageStats, error)
}
type GPUInfoStat ¶
type GPUManager ¶
type GPUManager interface {
AssignGPUDevices(containerId string, gpuCount uint32) ([]int, error)
GetContainerGPUDevices(containerId string) []int
UnassignGPUDevices(containerId string)
InjectEnvVars(env []string) []string
InjectMounts(mounts []specs.Mount) []specs.Mount
}
func NewContainerNvidiaManager ¶
func NewContainerNvidiaManager(gpuCount uint32) GPUManager
type GPUMemoryUsageStats ¶
type GvisorResources ¶
type GvisorResources struct {
*StandardResources
}
func NewGvisorResources ¶
func NewGvisorResources() *GvisorResources
func (*GvisorResources) GetCPU ¶
func (g *GvisorResources) GetCPU(request *types.ContainerRequest) *specs.LinuxCPU
func (*GvisorResources) GetMemory ¶
func (g *GvisorResources) GetMemory(request *types.ContainerRequest) *specs.LinuxMemory
type ImageClient ¶
type ImageClient struct {
// contains filtered or unexported fields
}
func NewImageClient ¶
func NewImageClient(config types.AppConfig, workerId string, workerRepoClient pb.WorkerRepositoryServiceClient, fileCacheManager *FileCacheManager) (*ImageClient, error)
func (*ImageClient) Archive ¶
func (c *ImageClient) Archive(ctx context.Context, bundlePath *PathInfo, imageId string, progressChan chan int) error
Archive generates and uploads an archived version of the image for distribution.
func (*ImageClient) BuildAndArchiveImage ¶
func (c *ImageClient) BuildAndArchiveImage(ctx context.Context, outputLogger *slog.Logger, request *types.ContainerRequest) error
func (*ImageClient) Cleanup ¶
func (c *ImageClient) Cleanup() error
func (*ImageClient) GetCLIPImageMetadata ¶
func (c *ImageClient) GetCLIPImageMetadata(imageId string) (*clipCommon.ImageMetadata, bool)
GetCLIPImageMetadata extracts CLIP image metadata from the archive
func (*ImageClient) GetSourceImageRef ¶
func (c *ImageClient) GetSourceImageRef(imageId string) (string, bool)
GetSourceImageRef retrieves the cached source image reference for a v2 image
func (*ImageClient) PullAndArchiveImage ¶
func (c *ImageClient) PullAndArchiveImage(ctx context.Context, outputLogger *slog.Logger, request *types.ContainerRequest) error
type NvidiaCRIUManager ¶
type NvidiaCRIUManager struct {
// contains filtered or unexported fields
}
func (*NvidiaCRIUManager) Available ¶
func (c *NvidiaCRIUManager) Available() bool
func (*NvidiaCRIUManager) CreateCheckpoint ¶
func (c *NvidiaCRIUManager) CreateCheckpoint(ctx context.Context, rt runtime.Runtime, checkpointId string, request *types.ContainerRequest) (string, error)
func (*NvidiaCRIUManager) RestoreCheckpoint ¶
func (c *NvidiaCRIUManager) RestoreCheckpoint(ctx context.Context, rt runtime.Runtime, opts *RestoreOpts) (int, error)
type NvidiaInfoClient ¶
type NvidiaInfoClient struct{}
func (*NvidiaInfoClient) AvailableGPUDevices ¶
func (c *NvidiaInfoClient) AvailableGPUDevices() ([]int, error)
func (*NvidiaInfoClient) GetGPUMemoryUsage ¶
func (c *NvidiaInfoClient) GetGPUMemoryUsage(deviceIndex int) (GPUMemoryUsageStats, error)
GetGPUMemoryUsage retrieves the memory usage of a specific NVIDIA GPU. It returns the total and used memory in bytes.
type PathInfo ¶
type PathInfo struct {
Path string
// contains filtered or unexported fields
}
func NewPathInfo ¶
type ProcUtil ¶
func NewProcUtil ¶
type ProcessMonitor ¶
type ProcessMonitor struct {
// contains filtered or unexported fields
}
func NewProcessMonitor ¶
func NewProcessMonitor(pid int, devices []specs.LinuxDeviceCgroup, gpuDeviceIds []int) *ProcessMonitor
func (*ProcessMonitor) GetStatistics ¶
func (m *ProcessMonitor) GetStatistics() (*ProcessStats, error)
type ProcessStats ¶
type ProcessStats struct {
CPU uint64 // in millicores
Memory process.MemoryInfoStat
IO process.IOCountersStat
NetIO net.IOCountersStat
GPU GPUInfoStat
}
type RestoreOpts ¶
type RestoreOpts struct {
// contains filtered or unexported fields
}
type RuncResources ¶
type RuncResources struct {
*StandardResources
}
func NewRuncResources ¶
func NewRuncResources() *RuncResources
type StagedFile ¶
type StandardResources ¶
type StandardResources struct {
// contains filtered or unexported fields
}
func NewStandardResources ¶
func NewStandardResources() *StandardResources
func (*StandardResources) GetCPU ¶
func (r *StandardResources) GetCPU(request *types.ContainerRequest) *specs.LinuxCPU
func (*StandardResources) GetMemory ¶
func (r *StandardResources) GetMemory(request *types.ContainerRequest) *specs.LinuxMemory
type Worker ¶
type Worker struct {
// contains filtered or unexported fields
}
func (*Worker) IsCRIUAvailable ¶
func (*Worker) RunContainer ¶
RunContainer spawns a single container and streams its output to stdout/stderr.
type WorkerUsageMetrics ¶
type WorkerUsageMetrics struct {
// contains filtered or unexported fields
}
func NewWorkerUsageMetrics ¶
func NewWorkerUsageMetrics(ctx context.Context, workerId string, config types.MonitoringConfig, gpuType string) (*WorkerUsageMetrics, error)
func (*WorkerUsageMetrics) EmitContainerUsage ¶
func (wm *WorkerUsageMetrics) EmitContainerUsage(ctx context.Context, request *types.ContainerRequest)
EmitContainerUsage periodically sends metrics to track container duration.
type WorkspaceStorageManager ¶
type WorkspaceStorageManager struct {
// contains filtered or unexported fields
}
func NewWorkspaceStorageManager ¶
func NewWorkspaceStorageManager(ctx context.Context, config types.StorageConfig, poolConfig types.WorkerPoolConfig, containerInstances *common.SafeMap[*ContainerInstance], cacheClient *blobcache.BlobCacheClient) (*WorkspaceStorageManager, error)
func (*WorkspaceStorageManager) Cleanup ¶
func (sm *WorkspaceStorageManager) Cleanup() error
func (*WorkspaceStorageManager) Create ¶
func (sm *WorkspaceStorageManager) Create(workspaceName string, storage storage.Storage)
func (*WorkspaceStorageManager) Mount ¶
func (sm *WorkspaceStorageManager) Mount(workspaceName string, workspaceStorage *types.WorkspaceStorage) (storage.Storage, error)
func (*WorkspaceStorageManager) Unmount ¶
func (sm *WorkspaceStorageManager) Unmount(workspaceName string) error