exporter

package
v0.0.0-...-2ef1dc1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 27, 2025 License: Apache-2.0 Imports: 21 Imported by: 0

Documentation

Overview

SPDX-FileCopyrightText: 2023 Rivos Inc.

SPDX-License-Identifier: Apache-2.0

SPDX-FileCopyrightText: 2023 Rivos Inc.

SPDX-License-Identifier: Apache-2.0

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func InitPromServer

func InitPromServer(config *Config) http.Handler

func MemToFloat

func MemToFloat(mem string) (float64, error)

MemToFloat converts a Slurm memory string to a float64 number of bytes.

Types

type AccountCsvFetcher

type AccountCsvFetcher struct {
	// contains filtered or unexported fields
}

func (*AccountCsvFetcher) FetchMetrics

func (acf *AccountCsvFetcher) FetchMetrics() ([]AccountLimitMetric, error)

func (*AccountCsvFetcher) ScrapeDuration

func (acf *AccountCsvFetcher) ScrapeDuration() time.Duration

func (*AccountCsvFetcher) ScrapeError

func (acf *AccountCsvFetcher) ScrapeError() prometheus.Counter

type AccountLimitMetric

type AccountLimitMetric struct {
	Account string
	// limit to the amount of resources for a particular account in the RUNNING state
	AllocatedMem  float64
	AllocatedCPU  float64
	AllocatedJobs float64
	// limit to the amount of resources that can be either PENDING or RUNNING
	TotalJobs float64
}

type AccountMetric

type AccountMetric struct {
	// contains filtered or unexported fields
}

type AtomicProcFetcher

type AtomicProcFetcher struct {
	sync.Mutex
	Info map[int64]*TraceInfo
	// contains filtered or unexported fields
}

func NewAtomicProFetcher

func NewAtomicProFetcher(sampleRate uint64) *AtomicProcFetcher

func (*AtomicProcFetcher) Add

func (m *AtomicProcFetcher) Add(trace *TraceInfo) error

func (*AtomicProcFetcher) Fetch

func (m *AtomicProcFetcher) Fetch() map[int64]*TraceInfo

type AtomicThrottledCache

type AtomicThrottledCache[C SlurmPrimitiveMetric] struct {
	sync.Mutex
	// contains filtered or unexported fields
}

func NewAtomicThrottledCache

func NewAtomicThrottledCache[C SlurmPrimitiveMetric](limit float64) *AtomicThrottledCache[C]

func (*AtomicThrottledCache[C]) FetchOrThrottle

func (atc *AtomicThrottledCache[C]) FetchOrThrottle(fetchFunc func() ([]C, error)) ([]C, error)

Atomic fetch from either the cache or the collector; resets and hydrates as necessary.

type CliFlags

type CliFlags struct {
	SlurmLicEnabled      bool
	SlurmDiagEnabled     bool
	SlurmCliFallback     bool
	TraceEnabled         bool
	SacctEnabled         bool
	SlurmPollLimit       float64
	LogLevel             string
	ListenAddress        string
	MetricsPath          string
	SlurmSqueueOverride  string
	SlurmSinfoOverride   string
	SlurmDiagOverride    string
	SlurmAcctOverride    string
	TraceRate            uint64
	TracePath            string
	SlurmLicenseOverride string
}

type CliJsonLicMetricFetcher

type CliJsonLicMetricFetcher struct {
	// contains filtered or unexported fields
}

func (*CliJsonLicMetricFetcher) FetchMetrics

func (cjl *CliJsonLicMetricFetcher) FetchMetrics() ([]LicenseMetric, error)

func (*CliJsonLicMetricFetcher) ScrapeDuration

func (cjl *CliJsonLicMetricFetcher) ScrapeDuration() time.Duration

func (*CliJsonLicMetricFetcher) ScrapeError

func (cjl *CliJsonLicMetricFetcher) ScrapeError() prometheus.Counter

type CliOpts

type CliOpts struct {
	// contains filtered or unexported fields
}

type CliScraper

type CliScraper struct {
	// contains filtered or unexported fields
}

Implements SlurmByteScraper by fetching data from the CLI.

func NewCliScraper

func NewCliScraper(args ...string) *CliScraper

func (*CliScraper) Duration

func (cf *CliScraper) Duration() time.Duration

func (*CliScraper) FetchRawBytes

func (cf *CliScraper) FetchRawBytes() ([]byte, error)

type CoercedInt

type CoercedInt int

func (*CoercedInt) UnmarshalJSON

func (ci *CoercedInt) UnmarshalJSON(data []byte) error

type Config

type Config struct {
	TraceConf     *TraceConfig
	PollLimit     float64
	LogLevel      slog.Level
	ListenAddress string
	MetricsPath   string
	// contains filtered or unexported fields
}

func NewConfig

func NewConfig(cliFlags *CliFlags) (*Config, error)

type CpuSummaryMetric

type CpuSummaryMetric struct {
	Total    float64
	Idle     float64
	Load     float64
	PerState map[string]*PerStateMetric
}

type DiagMetric

type DiagMetric struct {
	ServerThreadCount     int              `json:"server_thread_count"`
	DBDAgentQueueSize     int              `json:"dbd_agent_queue_size"`
	RpcByUser             []UserRpcInfo    `json:"rpcs_by_user"`
	RpcByMessageType      []MessageRpcInfo `json:"rpcs_by_message_type"`
	BackfillJobCount      int              `json:"bf_backfilled_jobs"`
	BackfillCycleCountSum int              `json:"bf_cycle_sum"`
	BackfillCycleCounter  int              `json:"bf_cycle_counter"`
	BackfillLastDepth     int              `json:"bf_last_depth"`
	BackfillLastDepthTry  int              `json:"bf_last_depth_try"`
}

type DiagnosticsCollector

type DiagnosticsCollector struct {
	// contains filtered or unexported fields
}

func NewDiagsCollector

func NewDiagsCollector(config *Config) *DiagnosticsCollector

func (*DiagnosticsCollector) Collect

func (sc *DiagnosticsCollector) Collect(ch chan<- prometheus.Metric)

func (*DiagnosticsCollector) Describe

func (sc *DiagnosticsCollector) Describe(ch chan<- *prometheus.Desc)

type FeatureJobMetric

type FeatureJobMetric struct {
	// contains filtered or unexported fields
}

type IntFromOptionalStruct

type IntFromOptionalStruct int

func (*IntFromOptionalStruct) UnmarshalJSON

func (ffoo *IntFromOptionalStruct) UnmarshalJSON(data []byte) error

type JobCliFallbackFetcher

type JobCliFallbackFetcher struct {
	// contains filtered or unexported fields
}

func (*JobCliFallbackFetcher) FetchMetrics

func (jcf *JobCliFallbackFetcher) FetchMetrics() ([]JobMetric, error)

func (*JobCliFallbackFetcher) ScrapeDuration

func (jcf *JobCliFallbackFetcher) ScrapeDuration() time.Duration

func (*JobCliFallbackFetcher) ScrapeError

func (jcf *JobCliFallbackFetcher) ScrapeError() prometheus.Counter

type JobJsonFetcher

type JobJsonFetcher struct {
	// contains filtered or unexported fields
}

func (*JobJsonFetcher) FetchMetrics

func (jjf *JobJsonFetcher) FetchMetrics() ([]JobMetric, error)

func (*JobJsonFetcher) ScrapeDuration

func (jjf *JobJsonFetcher) ScrapeDuration() time.Duration

func (*JobJsonFetcher) ScrapeError

func (jjf *JobJsonFetcher) ScrapeError() prometheus.Counter

type JobMetric

type JobMetric struct {
	Account      string      `json:"account"`
	JobId        float64     `json:"job_id"`
	EndTime      float64     `json:"end_time"`
	JobState     string      `json:"job_state"`
	Partition    string      `json:"partition"`
	UserName     string      `json:"user_name"`
	Features     string      `json:"features"`
	JobResources JobResource `json:"job_resources"`
	StateReason  string      `json:"state_reason"`
}

type JobResource

type JobResource struct {
	AllocCpus  float64                  `json:"allocated_cpus"`
	AllocNodes map[string]*NodeResource `json:"allocated_nodes"`
}

type JobsCollector

type JobsCollector struct {
	// contains filtered or unexported fields
}

func NewJobsController

func NewJobsController(config *Config) *JobsCollector

func (*JobsCollector) Collect

func (jc *JobsCollector) Collect(ch chan<- prometheus.Metric)

func (*JobsCollector) Describe

func (jc *JobsCollector) Describe(ch chan<- *prometheus.Desc)

func (*JobsCollector) SetFetcher

func (jc *JobsCollector) SetFetcher(fetcher SlurmMetricFetcher[JobMetric])

type LicCollector

type LicCollector struct {
	// contains filtered or unexported fields
}

func NewLicCollector

func NewLicCollector(config *Config) *LicCollector

func (*LicCollector) Collect

func (lc *LicCollector) Collect(ch chan<- prometheus.Metric)

func (*LicCollector) Describe

func (lc *LicCollector) Describe(ch chan<- *prometheus.Desc)

type LicenseMetric

type LicenseMetric struct {
	LicenseName  string `json:"LicenseName"`
	Total        int    `json:"Total"`
	Used         int    `json:"Used"`
	Free         int    `json:"Free"`
	Remote       bool   `json:"Remote"`
	Reserved     int    `json:"Reserved"`
	LastConsumed int    `json:"LastConsumed"`
	LastDeficit  int    `json:"LastDeficit"`
}

type LimitCollector

type LimitCollector struct {
	// contains filtered or unexported fields
}

func NewLimitCollector

func NewLimitCollector(config *Config) *LimitCollector

func (*LimitCollector) Collect

func (lc *LimitCollector) Collect(ch chan<- prometheus.Metric)

func (*LimitCollector) Describe

func (lc *LimitCollector) Describe(ch chan<- *prometheus.Desc)

type MemSummaryMetric

type MemSummaryMetric struct {
	AllocMemory float64
	FreeMemory  float64
	RealMemory  float64
}

type MessageRpcInfo

type MessageRpcInfo struct {
	MessageType string                `json:"message_type"`
	TypeId      int                   `json:"type_id"`
	Count       int                   `json:"count"`
	AvgTime     IntFromOptionalStruct `json:"average_time"`
	TotalTime   int                   `json:"total_time"`
}

type MockFetchErrored

type MockFetchErrored struct{}

func (*MockFetchErrored) Duration

func (f *MockFetchErrored) Duration() time.Duration

func (*MockFetchErrored) FetchRawBytes

func (f *MockFetchErrored) FetchRawBytes() ([]byte, error)

type MockScraper

type MockScraper struct {
	CallCount int
	// contains filtered or unexported fields
}

Implements SlurmByteScraper by pulling fixtures instead; used exclusively for testing.

func (*MockScraper) Duration

func (f *MockScraper) Duration() time.Duration

func (*MockScraper) FetchRawBytes

func (f *MockScraper) FetchRawBytes() ([]byte, error)

type NAbleFloat

type NAbleFloat float64

func (*NAbleFloat) UnmarshalJSON

func (naf *NAbleFloat) UnmarshalJSON(data []byte) error

type NAbleTime

type NAbleTime struct{ time.Time }

func (*NAbleTime) UnmarshalJSON

func (nat *NAbleTime) UnmarshalJSON(data []byte) error

Reports the beginning of time in the case of N/A.

type NodeCliFallbackFetcher

type NodeCliFallbackFetcher struct {
	// contains filtered or unexported fields
}

func (*NodeCliFallbackFetcher) FetchMetrics

func (cmf *NodeCliFallbackFetcher) FetchMetrics() ([]NodeMetric, error)

func (*NodeCliFallbackFetcher) ScrapeDuration

func (cmf *NodeCliFallbackFetcher) ScrapeDuration() time.Duration

func (*NodeCliFallbackFetcher) ScrapeError

func (cmf *NodeCliFallbackFetcher) ScrapeError() prometheus.Counter

type NodeJsonFetcher

type NodeJsonFetcher struct {
	// contains filtered or unexported fields
}

func (*NodeJsonFetcher) FetchMetrics

func (cmf *NodeJsonFetcher) FetchMetrics() ([]NodeMetric, error)

func (*NodeJsonFetcher) ScrapeDuration

func (cmf *NodeJsonFetcher) ScrapeDuration() time.Duration

func (*NodeJsonFetcher) ScrapeError

func (cmf *NodeJsonFetcher) ScrapeError() prometheus.Counter

type NodeMetric

type NodeMetric struct {
	AllocMemory float64  `json:"alloc_memory"`
	AllocCpus   float64  `json:"alloc_cpus"`
	Cpus        float64  `json:"cpus"`
	CpuLoad     float64  `json:"cpu_load"`
	FreeMemory  float64  `json:"free_memory"`
	Hostname    string   `json:"hostname"`
	IdleCpus    float64  `json:"idle_cpus"`
	Partitions  []string `json:"partitions"`
	RealMemory  float64  `json:"real_memory"`
	State       string   `json:"state"`
	Weight      float64  `json:"weight"`
}

type NodeResource

type NodeResource struct {
	Mem float64 `json:"memory"`
}

type NodesCollector

type NodesCollector struct {
	// contains filtered or unexported fields
}

func NewNodeCollecter

func NewNodeCollecter(config *Config) *NodesCollector

func (*NodesCollector) Collect

func (nc *NodesCollector) Collect(ch chan<- prometheus.Metric)

func (*NodesCollector) Describe

func (nc *NodesCollector) Describe(ch chan<- *prometheus.Desc)

func (*NodesCollector) SetFetcher

func (nc *NodesCollector) SetFetcher(fetcher SlurmMetricFetcher[NodeMetric])

type PartitionJobMetric

type PartitionJobMetric struct {
	// contains filtered or unexported fields
}

type PartitionMetric

type PartitionMetric struct {
	TotalCpus        float64
	RealMemory       float64
	FreeMemory       float64
	StateAllocMemory map[string]float64
	StateAllocCpus   map[string]float64
	StateNodeCount   map[string]float64
	CpuLoad          float64
	IdleCpus         float64
	Weight           float64
}

type PerStateMetric

type PerStateMetric struct {
	Cpus  float64
	Count float64
}

type SdiagResponse

type SdiagResponse struct {
	// Response coercible between slurm 23 and 24 data versions
	Meta struct {
		SlurmVersion SlurmVersion      `json:"Slurm"`
		Plugins      map[string]string `json:"plugins"`
		Plugin       map[string]string `json:"plugin"`
	} `json:"meta"`
	Statistics DiagMetric
	Errors     []string `json:"errors"`
	Warnings   []string `json:"warnings"`
}

func (*SdiagResponse) IsDataParserPlugin

func (sr *SdiagResponse) IsDataParserPlugin() bool

type SlurmByteScraper

type SlurmByteScraper interface {
	FetchRawBytes() ([]byte, error)
	Duration() time.Duration
}

Interface for getting data from Slurm; used for dependency injection/ease of testing and for adding slurmrestd support later.

type SlurmMetricFetcher

type SlurmMetricFetcher[M SlurmPrimitiveMetric] interface {
	FetchMetrics() ([]M, error)
	ScrapeDuration() time.Duration
	ScrapeError() prometheus.Counter
}

type SlurmPrimitiveMetric

type SlurmPrimitiveMetric interface {
	NodeMetric | JobMetric | DiagMetric | LicenseMetric | AccountLimitMetric
}

type SlurmVersion

type SlurmVersion struct {
	Version struct {
		Major CoercedInt `json:"major"`
		Micro CoercedInt `json:"micro"`
		Minor CoercedInt `json:"minor"`
	} `json:"version"`
	Release string `json:"release"`
}

type StateReasonMetric

type StateReasonMetric struct {
	// contains filtered or unexported fields
}

type StringByteScraper

type StringByteScraper struct {
	Callcount int
	// contains filtered or unexported fields
}

Implements SlurmByteScraper by emitting a string payload instead; used exclusively for testing.

func (*StringByteScraper) Duration

func (es *StringByteScraper) Duration() time.Duration

func (*StringByteScraper) FetchRawBytes

func (es *StringByteScraper) FetchRawBytes() ([]byte, error)

type TraceCollector

type TraceCollector struct {
	ProcessFetcher *AtomicProcFetcher
	// contains filtered or unexported fields
}

func NewTraceCollector

func NewTraceCollector(config *Config) *TraceCollector

func (*TraceCollector) Collect

func (c *TraceCollector) Collect(ch chan<- prometheus.Metric)

func (*TraceCollector) Describe

func (c *TraceCollector) Describe(ch chan<- *prometheus.Desc)

type TraceConfig

type TraceConfig struct {
	// contains filtered or unexported fields
}

type TraceInfo

type TraceInfo struct {
	JobId      int64   `json:"job_id"`
	Pid        int64   `json:"pid"`
	Cpus       float64 `json:"cpus"`
	WriteBytes float64 `json:"write_bytes"`
	ReadBytes  float64 `json:"read_bytes"`
	Threads    float64 `json:"threads"`
	Mem        float64 `json:"mem"`
	Username   string  `json:"username"`
	Hostname   string  `json:"hostname"`
	// contains filtered or unexported fields
}

Stores a job's published proc stats.

type UserJobMetric

type UserJobMetric struct {
	// contains filtered or unexported fields
}

type UserRpcInfo

type UserRpcInfo struct {
	User      string                `json:"user"`
	UserId    int                   `json:"user_id"`
	Count     int                   `json:"count"`
	AvgTime   IntFromOptionalStruct `json:"average_time"`
	TotalTime int                   `json:"total_time"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL