Documentation
¶
Overview ¶
SPDX-FileCopyrightText: 2023 Rivos Inc.
SPDX-License-Identifier: Apache-2.0
SPDX-FileCopyrightText: 2023 Rivos Inc.
SPDX-License-Identifier: Apache-2.0
Index ¶
- func InitPromServer(config *Config) http.Handler
- func MemToFloat(mem string) (float64, error)
- type AccountCsvFetcher
- type AccountLimitMetric
- type AccountMetric
- type AtomicProcFetcher
- type AtomicThrottledCache
- type CliFlags
- type CliJsonLicMetricFetcher
- type CliOpts
- type CliScraper
- type CoercedInt
- type Config
- type CpuSummaryMetric
- type DiagMetric
- type DiagnosticsCollector
- type FeatureJobMetric
- type IntFromOptionalStruct
- type JobCliFallbackFetcher
- type JobJsonFetcher
- type JobMetric
- type JobResource
- type JobsCollector
- type LicCollector
- type LicenseMetric
- type LimitCollector
- type MemSummaryMetric
- type MessageRpcInfo
- type MockFetchErrored
- type MockScraper
- type NAbleFloat
- type NAbleTime
- type NodeCliFallbackFetcher
- type NodeJsonFetcher
- type NodeMetric
- type NodeResource
- type NodesCollector
- type PartitionJobMetric
- type PartitionMetric
- type PerStateMetric
- type SdiagResponse
- type SlurmByteScraper
- type SlurmMetricFetcher
- type SlurmPrimitiveMetric
- type SlurmVersion
- type StateReasonMetric
- type StringByteScraper
- type TraceCollector
- type TraceConfig
- type TraceInfo
- type UserJobMetric
- type UserRpcInfo
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func InitPromServer ¶
func MemToFloat ¶
MemToFloat converts a Slurm memory string to a float64 number of bytes.
Types ¶
type AccountCsvFetcher ¶
type AccountCsvFetcher struct {
// contains filtered or unexported fields
}
func (*AccountCsvFetcher) FetchMetrics ¶
func (acf *AccountCsvFetcher) FetchMetrics() ([]AccountLimitMetric, error)
func (*AccountCsvFetcher) ScrapeDuration ¶
func (acf *AccountCsvFetcher) ScrapeDuration() time.Duration
func (*AccountCsvFetcher) ScrapeError ¶
func (acf *AccountCsvFetcher) ScrapeError() prometheus.Counter
type AccountLimitMetric ¶
type AccountMetric ¶
type AccountMetric struct {
// contains filtered or unexported fields
}
type AtomicProcFetcher ¶
type AtomicProcFetcher struct {
sync.Mutex
Info map[int64]*TraceInfo
// contains filtered or unexported fields
}
func NewAtomicProFetcher ¶
func NewAtomicProFetcher(sampleRate uint64) *AtomicProcFetcher
func (*AtomicProcFetcher) Add ¶
func (m *AtomicProcFetcher) Add(trace *TraceInfo) error
func (*AtomicProcFetcher) Fetch ¶
func (m *AtomicProcFetcher) Fetch() map[int64]*TraceInfo
type AtomicThrottledCache ¶
type AtomicThrottledCache[C SlurmPrimitiveMetric] struct { sync.Mutex // contains filtered or unexported fields }
func NewAtomicThrottledCache ¶
func NewAtomicThrottledCache[C SlurmPrimitiveMetric](limit float64) *AtomicThrottledCache[C]
func (*AtomicThrottledCache[C]) FetchOrThrottle ¶
func (atc *AtomicThrottledCache[C]) FetchOrThrottle(fetchFunc func() ([]C, error)) ([]C, error)
Atomically fetches from either the cache or the collector, resetting and hydrating as necessary.
type CliFlags ¶
type CliFlags struct {
SlurmLicEnabled bool
SlurmDiagEnabled bool
SlurmCliFallback bool
TraceEnabled bool
SacctEnabled bool
SlurmPollLimit float64
LogLevel string
ListenAddress string
MetricsPath string
SlurmSqueueOverride string
SlurmSinfoOverride string
SlurmDiagOverride string
SlurmAcctOverride string
TraceRate uint64
TracePath string
SlurmLicenseOverride string
}
type CliJsonLicMetricFetcher ¶
type CliJsonLicMetricFetcher struct {
// contains filtered or unexported fields
}
func (*CliJsonLicMetricFetcher) FetchMetrics ¶
func (cjl *CliJsonLicMetricFetcher) FetchMetrics() ([]LicenseMetric, error)
func (*CliJsonLicMetricFetcher) ScrapeDuration ¶
func (cjl *CliJsonLicMetricFetcher) ScrapeDuration() time.Duration
func (*CliJsonLicMetricFetcher) ScrapeError ¶
func (cjl *CliJsonLicMetricFetcher) ScrapeError() prometheus.Counter
type CliScraper ¶
type CliScraper struct {
// contains filtered or unexported fields
}
Implements SlurmByteScraper by fetching data from the CLI.
func NewCliScraper ¶
func NewCliScraper(args ...string) *CliScraper
func (*CliScraper) Duration ¶
func (cf *CliScraper) Duration() time.Duration
func (*CliScraper) FetchRawBytes ¶
func (cf *CliScraper) FetchRawBytes() ([]byte, error)
type CoercedInt ¶
type CoercedInt int
func (*CoercedInt) UnmarshalJSON ¶
func (ci *CoercedInt) UnmarshalJSON(data []byte) error
type Config ¶
type CpuSummaryMetric ¶
type CpuSummaryMetric struct {
Total float64
Idle float64
Load float64
PerState map[string]*PerStateMetric
}
type DiagMetric ¶
type DiagMetric struct {
ServerThreadCount int `json:"server_thread_count"`
DBDAgentQueueSize int `json:"dbd_agent_queue_size"`
RpcByUser []UserRpcInfo `json:"rpcs_by_user"`
RpcByMessageType []MessageRpcInfo `json:"rpcs_by_message_type"`
BackfillJobCount int `json:"bf_backfilled_jobs"`
BackfillCycleCountSum int `json:"bf_cycle_sum"`
BackfillCycleCounter int `json:"bf_cycle_counter"`
BackfillLastDepth int `json:"bf_last_depth"`
BackfillLastDepthTry int `json:"bf_last_depth_try"`
}
type DiagnosticsCollector ¶
type DiagnosticsCollector struct {
// contains filtered or unexported fields
}
func NewDiagsCollector ¶
func NewDiagsCollector(config *Config) *DiagnosticsCollector
func (*DiagnosticsCollector) Collect ¶
func (sc *DiagnosticsCollector) Collect(ch chan<- prometheus.Metric)
func (*DiagnosticsCollector) Describe ¶
func (sc *DiagnosticsCollector) Describe(ch chan<- *prometheus.Desc)
type FeatureJobMetric ¶
type FeatureJobMetric struct {
// contains filtered or unexported fields
}
type IntFromOptionalStruct ¶
type IntFromOptionalStruct int
func (*IntFromOptionalStruct) UnmarshalJSON ¶
func (ffoo *IntFromOptionalStruct) UnmarshalJSON(data []byte) error
type JobCliFallbackFetcher ¶
type JobCliFallbackFetcher struct {
// contains filtered or unexported fields
}
func (*JobCliFallbackFetcher) FetchMetrics ¶
func (jcf *JobCliFallbackFetcher) FetchMetrics() ([]JobMetric, error)
func (*JobCliFallbackFetcher) ScrapeDuration ¶
func (jcf *JobCliFallbackFetcher) ScrapeDuration() time.Duration
func (*JobCliFallbackFetcher) ScrapeError ¶
func (jcf *JobCliFallbackFetcher) ScrapeError() prometheus.Counter
type JobJsonFetcher ¶
type JobJsonFetcher struct {
// contains filtered or unexported fields
}
func (*JobJsonFetcher) FetchMetrics ¶
func (jjf *JobJsonFetcher) FetchMetrics() ([]JobMetric, error)
func (*JobJsonFetcher) ScrapeDuration ¶
func (jjf *JobJsonFetcher) ScrapeDuration() time.Duration
func (*JobJsonFetcher) ScrapeError ¶
func (jjf *JobJsonFetcher) ScrapeError() prometheus.Counter
type JobMetric ¶
type JobMetric struct {
Account string `json:"account"`
JobId float64 `json:"job_id"`
EndTime float64 `json:"end_time"`
JobState string `json:"job_state"`
Partition string `json:"partition"`
UserName string `json:"user_name"`
Features string `json:"features"`
JobResources JobResource `json:"job_resources"`
StateReason string `json:"state_reason"`
}
type JobResource ¶
type JobResource struct {
AllocCpus float64 `json:"allocated_cpus"`
AllocNodes map[string]*NodeResource `json:"allocated_nodes"`
}
type JobsCollector ¶
type JobsCollector struct {
// contains filtered or unexported fields
}
func NewJobsController ¶
func NewJobsController(config *Config) *JobsCollector
func (*JobsCollector) Collect ¶
func (jc *JobsCollector) Collect(ch chan<- prometheus.Metric)
func (*JobsCollector) Describe ¶
func (jc *JobsCollector) Describe(ch chan<- *prometheus.Desc)
func (*JobsCollector) SetFetcher ¶
func (jc *JobsCollector) SetFetcher(fetcher SlurmMetricFetcher[JobMetric])
type LicCollector ¶
type LicCollector struct {
// contains filtered or unexported fields
}
func NewLicCollector ¶
func NewLicCollector(config *Config) *LicCollector
func (*LicCollector) Collect ¶
func (lc *LicCollector) Collect(ch chan<- prometheus.Metric)
func (*LicCollector) Describe ¶
func (lc *LicCollector) Describe(ch chan<- *prometheus.Desc)
type LicenseMetric ¶
type LimitCollector ¶
type LimitCollector struct {
// contains filtered or unexported fields
}
func NewLimitCollector ¶
func NewLimitCollector(config *Config) *LimitCollector
func (*LimitCollector) Collect ¶
func (lc *LimitCollector) Collect(ch chan<- prometheus.Metric)
func (*LimitCollector) Describe ¶
func (lc *LimitCollector) Describe(ch chan<- *prometheus.Desc)
type MemSummaryMetric ¶
type MessageRpcInfo ¶
type MessageRpcInfo struct {
MessageType string `json:"message_type"`
TypeId int `json:"type_id"`
Count int `json:"count"`
AvgTime IntFromOptionalStruct `json:"average_time"`
TotalTime int `json:"total_time"`
}
type MockFetchErrored ¶
type MockFetchErrored struct{}
func (*MockFetchErrored) Duration ¶
func (f *MockFetchErrored) Duration() time.Duration
func (*MockFetchErrored) FetchRawBytes ¶
func (f *MockFetchErrored) FetchRawBytes() ([]byte, error)
type MockScraper ¶
type MockScraper struct {
CallCount int
// contains filtered or unexported fields
}
Implements SlurmByteScraper by pulling fixtures instead; used exclusively for testing.
func (*MockScraper) Duration ¶
func (f *MockScraper) Duration() time.Duration
func (*MockScraper) FetchRawBytes ¶
func (f *MockScraper) FetchRawBytes() ([]byte, error)
type NAbleFloat ¶
type NAbleFloat float64
func (*NAbleFloat) UnmarshalJSON ¶
func (naf *NAbleFloat) UnmarshalJSON(data []byte) error
type NAbleTime ¶
func (*NAbleTime) UnmarshalJSON ¶
Reports the beginning of time in the case of N/A.
type NodeCliFallbackFetcher ¶
type NodeCliFallbackFetcher struct {
// contains filtered or unexported fields
}
func (*NodeCliFallbackFetcher) FetchMetrics ¶
func (cmf *NodeCliFallbackFetcher) FetchMetrics() ([]NodeMetric, error)
func (*NodeCliFallbackFetcher) ScrapeDuration ¶
func (cmf *NodeCliFallbackFetcher) ScrapeDuration() time.Duration
func (*NodeCliFallbackFetcher) ScrapeError ¶
func (cmf *NodeCliFallbackFetcher) ScrapeError() prometheus.Counter
type NodeJsonFetcher ¶
type NodeJsonFetcher struct {
// contains filtered or unexported fields
}
func (*NodeJsonFetcher) FetchMetrics ¶
func (cmf *NodeJsonFetcher) FetchMetrics() ([]NodeMetric, error)
func (*NodeJsonFetcher) ScrapeDuration ¶
func (cmf *NodeJsonFetcher) ScrapeDuration() time.Duration
func (*NodeJsonFetcher) ScrapeError ¶
func (cmf *NodeJsonFetcher) ScrapeError() prometheus.Counter
type NodeMetric ¶
type NodeMetric struct {
AllocMemory float64 `json:"alloc_memory"`
AllocCpus float64 `json:"alloc_cpus"`
Cpus float64 `json:"cpus"`
CpuLoad float64 `json:"cpu_load"`
FreeMemory float64 `json:"free_memory"`
Hostname string `json:"hostname"`
IdleCpus float64 `json:"idle_cpus"`
Partitions []string `json:"partitions"`
RealMemory float64 `json:"real_memory"`
State string `json:"state"`
Weight float64 `json:"weight"`
}
type NodeResource ¶
type NodeResource struct {
Mem float64 `json:"memory"`
}
type NodesCollector ¶
type NodesCollector struct {
// contains filtered or unexported fields
}
func NewNodeCollecter ¶
func NewNodeCollecter(config *Config) *NodesCollector
func (*NodesCollector) Collect ¶
func (nc *NodesCollector) Collect(ch chan<- prometheus.Metric)
func (*NodesCollector) Describe ¶
func (nc *NodesCollector) Describe(ch chan<- *prometheus.Desc)
func (*NodesCollector) SetFetcher ¶
func (nc *NodesCollector) SetFetcher(fetcher SlurmMetricFetcher[NodeMetric])
type PartitionJobMetric ¶
type PartitionJobMetric struct {
// contains filtered or unexported fields
}
type PartitionMetric ¶
type PerStateMetric ¶
type SdiagResponse ¶
type SdiagResponse struct {
// Response coercible between slurm 23 and 24 data versions
Meta struct {
SlurmVersion SlurmVersion `json:"Slurm"`
Plugins map[string]string `json:"plugins"`
Plugin map[string]string `json:"plugin"`
} `json:"meta"`
Statistics DiagMetric
Errors []string `json:"errors"`
Warnings []string `json:"warnings"`
}
func (*SdiagResponse) IsDataParserPlugin ¶
func (sr *SdiagResponse) IsDataParserPlugin() bool
type SlurmByteScraper ¶
Interface for getting data from Slurm; used for dependency injection/ease of testing and for adding slurmrestd support later.
type SlurmMetricFetcher ¶
type SlurmMetricFetcher[M SlurmPrimitiveMetric] interface { FetchMetrics() ([]M, error) ScrapeDuration() time.Duration ScrapeError() prometheus.Counter }
type SlurmPrimitiveMetric ¶
type SlurmPrimitiveMetric interface {
NodeMetric | JobMetric | DiagMetric | LicenseMetric | AccountLimitMetric
}
type SlurmVersion ¶
type SlurmVersion struct {
Version struct {
Major CoercedInt `json:"major"`
Micro CoercedInt `json:"micro"`
Minor CoercedInt `json:"minor"`
} `json:"version"`
Release string `json:"release"`
}
type StateReasonMetric ¶
type StateReasonMetric struct {
// contains filtered or unexported fields
}
type StringByteScraper ¶
type StringByteScraper struct {
Callcount int
// contains filtered or unexported fields
}
Implements SlurmByteScraper by emitting a string payload instead; used exclusively for testing.
func (*StringByteScraper) Duration ¶
func (es *StringByteScraper) Duration() time.Duration
func (*StringByteScraper) FetchRawBytes ¶
func (es *StringByteScraper) FetchRawBytes() ([]byte, error)
type TraceCollector ¶
type TraceCollector struct {
ProcessFetcher *AtomicProcFetcher
// contains filtered or unexported fields
}
func NewTraceCollector ¶
func NewTraceCollector(config *Config) *TraceCollector
func (*TraceCollector) Collect ¶
func (c *TraceCollector) Collect(ch chan<- prometheus.Metric)
func (*TraceCollector) Describe ¶
func (c *TraceCollector) Describe(ch chan<- *prometheus.Desc)
type TraceConfig ¶
type TraceConfig struct {
// contains filtered or unexported fields
}
type TraceInfo ¶
type TraceInfo struct {
JobId int64 `json:"job_id"`
Pid int64 `json:"pid"`
Cpus float64 `json:"cpus"`
WriteBytes float64 `json:"write_bytes"`
ReadBytes float64 `json:"read_bytes"`
Threads float64 `json:"threads"`
Mem float64 `json:"mem"`
Username string `json:"username"`
Hostname string `json:"hostname"`
// contains filtered or unexported fields
}
Stores a job's published proc stats.
type UserJobMetric ¶
type UserJobMetric struct {
// contains filtered or unexported fields
}
type UserRpcInfo ¶
type UserRpcInfo struct {
User string `json:"user"`
UserId int `json:"user_id"`
Count int `json:"count"`
AvgTime IntFromOptionalStruct `json:"average_time"`
TotalTime int `json:"total_time"`
}