// 首先确定监测指标:度量什么(Name), // 再确定监测点 package metrics import ( "fmt" "log" "net/http" "sync" "leafstalk/conf" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) var ( prometheusReporter *PrometheusReporter once sync.Once ) // PrometheusReporter reports metrics to prometheus type PrometheusReporter struct { serverType string project string countReportersMap map[string]*prometheus.CounterVec summaryReportersMap map[string]*prometheus.SummaryVec gaugeReportersMap map[string]*prometheus.GaugeVec histogramReportersMap map[string]*prometheus.HistogramVec additionalLabels map[string]string } func (p *PrometheusReporter) registerCustomMetrics( constLabels map[string]string, additionalLabelsKeys []string, spec *CustomMetricsSpec, ) { // 监测点指定比例的分布情况 for _, summary := range spec.Summaries { p.summaryReportersMap[summary.Name] = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: p.serverType, Subsystem: summary.Subsystem, Name: summary.Name, Help: summary.Help, Objectives: summary.Objectives, ConstLabels: constLabels, }, append(additionalLabelsKeys, summary.Labels...), ) } // 监测点数值变化度量,变大变小 for _, gauge := range spec.Gauges { p.gaugeReportersMap[gauge.Name] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: gauge.Subsystem, Name: gauge.Name, Help: gauge.Help, ConstLabels: constLabels, }, append(additionalLabelsKeys, gauge.Labels...), ) } // 监测点命中次数或 for _, counter := range spec.Counters { p.countReportersMap[counter.Name] = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: p.serverType, Subsystem: counter.Subsystem, Name: counter.Name, Help: counter.Help, ConstLabels: constLabels, }, append(additionalLabelsKeys, counter.Labels...), ) } } func (p *PrometheusReporter) registerMetrics( constLabels, additionalLabels map[string]string, spec *CustomMetricsSpec, ) { // constLabels["game"] = p.game // constLabels["serverType"] = p.serverType p.additionalLabels = additionalLabels additionalLabelsKeys := make([]string, 0, len(additionalLabels)) for key := range additionalLabels { additionalLabelsKeys = append(additionalLabelsKeys, key) } p.registerCustomMetrics(constLabels, additionalLabelsKeys, spec) // 定义多个指标,并选择响应的度量方式 // HandlerResponseTimeMs summary // 响应耗时分布统计 p.summaryReportersMap[ResponseTime] = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: p.serverType, Subsystem: "handler", Name: ResponseTime, Help: "处理一条消息耗时NS", Objectives: map[float64]float64{0.7: 0.02, 0.95: 0.005, 0.99: 0.001}, ConstLabels: constLabels, }, append([]string{"route", "status", "type", "code"}, additionalLabelsKeys...), ) // ProcessDelay summary // 用户请求延时分布 p.summaryReportersMap[ProcessDelay] = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: p.serverType, Subsystem: "handler", Name: ProcessDelay, Help: "处理消息延迟NS", Objectives: map[float64]float64{0.7: 0.02, 0.95: 0.005, 0.99: 0.001}, ConstLabels: constLabels, }, append([]string{"route", "type"}, additionalLabelsKeys...), ) // ConnectedClients gauge // 当前的客户端连接数 p.gaugeReportersMap[ConnectedClients] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "acceptor", Name: ConnectedClients, Help: "当前连接的客户端数目", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 当前服务端连接数 p.gaugeReportersMap[CountServers] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "service_cluster", Name: CountServers, Help: "当前连接的服务端数量", ConstLabels: constLabels, }, append([]string{"type"}, additionalLabelsKeys...), ) // 通道可用容量 p.gaugeReportersMap[ChannelCapacity] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "channel", Name: ChannelCapacity, Help: "the available capacity of the channel", ConstLabels: constLabels, }, append([]string{"channel"}, additionalLabelsKeys...), ) // 丢弃RPC消息的数量 p.gaugeReportersMap[DroppedMessages] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "rpc_server", Name: DroppedMessages, Help: "the number of rpc server dropped messages (messages that are not handled)", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 当前协程数量 p.gaugeReportersMap[Goroutines] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "sys", Name: Goroutines, Help: "当前协程数量", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 当前堆大小 p.gaugeReportersMap[HeapSize] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "sys", Name: HeapSize, Help: "当前堆的大小", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 当前堆栈对象数目 p.gaugeReportersMap[HeapObjects] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "sys", Name: HeapObjects, Help: "当前堆对象数目", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 当前重试JOB数量 p.gaugeReportersMap[WorkerJobsRetry] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "worker", Name: WorkerJobsRetry, Help: "当前重试JOB数量", ConstLabels: constLabels, }, additionalLabelsKeys, ) //当前队列大小 p.gaugeReportersMap[WorkerQueueSize] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "worker", Name: WorkerQueueSize, Help: "当前队列大小", ConstLabels: constLabels, }, append([]string{"queue"}, additionalLabelsKeys...), ) // p.gaugeReportersMap[WorkerJobsTotal] = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: p.serverType, Subsystem: "worker", Name: WorkerJobsTotal, Help: "the total executed jobs", ConstLabels: constLabels, }, append([]string{"status"}, additionalLabelsKeys...), ) // 计数度量 p.countReportersMap[ExceededRateLimiting] = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: p.serverType, Subsystem: "acceptor", Name: ExceededRateLimiting, Help: "the number of blocked requests by exceeded rate limiting", ConstLabels: constLabels, }, additionalLabelsKeys, ) // 玩家请求的消息计数 p.countReportersMap[MessageHandler] = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: p.serverType, Subsystem: "messageHandler", Name: MessageHandler, Help: "玩家发送请求数量", ConstLabels: constLabels, }, append([]string{"handler"}, additionalLabelsKeys...), ) p.histogramReportersMap[MessageResponseTime] = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: p.serverType, Name: MessageResponseTime, Help: "各种消息响应耗时", }, []string{"handler"}, ) // 注册到注册表 toRegister := make([]prometheus.Collector, 0) for _, c := range p.countReportersMap { toRegister = append(toRegister, c) } for _, c := range p.gaugeReportersMap { toRegister = append(toRegister, c) } for _, c := range p.summaryReportersMap { toRegister = append(toRegister, c) } for _, c := range p.histogramReportersMap { toRegister = append(toRegister, c) } prometheus.MustRegister(toRegister...) } // StartPrometheusReporter gets the prometheus reporter singleton func StartPrometheusReporter( serverType string, config *conf.Config, // constLabels map[string]string, ) (*PrometheusReporter, error) { once.Do(func() { var ( port = config.GetInt("metrics.prometheus.port") game = config.GetString("projectName") additionalLabels = config.GetStringMapString("metrics.additionalTags") constLabels = config.GetStringMapString("metrics.constTags") ) prometheusReporter = &PrometheusReporter{ serverType: serverType, project: game, countReportersMap: make(map[string]*prometheus.CounterVec), summaryReportersMap: make(map[string]*prometheus.SummaryVec), gaugeReportersMap: make(map[string]*prometheus.GaugeVec), histogramReportersMap: make(map[string]*prometheus.HistogramVec), } spec, err := NewCustomMetricsSpec(config) if err != nil { log.Fatalf("NewCustomMetricsSpec error. %v", err) return } prometheusReporter.registerMetrics(constLabels, additionalLabels, spec) http.Handle("/metrics", promhttp.Handler()) go (func() { log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", port), nil)) })() }) return prometheusReporter, nil } // ReportSummary reports a summary metric func (p *PrometheusReporter) ReportSummary(metric string, labels map[string]string, value float64) error { sum := p.summaryReportersMap[metric] if sum != nil { labels = p.ensureLabels(labels) sum.With(labels).Observe(value) return nil } return ErrMetricNotKnown } // ReportCount reports a summary metric func (p *PrometheusReporter) ReportCount(metric string, labels map[string]string, count float64) error { cnt := p.countReportersMap[metric] if cnt != nil { labels = p.ensureLabels(labels) cnt.With(labels).Add(count) return nil } return ErrMetricNotKnown } // ReportGauge reports a gauge metric func (p *PrometheusReporter) ReportGauge(metric string, labels map[string]string, value float64) error { g := p.gaugeReportersMap[metric] if g != nil { labels = p.ensureLabels(labels) g.With(labels).Set(value) return nil } return ErrMetricNotKnown } // ReportHistogram reports a histogram metric func (p *PrometheusReporter) ReportHistogram(metric string, label string, value float64) error { g := p.histogramReportersMap[metric] if g != nil { g.WithLabelValues(label).Observe(value) return nil } return ErrMetricNotKnown } // ensureLabels checks if labels contains the additionalLabels values, // otherwise adds them with the default values func (p *PrometheusReporter) ensureLabels(labels map[string]string) map[string]string { for key, defaultVal := range p.additionalLabels { if _, ok := labels[key]; !ok { labels[key] = defaultVal } } return labels }