Collects health information. I<CollectHealth and CollectPerfMetrics cannot be true at the same time>. Defaults to C<false>.
The health information metrics are the following:
- health_status Overall health summary (0: normal | 1: non-critical | 2: critical | 3: fatal).
- percentage_remaining The module’s remaining life as a percentage value of factory expected life span.
- percentage_used The module’s used life as a percentage value of factory expected life span.
- power_on_time The lifetime the DIMM has been powered on in seconds.
- uptime The current uptime of the DIMM for the current power cycle in seconds.
- last_shutdown_time The time the system was last shutdown. The time is represented in epoch (seconds).
- media_temp The media’s current temperature in degrees Celsius.
- controller_temp The controller’s current temperature in degrees Celsius.
- max_media_temp The media’s the highest temperature reported in degrees Celsius.
- max_controller_temp The controller’s highest temperature reported in degrees Celsius.
+ health_status Overall health summary (0: normal | 1: non-critical | 2: critical | 3: fatal).
+ lifespan_remaining The module’s remaining life as a percentage of the factory-expected life span.
+ lifespan_used The module’s used life as a percentage of the factory-expected life span.
+ power_on_time The lifetime the DIMM has been powered on in seconds.
+ uptime The current uptime of the DIMM for the current power cycle in seconds.
+ last_shutdown_time The time the system was last shut down, expressed as epoch time (seconds).
+ media_temperature The media’s current temperature in degrees Celsius.
+ controller_temperature The controller’s current temperature in degrees Celsius.
+ max_media_temperature The media’s highest temperature reported in degrees Celsius.
+ max_controller_temperature The controller’s highest temperature reported in degrees Celsius.
+ tsc_cycles The number of TSC cycles during each interval.
+ epoch The timestamp in seconds at which the metrics are collected from DCPMM DIMMs.
=item B<CollectPerfMetrics> I<true>|I<false>
media_write_ops Number of write operations performed to the physical media.
host_reads Number of read operations received from the CPU (memory controller).
host_writes Number of write operations received from the CPU (memory controller).
- read_hit_ratio Measures the efficiency of the buffer in the read path. Range of 0.0 - 0.75.
+ read_hit_ratio Measures the efficiency of the buffer in the read path. Range of 0.0 - 1.0.
write_hit_ratio Measures the efficiency of the buffer in the write path. Range of 0.0 - 1.0.
+ tsc_cycles The number of TSC cycles during each interval.
+ epoch The timestamp in seconds at which the metrics are collected from DCPMM DIMMs.
=item B<EnableDispatchAll> I<false>
if (strncmp(type_inst, "read_hit_ratio", strlen("read_hit_ratio")) == 0 ||
strncmp(type_inst, "write_hit_ratio", strlen("write_hit_ratio")) == 0 ||
- strncmp(type_inst, "media_temp", strlen("media_temp")) == 0 ||
- strncmp(type_inst, "controller_temp", strlen("controller_temp")) == 0) {
+ strncmp(type_inst, "media_temperature", strlen("media_temperature")) ==
+ 0 ||
+ strncmp(type_inst, "controller_temperature",
+ strlen("controller_temperature")) == 0) {
snprintf(n.message, sizeof(n.message), "Value: %0.2f", value);
} else {
snprintf(n.message, sizeof(n.message), "Value: %0.0f", value);
if (pmwatch_config.collect_perf_metrics) {
add_metric(dimm_num, "timestamp", "epoch",
PMWATCH_OP_BUF_EPOCH(&pmw_output_buf[i]));
- add_metric(dimm_num, "timestamp", "timestamp",
+ add_metric(dimm_num, "timestamp", "tsc_cycles",
PMWATCH_OP_BUF_TIMESTAMP(&pmw_output_buf[i]));
add_metric(dimm_num, "media", "total_bytes_read",
PMWATCH_OP_BUF_TOTAL_BYTES_READ(&pmw_output_buf[i]));
}
add_metric(dimm_num, "timestamp", "epoch",
PMWATCH_OP_BUF_EPOCH(&pmw_output_buf[i]));
- add_metric(dimm_num, "timestamp", "timestamp",
+ add_metric(dimm_num, "timestamp", "tsc_cycles",
PMWATCH_OP_BUF_TIMESTAMP(&pmw_output_buf[i]));
add_metric(dimm_num, "health", "health_status",
PMWATCH_OP_BUF_HEALTH_STATUS(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "percentage_remaining",
+ add_metric(dimm_num, "health", "lifespan_remaining",
PMWATCH_OP_BUF_PERCENTAGE_REMAINING(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "percentage_used",
+ add_metric(dimm_num, "health", "lifespan_used",
PMWATCH_OP_BUF_PERCENTAGE_USED(&pmw_output_buf[i]));
add_metric(dimm_num, "health", "power_on_time",
PMWATCH_OP_POWER_ON_TIME(&pmw_output_buf[i]));
PMWATCH_OP_BUF_UPTIME(&pmw_output_buf[i]));
add_metric(dimm_num, "health", "last_shutdown_time",
PMWATCH_OP_BUF_LAST_SHUTDOWN_TIME(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "media_temp",
+ add_metric(dimm_num, "health", "media_temperature",
PMWATCH_OP_BUF_MEDIA_TEMP(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "controller_temp",
+ add_metric(dimm_num, "health", "controller_temperature",
PMWATCH_OP_BUF_CONTROLLER_TEMP(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "max_media_temp",
+ add_metric(dimm_num, "health", "max_media_temperature",
PMWATCH_OP_BUF_MAX_MEDIA_TEMP(&pmw_output_buf[i]));
- add_metric(dimm_num, "health", "max_controller_temp",
+ add_metric(dimm_num, "health", "max_controller_temperature",
PMWATCH_OP_BUF_MAX_CONTROLLER_TEMP(&pmw_output_buf[i]));
}
}