ok
Direktori : /usr/lib/fm-agent/plugins/ |
Current File : //usr/lib/fm-agent/plugins/rabbitmq.py |
import agent_util from base64 import b64encode import string import urllib import logging try: # Python 2.x from httplib import HTTPConnection, HTTPSConnection, HTTPException except: from http.client import HTTPConnection, HTTPSConnection, HTTPException BYTES_TO_KB_KEYS = [ "memory", "nodes.mem_used", "nodes.io_read_bytes", "nodes.mem_used_details.rate", "nodes.io_read_bytes_details.rate", "nodes.io_write_bytes", "nodes.io_write_bytes_details.rate", ] def execute_query(config, command, option=""): if command == "nodes": # For nodes we want to double check we haven't got a local cache version # first. cached_result = agent_util.LOCAL_CACHE_RESULTS.get("rabbit@/api/nodes") if cached_result: logging.info( "Retrieved information from the local cache for rabbit@/api/nodes" ) return cached_result elif command == "queues": cached_result = agent_util.LOCAL_CACHE_RESULTS.get("rabbit@/queues") if cached_result: logging.info( "Retrieved information from the local cache for rabbit@/api/queues" ) auth = b64encode("%s:%s" % (config["username"], config["password"])) headers = { "Authorization": "Basic %s" % auth, } output = None conn_host = config.get("management_interface_host", "http://localhost") if conn_host.startswith("https"): conn = HTTPSConnection( string.replace(conn_host, "https://", ""), config["management_interface_port"], ) else: conn = HTTPConnection( string.replace(conn_host, "http://", ""), config["management_interface_port"], ) conn.request("GET", "/api/" + command, headers=headers) r = conn.getresponse() status, output = r.status, r.read() conn.close() if command == "nodes": # Save the latest result in our internal cache agent_util.LOCAL_CACHE_RESULTS["rabbit@/api/nodes"] = output elif command == "queues": agent_util.LOCAL_CACHE_RESULTS["rabbit@/queues"] = output return output class RabbitMQPlugin(agent_util.Plugin): textkey = "rabbitmq" label = "rabbitmq" DEFAULTS = { "management_interface_host": "http://localhost", "management_interface_port": 15672, "username": "guest", "password": "guest", } @classmethod def get_metadata(self, config): status = agent_util.SUPPORTED msg = None # check if rabbitmq is even installed installed = agent_util.which("rabbitmqctl") if not installed and not config.get("from_docker"): self.log.info("rabbitmqctl binary not found") status = agent_util.UNSUPPORTED msg = "rabbitmqctl binary not found" return {} INTERFACE_NOT_FOUND_ERROR = """ No [rabbitmq] config block was found in the config file and we were unable to access the HTTP management interface via the expected port (%s). Please verify that the RabbitMQ management plugin is installed. """ # check that management plugin is installed and running if config: new_config = self.DEFAULTS.copy() new_config.update(config) config = new_config else: config = self.DEFAULTS # check the specified port try: conn_host = config.get("management_interface_host", "http://localhost") if conn_host.startswith("https"): conn = HTTPSConnection( string.replace(conn_host, "https://", ""), config["management_interface_port"], ) else: conn = HTTPConnection( string.replace(conn_host, "http://", ""), config["management_interface_port"], ) conn.request("GET", "/") r = conn.getresponse() conn.close() except Exception: status = agent_util.MISCONFIGURED msg = INTERFACE_NOT_FOUND_ERROR % config["management_interface_port"] self.log.exception( "Interface not found %s" % config["management_interface_port"] ) self.log.info(msg) if status == agent_util.SUPPORTED and r.status != 200: status = agent_util.MISCONFIGURED msg = INTERFACE_NOT_FOUND_ERROR % config["management_interface_port"] self.log.info(msg) objects, queue_opts = None, None queues_schema = {"resource": "string", "vhost": "string", "queue": "string"} queue_opts = [] node_opts = [] if status == agent_util.SUPPORTED: try: overview = agent_util.json_loads(execute_query(config, "overview")) objects = [obj.title() for obj in overview["object_totals"].keys()] queues = agent_util.json_loads(execute_query(config, "queues")) for entry in queues: resource_name = entry["vhost"] + ":" + entry["name"] queue_opts.append( { "resource": resource_name, "queue": entry["name"], "vhost": entry["vhost"], } ) nodes = agent_util.json_loads(execute_query(config, "nodes")) node_opts = [node["name"] for node in nodes] except: self.log.exception("error querying rabbitmq management API") status = agent_util.MISCONFIGURED msg = "Unable to access RabbitMQ metrics. Please ensure that the management plugin is installed and that your credentials are valid in the agent config file." metadata = { "object_totals": { "label": "# of Objects", "options": objects, "status": status, "error_message": msg, }, "queue.messages": { "label": "Queue Length", "options": queue_opts, "options_schema": queues_schema, "status": status, "error_message": msg, }, "queue.memory": { "label": "Queue Memory Usage (kB)", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "kB", "error_message": msg, }, "queue.messages_ready": { "label": "Queue number of messages ready for delivery", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.messages_unacknowledged": { "label": "Queue number of unacknowledged messages", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.message_stats.publish": { "label": "Queue messages published recently", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.message_stats.publish_details.rate": { "label": "Queue message publishing rate", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count/sec", "error_message": msg, }, "queue.message_stats.deliver_get": { "label": "Queue messages delivered recently", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.message_stats.deliver_get_details.rate": { "label": "Queue message delivery rate", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.message_stats.redeliver": { "label": "Queue message redelivered recently", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count", "error_message": msg, }, "queue.message_stats.redeliver_details.rate": { "label": "Queue messages redeliver rate", "options": queue_opts, "options_schema": queues_schema, "status": status, "unit": "count/sec", "error_message": msg, }, "overview.queue_totals.messages_ready": { "label": "Cluster number of messages ready for delivery", "options": None, "status": status, "error_message": msg, "unit": "count", }, "overview.queue_totals.messages_unacknowledged": { "label": "Cluster number of unacknowledged messages", "options": None, "status": status, "error_message": msg, "unit": "count", }, "overview.message_stats.publish": { "label": "Cluster messages published recently", "options": None, "status": status, "error_message": msg, "unit": "count", }, "overview.message_stats.publish_details.rate": { "label": "Cluster messages publish rate", "options": None, "status": status, "error_message": msg, "unit": "msgs/sec", }, "overview.message_stats.deliver_get": { "label": "Cluster messages delivered to consumers recently", "options": None, "status": status, "error_message": msg, "unit": "count", }, "overview.message_stats.deliver_details.rate": { "label": "Cluster message delivery rate", "options": None, "status": status, "error_message": msg, "unit": "msgs/sec", }, "overview.message_stats.redeliver": { "label": "Cluster messages redelivered to consumers recently", "options": None, "status": status, "error_message": msg, "unit": "count", }, "overview.message_stats.redeliver_details.rate": { "label": "Cluster message redelivery rate", "options": None, "status": status, "error_message": msg, "unit": "msgs/sec", }, "nodes.mem_used": { "label": "Node total amount of memory used", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs", }, "nodes.mem_used_details.rate": { "label": "Node memory used rate", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs/sec", }, "nodes.io_read_count": { "label": "Node total number of read operations by the persister", "options": node_opts, "status": status, "error_message": msg, "unit": "count", }, "nodes.io_read_count_details.rate": { "label": "Node rate of read count", "options": node_opts, "status": status, "error_message": msg, "unit": "count/sec", }, "nodes.io_read_bytes": { "label": "Node total number of bytes read from disk by the persister", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs", }, "nodes.io_read_bytes_details.rate": { "label": "Node rate of read kBs", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs/sec", }, "nodes.io_write_bytes": { "label": "Node total number of bytes written to disk by the persister", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs", }, "nodes.io_write_bytes_details.rate": { "label": "Node rate of written bytes", "options": node_opts, "status": status, "error_message": msg, "unit": "kBs/sec", }, "nodes.fd_used": { "label": "Node file descriptors used", "options": node_opts, "status": status, "error_message": msg, "unit": "count", }, "nodes.fd_used_details.rate": { "label": "Node file descriptors used rate", "options": node_opts, "status": status, "error_message": msg, "unit": "count/sec", }, "nodes.sockets_total": { "label": "Node sockets available", "options": node_opts, "status": status, "error_message": msg, "unit": "count", }, "nodes.sockets_used": { "label": "Node sockets used", "options": node_opts, "status": status, "error_message": msg, "unit": "count", }, "nodes.socktets_used_details.rate": { "label": "Node sockets used rate", "options": node_opts, "status": status, "error_message": msg, "unit": "count/sec", }, "nodes.proc_used": { "label": "Node number of Erlang processes in use", "options": node_opts, "status": status, "error_message": msg, "unit": "count", }, "nodes.proc_used_details.rate": { "label": "Node processor used rate", "options": node_opts, "status": status, "error_message": msg, "unit": "count/sec", }, } return metadata @classmethod def get_metadata_docker(self, container, config): if "management_interface_host" not in config: try: ip = agent_util.get_container_ip(container) config["management_interface_host"] = ip except Exception as e: self.log.exception(e) config["from_docker"] = True return self.get_metadata(config) def check(self, textkey, option, config): if config: new_config = self.DEFAULTS.copy() new_config.update(config) config = new_config else: config = self.DEFAULTS if "." in textkey: command, key = textkey.split(".", 1) if command.startswith("queue"): command += "s" try: json_data = execute_query(config, command, option) except Exception: self.log.exception( "Unable to get %s from %s" % (command, config.get("management_interface_host")) ) return None data = agent_util.json_loads(json_data) # If no object (queue, exchange, host, etc.) is specified, report an aggregate try: if isinstance(data, list): if command.startswith("nodes"): # Node calls return all nodes information, # need to filter them by option found_data = None for entry in data: if entry.get("name") == option: # Found our node found_data = entry break if not found_data: self.log.info( "Unable to find information for node %s" % option ) return None else: res = self._parse_dict_tree(key, found_data) elif command.startswith("queues"): # Queue calls are organized by vhost and queue name. # Need to filter them found_data = None vhost, queue_name = option.split(":") for entry in data: if ( entry.get("vhost") == vhost and entry.get("name") == queue_name ): found_data = entry break if not found_data: self.log.info( "Unable to find information for vhost %s queue %s" % (vhost, queue_name) ) return None else: res = self._parse_dict_tree(key, found_data) else: res = sum([obj[key] for obj in data]) else: if "." not in key: res = data[key] if not bool(res): res = None else: res = self._parse_dict_tree(key, data) except Exception: self.log.exception("Error gathering data") return None if key == BYTES_TO_KB_KEYS: res /= 1000.0 elif textkey == "object_totals": json_data = execute_query(config, "overview") res = agent_util.json_loads(json_data)["object_totals"][option.lower()] if textkey.endswith("rate") and res < 0: # Rates that go below 0 we will turn to 0. res = 0 return res def check_docker(self, container, textkey, option, config): if "management_interface_host" not in config: try: ip = agent_util.get_container_ip(container) config["management_interface_host"] = ip except Exception as e: self.log.exception(e) config["from_docker"] = True return self.check(textkey, option, config) def _parse_dict_tree(self, key, data): """ Using the key as the nodes, parse the data dictionary to extract the information. E.g metric_used_details.rate looks for data['metric_used_details']['rate'] """ entries = key.split(".") value = data for entry in entries: value = value.get(entry) if not bool(value): value = None break return value