はじめに
よく利用するコマンドについてAIのお力を借りて調査してみた備忘録です。
genesisコマンド
CVMの各種コンポーネントのマネージャーみたいなサービスであるgenesisを操作するためのコマンドです。パスは以下の通りです。
nutanix@NTNX-HOME-A-CVM:192.168.3.112:~$ which genesis
/usr/local/nutanix/cluster/bin/genesis
catで確認
中身を確認してみました。Pythonスクリプトです。
#!/usr/bin/env python3
#
# Copyright (c) 2012 Nutanix Inc. All rights reserved.
#
# Author: cui@nutanix.com
#
# This script creates and launches Genesis.
#
# Usage:
#   genesis --genesis_port=2100

import os

# Absolute path obtained by joining this script's directory with the relative
# path "../.venv/bin/bin/python3.9".
_SCRIPT_DIR = os.path.dirname(__file__)
GENESIS_VIRTUALENV_PATH = os.path.abspath(
    os.path.join(_SCRIPT_DIR, "../.venv/bin/bin/python3.9"))

# Provide defaults for the Python target settings. Values already present in
# the environment are left untouched; only missing variables are filled in.
os.environ.setdefault("PYTHON_TARGET_VERSION", "3.9")
os.environ.setdefault("PYTHON_TARGET_PATH", GENESIS_VIRTUALENV_PATH)

# This import should be at the top after we have set py3 env variables.
import env

# Do gevent monkey patching immediately.
from util.base.gevent_patch import gevent_monkey_patch
gevent_monkey_patch()

import datetime
import gflags
import signal
import sys
import time
import traceback
import pwd

import cluster.client.genesis_utils as genesis_utils
import util.base.log as log
import cluster.client.cmsp.consts  # For CMSP flags to be imported
import util.cluster.consts  # For FLAGS: nutanix_log_dir.
import util.base.command_executor  # For FLAGS: vfork_based_command_executor
from cluster.client.service import service_utils as utils
from util.forker import forker  # For FLAGS.modify_fd_and_exec_binary_path
from util.infrastructure.version import get_cluster_version
from util.misc.greenlet_utils import *
from util.misc.koverage import maybe_enable_code_coverage
from util.base.debug import maybe_monkey_patch_gevent_greenlets

# NOTE(review): this listing was recovered from a paste in which line breaks
# and indentation had been collapsed. Statement order is unchanged, but the
# nesting at the places marked NOTE(review) below is a reconstruction and
# should be confirmed against the file on the CVM.

FLAGS = gflags.FLAGS

gflags.DEFINE_bool("foreground", False, "Run Genesis in foreground.")

gflags.DEFINE_bool("genesis_self_monitoring", True,
                   "Genesis to do self monitoring.")

gflags.DEFINE_bool("genesis_upgrade", False,
                   "Flag to indicate that genesis restarted because "
                   "it is upgrading itself.")

gflags.DEFINE_bool("genesis_debug_stack", True,
                   "Flag to indicate whether signal handler need to be"
                   " registered for debugging greenlet stacks.")

gflags.DEFINE_boolean("version", None, "Print version info and exit")

# The two literals are concatenated, so the separating space is required
# (the help text previously read "shouldbe").
gflags.DEFINE_bool("force", True,
                   "Flag to indicate if services should "
                   "be forcefully stopped")

CONFIG_HOME_DIR_MARKER = "/.config_home_dir_marker"

__doc__ = ("Usage: %s "
           "start|stop [all|<service1> [<service2> ...]]|restart|status\n" %
           sys.argv[0])

# Path of the genesis lock file; assigned by main() before any helper that
# reads it is called.
genesis_lock_file = None


def stop(service, exclude_self=False):
    """
    Stop one service identified by its lock file name.

    Args:
      service: Name of the lock file under FLAGS.nutanix_lock_dir.
      exclude_self: Forwarded unchanged to utils.stop_service.

    Returns:
      False if the lock file does not exist, otherwise the value returned by
      utils.stop_service.
    """
    lockfile = os.path.join(FLAGS.nutanix_lock_dir, service)
    if not os.path.exists(lockfile):
        sys.stderr.write("Service %s does not exist\n" % service)
        return False

    pids = utils.get_pids(lockfile)
    print("%s: Stopping %s (pids %r)" %
          (datetime.datetime.now(), service, pids))
    success = utils.stop_service(lockfile, exclude_self)
    if success:
        # A reported success must leave no pid behind for this lock file.
        remaining_pids = utils.get_pids(lockfile)
        log.CHECK(not remaining_pids,
                  "Failed to stop %s with pids %s, remaining pids %s" %
                  (service, pids, remaining_pids))
    return success


def check_genesis_status():
    """
    Report whether any pid is registered for the genesis lock file.

    Returns:
      True if utils.get_pids returns a non-empty result for genesis_lock_file,
      False otherwise.
    """
    return bool(utils.get_pids(genesis_lock_file))


def stop_services(stop_all, services, force):
    """
    Stop local services through genesis_utils, falling back to stop().

    Args:
      stop_all: True if all services are requested to stop.
      services: List of services to stop.
      force: When true and the genesis_utils call reports failure, every entry
        of services is stopped individually with stop().
    """
    ret = True
    if stop_all:
        ret = genesis_utils.stop_all_local_services()
    elif services:
        ret = genesis_utils.stop_local_services_with_lock_files(services)
    else:
        # Space added between the concatenated literals so the two fields no
        # longer run together in the log line.
        log.ERROR("Invalid parameter received stop_all : %d "
                  "services: %s" % (stop_all, services))

    if not ret and force:
        log.INFO("Node manager RPC failed for service stop stop_all: %d"
                 " services: %s" % (stop_all, services))
        for service in services:
            stop(service)


def start():
    """
    Launch Genesis unless it is already running or a marker blocks it.

    Returns:
      True if Genesis is already running or pids are found after launching it,
      False otherwise.
    """
    skip_rclocal_marker = "/home/nutanix/.skip_rclocal_check"
    if not os.path.exists(skip_rclocal_marker):
        rc_nutanix_script_path = "/usr/local/nutanix/rc.nutanix"
        if os.path.exists(rc_nutanix_script_path):
            msg = ("CRITICAL: Failed to start genesis because CVM is under upgrade "
                   "and postinstall script runs are pending/failed. "
                   "Please reach out to Nutanix support for more info.")
            print(msg)
            # presumably log.FATAL terminates the process -- TODO confirm
            log.FATAL(msg)

    if not os.path.isdir(FLAGS.nutanix_log_dir):
        os.makedirs(FLAGS.nutanix_log_dir)

    pids = utils.get_pids(genesis_lock_file)
    if pids:
        sys.stderr.write("Genesis is already running, pids: %r\n" % pids)
        return True

    if os.path.exists(CONFIG_HOME_DIR_MARKER):
        sys.stderr.write("Not starting Genesis because config_home_dir script "
                         "did not complete execution successfully. Look for "
                         "errors in /tmp/config_home_dir.log.\nRerun the script as "
                         "root or manually fix the issues and delete file %s.\n" %
                         CONFIG_HOME_DIR_MARKER)
        # NOTE(review): reconstructed on the assumption that the sleep/print
        # below belong to the launch path only. Returning False explicitly
        # keeps the result falsy for the "if not start()" callers.
        return False

    argv = [sys.argv[0]]
    # Filter out any command line parameters that are not flags for the
    # actual Genesis service.
    argv.extend([arg for arg in sys.argv[1:] if arg.startswith("--")])
    argv.append("--foreground=true")
    argv.append("--log_thread_id=true")
    if FLAGS.genesis_upgrade:
        argv.append("--genesis_upgrade=true")

    # Log starting genesis to data/logs/genesis.log.
    utils.start_service(node_manager=None,
                        comp_name="genesis",
                        exec_path=None,
                        args=argv,
                        lock_path=genesis_lock_file,
                        block_func=None,
                        rlimits={},
                        realtime_priority=-1,
                        cpuset_exclude_cpu="-1",
                        oom_score=-1,
                        cgroup_memory_limit_mb=-1,
                        command_line_args=sys.argv,
                        enable_ssh_multiplexing=True)

    # Wait one second before reading the pids back from the lock file.
    time.sleep(1)
    pids = utils.get_pids(genesis_lock_file)
    print("%s: Genesis started on pids %r" % (datetime.datetime.now(), pids))
    return len(pids) > 0


def dump_gstacks(signum, frame):
    """
    Signal handler that logs the output of format_greenlet_stacks().

    Args:
      signum: Signal number (unused).
      frame: Current stack frame (unused).
    """
    # format_greenlet_stacks is not defined in this file; presumably it comes
    # from the star import of util.misc.greenlet_utils -- TODO confirm.
    log.INFO("%s" % format_greenlet_stacks())


def main(args):
    """
    Run the Genesis server (foreground mode) or one CLI action.

    Args:
      args: Argument list returned by FLAGS(sys.argv); args[1] is the action
        (start, stop, restart, status or help) in non-foreground mode.
    """
    # For gevent versions 1.5.0, optionally use the vfork based command
    # executor, which is patched to use custom vfork syscall instead of
    # standard/patched os.fork
    # which had some memory leak issues (unrelated to gevent).
    # This was done to address ENG-244046/ ENG-231156.
    # https://docs.google.com/document/d/11rqzfH15M5rX-F_hoo2GfDZi03cjN4lY9xNnlTpcYbc/edit
    FLAGS.vfork_based_command_executor = True

    # Allow only nutanix user.
    current_user = pwd.getpwuid(os.getuid())[0]
    if current_user != 'nutanix':
        log.FATAL("Running as a %s user, please run as nutanix user" %
                  current_user)

    os.umask(0o27)

    global genesis_lock_file
    genesis_lock_file = os.path.join(FLAGS.nutanix_lock_dir, "genesis")

    # Create directory for lock file if necessary.
    if not os.path.exists(FLAGS.nutanix_lock_dir):
        try:
            os.makedirs(FLAGS.nutanix_lock_dir)
        except (OSError, IOError):
            log.FATAL(traceback.format_exc())

    if FLAGS.foreground:
        # Supervisor loop: while self monitoring is on, this process only
        # re-spawns itself with --genesis_self_monitoring=false and cleans up
        # after each child exit.
        while FLAGS.genesis_self_monitoring:
            cmd = ("%s --genesis_self_monitoring=false --logtostderr |& %s -o "
                   "%s/genesis.out" %
                   (" ".join(sys.argv), FLAGS.logpipe_path,
                    FLAGS.nutanix_log_dir))
            argv = ["/bin/bash", "-lc", cmd]
            try:
                os.spawnvp(os.P_WAIT, argv[0], argv)
                # Kill any subprocess of Genesis that may be lingering.
                utils.stop_service(genesis_lock_file, exclude_self=True)
                # Unfreeze nutanix mount points if left frozen during crash.
                # xvdb is instance store volume, and is not frozen or
                # snapshotted.
                if os.path.isfile(FLAGS.frozen_mounts_file):
                    os.system("mount | grep /home/nutanix/ | grep -v '^/dev/xvdb' | "
                              "awk '{print $3}' | xargs -n 1 sudo fsfreeze -u")
                    os.unlink(FLAGS.frozen_mounts_file)
            except Exception as ex:
                log.initialize()
                log.ERROR("Caught unexpected exception %s" % ex)
            # NOTE(review): assumed to run on every iteration, not only after
            # an exception -- confirm against the original indentation.
            time.sleep(2.0)

        # NOTE(review): nesting of the three statements after the inner "if"
        # is reconstructed -- confirm against the original indentation.
        if (FLAGS.genesis_debug_stack and "gevent.monkey" in sys.modules):
            FLAGS.debug_track_greenlets = True
            if FLAGS.debug:
                # default debug_greenlet_overrun_secs timeout is 5 secs
                FLAGS.debug_detect_greenlet_overruns = True
            # Use debug_enable_greenlet_cpu_time_taken gflag to monitor the
            # CPU time actually used by greenlet on heavy workload setups
            maybe_monkey_patch_gevent_greenlets()
            signal.signal(signal.SIGUSR1, dump_gstacks)

        # Initialize log only when actually running Genesis server.
        log.initialize()
        # Start code coverage for genesis, if enabled
        maybe_enable_code_coverage()
        # GenesisServer is imported by the __main__ guard, and only when one of
        # its trigger arguments is present on the command line.
        try:
            GenesisServer().run()
        except:
            # Kept as a bare except: narrowing it would change which failures
            # are reported through log.FATAL.
            log.FATAL(traceback.format_exc())
    else:
        if len(args) < 2 or args[1] == "help":
            sys.stderr.write("%s %s\n" % (__doc__, FLAGS))
            sys.exit(1)

        if args[1] not in ["start", "stop", "restart", "status"]:
            sys.stderr.write("%s\n" % __doc__)
            sys.exit(1)

        stop_genesis = True
        stop_all = False
        genesis_running = False
        log.initialize()

        if args[1] == "start":
            if len(args) > 2:
                sys.stderr.write("%s\n" % __doc__)
                sys.exit(1)
            if not start():
                print("%s: Failed to start genesis" % datetime.datetime.now())
                sys.exit(1)

        elif args[1] == "stop":
            if len(args) > 2:
                genesis_running = check_genesis_status()
                if args[2] == "all":
                    services = os.listdir(FLAGS.nutanix_lock_dir)
                    stop_all = True
                else:
                    services = args[2:]
                # NOTE(review): this check may instead belong inside the
                # "else" branch above -- confirm against the original
                # indentation.
                if 'genesis' not in services:
                    stop_genesis = False
                else:
                    services.remove('genesis')
            else:
                # If no parameters are specified, stop only genesis.
                services = []
                stop_genesis = True

            if services and genesis_running:
                log.INFO("Requesting node manager for stopping the services.")
                stop_services(stop_all, services, FLAGS.force)
            elif services:
                log.INFO("Genesis is not running, stopping services by signals.")
                for service in services:
                    stop(service)
            else:
                log.INFO("No parameters are specified, stop only genesis.")

            if stop_genesis:
                stop('genesis')

            print("%s: Services running on this node:" %
                  datetime.datetime.now())
            for lockfile in os.listdir(FLAGS.nutanix_lock_dir):
                pids = utils.get_pids(
                    os.path.join(FLAGS.nutanix_lock_dir, lockfile))
                print(" %s: %r" % (lockfile, pids))

        elif args[1] == "restart":
            if len(args) > 3 or (len(args) == 3 and args[2] != "genesis"):
                sys.stderr.write("%s\n" % __doc__)
                sys.exit(1)
            # During a self-upgrade the stop request excludes this process.
            exclude_self = bool(FLAGS.genesis_upgrade)
            if not stop("genesis", exclude_self):
                print("%s: Failed to stop genesis" % datetime.datetime.now())
                sys.exit(1)
            if not start():
                print("%s: Failed to start genesis" % datetime.datetime.now())
                sys.exit(1)

        elif args[1] == "status":
            if len(args) > 2:
                sys.stderr.write("%s\n" % __doc__)
                sys.exit(1)
            print("{}: Services running on this node:".format(
                datetime.datetime.now()))
            service_status = utils.get_pids_oneshot()
            for key in sorted(service_status):
                print(" {}: {}".format(key, service_status[key]))


def print_version_and_exit():
    """Print the non-numeric cluster version and exit with status 0."""
    print(get_cluster_version(numeric=False))
    sys.exit(0)


if __name__ == "__main__":
    # Bind args before the try block so the generic exception handler can
    # always format it, even when the failure happens before flag parsing
    # (for example while importing GenesisServer).
    args = list(sys.argv)
    try:
        args_list = ["start", "restart", "--foreground=true", "--helpxml"]
        if any(arg in sys.argv for arg in args_list):
            # For above genesis commands, GenesisServer is needed as a server
            # code to make gflags of all services available for gflag
            # ingestion.
            from cluster.genesis.server.server import GenesisServer
        # NOTE(review): assumed to be unconditional; it may belong inside the
        # "if" above -- confirm against the original indentation.
        FLAGS.debug_greenlet_overrun_secs = 5
        FLAGS.modify_fd_and_exec_binary_path = os.path.abspath(
            os.path.join(__file__, "..", "modify_fd_and_exec"))
        args = FLAGS(sys.argv)
        if FLAGS.version:
            print_version_and_exit()
        main(args)
    except gflags.FlagsError as ex:
        sys.stderr.write("%s\n%s %s\n" % (str(ex), __doc__, FLAGS))
        sys.exit(1)
    except KeyboardInterrupt:
        sys.stderr.write("Exiting on Ctrl-C\n")
        sys.exit(1)
    except Exception:
        if sys.stdout.isatty():
            traceback.print_exc()
            sys.stderr.write("Failed to execute action %s, exiting...\n" %
                             " ".join(args))
        else:
            log.PRINT_EXC("Genesis Exception")
        sys.exit(1)
ChatGPTに聞いてみた
あくまでも参考程度でお願いします。
コメント