PR
スポンサーリンク

[Nutanix]genesisコマンドの中身をAIに調べてもらった

スポンサーリンク
Nutanix
Nutanix logo
記事内に広告が含まれています。
スポンサーリンク
スポンサーリンク

はじめに

よく利用するコマンドについてAIのお力を借りて調査してみた備忘録です。

genesisコマンド

genesisコマンドは、CVM上の各種コンポーネント（サービス）を管理するマネージャーのようなサービスであるgenesisを操作するためのコマンドです。パスは以下の通りです。

nutanix@NTNX-HOME-A-CVM:192.168.3.112:~$ which genesis
/usr/local/nutanix/cluster/bin/genesis

catで確認

中身を確認してみました。Pythonスクリプトです。

#!/usr/bin/env python3
#
# Copyright (c) 2012 Nutanix Inc. All rights reserved.
#
# Author: cui@nutanix.com
#
# This script creates and launches Genesis.
#
# Usage:
# genesis --genesis_port=2100

import os

# Absolute path of the Python 3.9 interpreter, resolved relative to the
# directory that contains this script.
GENESIS_VIRTUALENV_PATH = os.path.abspath(os.path.join(
  os.path.dirname(__file__), "../.venv/bin/bin/python3.9"))

# Only fill in the variables that the caller has not already exported;
# values that are present in the environment are left untouched.
os.environ.setdefault("PYTHON_TARGET_VERSION", "3.9")
os.environ.setdefault("PYTHON_TARGET_PATH", GENESIS_VIRTUALENV_PATH)

# This import should be at the top after we have set py3 env variables.
# NOTE(review): presumably `env` consumes PYTHON_TARGET_VERSION and
# PYTHON_TARGET_PATH at import time, which is why it has to follow the
# environment setup above -- confirm against the env module.
import env

# Do gevent monkey patching immediately.
# Keep this call ahead of the remaining imports so that they are loaded only
# after gevent_monkey_patch() has run.
from util.base.gevent_patch import gevent_monkey_patch
gevent_monkey_patch()

import datetime
import gflags
import signal
import sys
import time
import traceback
import pwd

import cluster.client.genesis_utils as genesis_utils
import util.base.log as log
import cluster.client.cmsp.consts # For CMSP flags to be imported
import util.cluster.consts # For FLAGS: nutanix_log_dir.
import util.base.command_executor # For FLAGS: vfork_based_command_executor

from cluster.client.service import service_utils as utils
from util.forker import forker # For FLAGS.modify_fd_and_exec_binary_path
from util.infrastructure.version import get_cluster_version
from util.misc.greenlet_utils import *
from util.misc.koverage import maybe_enable_code_coverage
from util.base.debug import maybe_monkey_patch_gevent_greenlets

FLAGS = gflags.FLAGS

# Command line flags owned by this launcher script.
gflags.DEFINE_bool("foreground", False,
                   "Run Genesis in foreground.")
gflags.DEFINE_bool("genesis_self_monitoring", True,
                   "Genesis to do self monitoring.")
gflags.DEFINE_bool("genesis_upgrade", False,
                   "Flag to indicate that genesis restarted because "
                   "it is upgrading itself.")
gflags.DEFINE_bool("genesis_debug_stack", True,
                   "Flag to indicate whether signal handler need to be"
                   " registered for debugging greenlet stacks.")
gflags.DEFINE_boolean("version", None, "Print version info and exit")

# The two literals are concatenated, so the first one must end with a space;
# otherwise the help text renders as "shouldbe".
gflags.DEFINE_bool("force", True, "Flag to indicate if services should "
                   "be forcefully stopped")

# start() refuses to launch Genesis while this marker file exists.
CONFIG_HOME_DIR_MARKER = "/.config_home_dir_marker"

# Usage text printed for invalid invocations.
__doc__ = ("Usage: %s "
           "start|stop [all|<service1> [<service2> ...]]|restart|status\n"
           % sys.argv[0])

# Path of the Genesis lock file; main() assigns it once flags are parsed.
genesis_lock_file = None

def stop(service, exclude_self=False):
  """
  Stop one service identified by its lock file name.

  Args:
    service: Name of the service; its lock file is looked up under
      FLAGS.nutanix_lock_dir.
    exclude_self: Passed through unchanged to utils.stop_service().

  Returns:
    False if the lock file does not exist, otherwise whatever
    utils.stop_service() returned.
  """
  lock_path = os.path.join(FLAGS.nutanix_lock_dir, service)
  if not os.path.exists(lock_path):
    sys.stderr.write("Service %s does not exist\n" % service)
    return False

  pids_before = utils.get_pids(lock_path)
  print("%s: Stopping %s (pids %r)" %
        (datetime.datetime.now(), service, pids_before))

  stopped = utils.stop_service(lock_path, exclude_self)
  if not stopped:
    return stopped

  # A reported success must leave no pid behind for this lock file.
  pids_after = utils.get_pids(lock_path)
  log.CHECK(not pids_after,
            "Failed to stop %s with pids %s, remaining pids %s" %
            (service, pids_before, pids_after))
  return stopped

def check_genesis_status():
  """
  Tell whether utils.get_pids() reports any pid for the Genesis lock file.

  Returns:
    True : If at least one pid is reported (genesis is running)
    False : If no pid is reported (genesis is not running)
  """
  return bool(utils.get_pids(genesis_lock_file))

def stop_services(stop_all, services, force):
  """
  This utility stops all local services, make rpc call
  to node manager.

  The request goes through genesis_utils first. When that reports failure
  and `force` is set, every entry of `services` is stopped directly with
  stop().

  Args:
    stop_all: True if all services requested to stop
    services: List of services to stop
    force : gflag default value is True.
  """
  ret = True
  if stop_all:
    ret = genesis_utils.stop_all_local_services()
  elif services:
    ret = genesis_utils.stop_local_services_with_lock_files(services)
  else:
    # Trailing space keeps "%d" and "services:" apart in the joined literal.
    log.ERROR("Invalid parameter received stop_all : %d "
              "services: %s" % (stop_all, services))

  if not ret and force:
    log.INFO("Node manager RPC failed for service stop stop_all: %d"
             " services: %s" % (stop_all, services))
    for service in services:
      stop(service)

def start():
  """
  Launch the Genesis service unless it is already running or blocked.

  Returns:
    True if Genesis is already running, or if pids are found for the Genesis
    lock file one second after the launch. False otherwise, including when
    the config_home_dir marker file blocks the start.
  """
  # While rc.nutanix is still present the start is treated as fatal, unless
  # the skip marker file exists.
  skip_rclocal_marker = "/home/nutanix/.skip_rclocal_check"
  if not os.path.exists(skip_rclocal_marker):
    rc_nutanix_script_path = "/usr/local/nutanix/rc.nutanix"
    if os.path.exists(rc_nutanix_script_path):
      msg = ("CRITICAL: Failed to start genesis because CVM is under upgrade "
             "and postinstall script runs are pending/failed. "
             "Please reach out to Nutanix support for more info.")
      print(msg)
      log.FATAL(msg)

  if not os.path.isdir(FLAGS.nutanix_log_dir):
    os.makedirs(FLAGS.nutanix_log_dir)

  pids = utils.get_pids(genesis_lock_file)
  if pids:
    sys.stderr.write("Genesis is already running, pids: %r\n" % pids)
    return True

  if os.path.exists(CONFIG_HOME_DIR_MARKER):
    sys.stderr.write("Not starting Genesis because config_home_dir script "
                     "did not complete execution successfully. Look for "
                     "errors in /tmp/config_home_dir.log.\nRerun the script as "
                     "root or manually fix the issues and delete file %s.\n"
                     % CONFIG_HOME_DIR_MARKER)
    # Report the refusal with an explicit boolean, like the other branches.
    return False

  argv = [sys.argv[0]]
  # Filter out any command line parameters that are not flags for the
  # actual Genesis service.
  argv.extend([arg for arg in sys.argv[1:] if arg.startswith("--")])
  argv.append("--foreground=true")
  argv.append("--log_thread_id=true")
  if FLAGS.genesis_upgrade:
    argv.append("--genesis_upgrade=true")

  # Log starting genesis to data/logs/genesis.log.
  utils.start_service(node_manager=None, comp_name="genesis", exec_path=None,
                      args=argv, lock_path=genesis_lock_file,
                      block_func=None, rlimits={}, realtime_priority=-1,
                      cpuset_exclude_cpu="-1", oom_score=-1,
                      cgroup_memory_limit_mb=-1,
                      command_line_args=sys.argv,
                      enable_ssh_multiplexing=True)
  # Give the service a moment before reading the pids back.
  time.sleep(1)
  pids = utils.get_pids(genesis_lock_file)
  print("%s: Genesis started on pids %r" % (datetime.datetime.now(), pids))
  return len(pids) > 0

def dump_gstacks(signum, frame):
  """
  Signal handler: write the formatted greenlet stacks to the INFO log.

  Args:
    signum: Signal number (unused).
    frame: Interrupted stack frame (unused).
  """
  stacks = format_greenlet_stacks()
  log.INFO("%s" % stacks)

def main(args):
  """
  Entry point of the genesis command.

  With --foreground the process either supervises a child copy of itself
  (while FLAGS.genesis_self_monitoring is set) or runs GenesisServer.
  Without --foreground it acts as a command line tool handling
  start / stop / restart / status.

  Args:
    args: Positional arguments left after flag parsing; args[0] is the
      program name and args[1] the action.
  """

  # For gevent versions 1.5.0, optionally use the vfork based command executor,
  # which is patched to use custom vfork syscall instead of
  # standard/patched os.fork
  # which had some memory leak issues (unrelated to gevent).
  # This was done to address ENG-244046/ ENG-231156.
  # https://docs.google.com/document/d/11rqzfH15M5rX-F_hoo2GfDZi03cjN4lY9xNnlTpcYbc/edit
  FLAGS.vfork_based_command_executor = True

  # Allow only nutanix user.
  if not pwd.getpwuid(os.getuid())[0] == 'nutanix':
    log.FATAL("Running as a %s user, please run as nutanix user"
              % pwd.getpwuid(os.getuid())[0])

  # umask 027: new files get no group-write bit and no permissions for others.
  os.umask(0o27)
  global genesis_lock_file
  genesis_lock_file = os.path.join(FLAGS.nutanix_lock_dir, "genesis")

  # Create directory for lock file if necessary.
  if not os.path.exists(FLAGS.nutanix_lock_dir):
    try:
      os.makedirs(FLAGS.nutanix_lock_dir)
    except (OSError, IOError) as e:
      log.FATAL(traceback.format_exc())

  if FLAGS.foreground:
    # Supervisor loop: re-run this same command line with self monitoring
    # disabled, wait for it to exit, clean up, then start it again. Nothing
    # inside the loop changes FLAGS.genesis_self_monitoring.
    while FLAGS.genesis_self_monitoring:
      # "|&" pipes both stdout and stderr of the child into the logpipe
      # binary, which writes to genesis.out in the log directory.
      cmd = ("%s --genesis_self_monitoring=false --logtostderr |& %s -o "
             "%s/genesis.out" %
             (" ".join(sys.argv), FLAGS.logpipe_path, FLAGS.nutanix_log_dir))
      argv = [ "/bin/bash", "-lc", cmd ]
      try:
        # Blocks until the child shell exits.
        os.spawnvp(os.P_WAIT, argv[0], argv)
        # Kill any subprocess of Genesis that may be lingering.
        utils.stop_service(genesis_lock_file, exclude_self=True)
        # Unfreeze nutanix mount points if left frozen during crash. xvdb is
        # instance store volume, and is not frozen or snapshotted.
        if os.path.isfile(FLAGS.frozen_mounts_file):
          os.system("mount | grep /home/nutanix/ | grep -v '^/dev/xvdb' | "
                    "awk '{print $3}' | xargs -n 1 sudo fsfreeze -u")
          os.unlink(FLAGS.frozen_mounts_file)

      except Exception as ex:
        # Logging is initialized here because this supervising process has
        # not called log.initialize() on this path before.
        log.initialize()
        log.ERROR("Caught unexpected exception %s" % ex)

      # Pause between respawns.
      time.sleep(2.0)

    # Reached only when self monitoring is off: this process is the server.
    if (FLAGS.genesis_debug_stack and "gevent.monkey" in sys.modules):
      FLAGS.debug_track_greenlets = True
      if FLAGS.debug:
        # default debug_greenlet_overrun_secs timeout is 5 secs
        FLAGS.debug_detect_greenlet_overruns = True
      # Use debug_enable_greenlet_cpu_time_taken gflag to monitor the CPU
      # time actually used by greenelet on heavy workload setups
      maybe_monkey_patch_gevent_greenlets()
      # SIGUSR1 dumps all greenlet stacks to the log.
      signal.signal(signal.SIGUSR1, dump_gstacks)
    # Initialize log only when actually running Genesis server.
    log.initialize()
    # Start code coverage for genesis, if enabled
    maybe_enable_code_coverage()
    try:
      GenesisServer().run()
    except:
      # Any failure of the server, whatever its type, is logged as fatal
      # together with its traceback.
      log.FATAL(traceback.format_exc())
  else:
    # Command line mode: validate the action, then dispatch on it.
    if len(args) < 2 or args[1] == "help":
      sys.stderr.write("%s %s\n" % (__doc__, FLAGS))
      sys.exit(1)

    if args[1] not in ["start", "stop", "restart", "status"]:
      sys.stderr.write("%s\n" % __doc__)
      sys.exit(1)

    stop_genesis = True
    stop_all = False
    genesis_running = False
    log.initialize()
    if args[1] == "start":
      # "start" takes no further positional arguments.
      if len(args) > 2:
        sys.stderr.write("%s\n" % __doc__)
        sys.exit(1)
      if not start():
        print("%s: Failed to start genesis" % datetime.datetime.now())
        sys.exit(1)

    elif args[1] == "stop":
      if len(args) > 2:
        genesis_running = check_genesis_status()
        if args[2] == "all":
          # "all" means every lock file currently in the lock directory.
          services = os.listdir(FLAGS.nutanix_lock_dir)
          stop_all = True
        else:
          services = args[2:]

        # Genesis itself is handled separately at the end, so take it out of
        # the list and remember whether it was requested.
        if 'genesis' not in services:
          stop_genesis = False
        else:
          services.remove('genesis')
      else:
        # If no parameters are specified, stop only genesis.
        services = []
        stop_genesis = True

      if services and genesis_running:
        log.INFO("Requesting node manager for stopping the services.")
        stop_services(stop_all, services, FLAGS.force)
      elif services:
        log.INFO("Genesis is not running, stopping services by signals.")
        for service in services:
          stop(service)
      else:
        # Also reached when "genesis" was the only service requested.
        log.INFO("No parameters are specified, stop only genesis.")

      if stop_genesis:
        stop('genesis')

      # Show what is left after the stop request.
      print("%s: Services running on this node:" % datetime.datetime.now())
      for lockfile in os.listdir(FLAGS.nutanix_lock_dir):
        pids = utils.get_pids(os.path.join(FLAGS.nutanix_lock_dir, lockfile))
        print(" %s: %r" % (lockfile, pids))

    elif args[1] == "restart":
      # Only "restart" or "restart genesis" is accepted.
      if len(args) > 3 or (len(args) == 3 and args[2] != "genesis"):
        sys.stderr.write("%s\n" % __doc__)
        sys.exit(1)
      # During a self-upgrade the stop is issued with exclude_self=True.
      if FLAGS.genesis_upgrade:
        exclude_self = True
      else:
        exclude_self = False
      if not stop("genesis", exclude_self):
        print("%s: Failed to stop genesis" % datetime.datetime.now())
        sys.exit(1)
      if not start():
        print("%s: Failed to start genesis" % datetime.datetime.now())
        sys.exit(1)
    elif args[1] == "status":
      # "status" takes no further positional arguments.
      if len(args) > 2:
        sys.stderr.write("%s\n" % __doc__)
        sys.exit(1)
      print("{}: Services running on this node:".
            format(datetime.datetime.now()))
      service_status = utils.get_pids_oneshot()
      # Print the services sorted by name.
      for key in sorted(service_status.keys()):
        print(" {}: {}".format(key, service_status[key]))

def print_version_and_exit():
  """Print the non-numeric cluster version string and exit with status 0."""
  version = get_cluster_version(numeric=False)
  print(version)
  sys.exit(0)

if __name__ == "__main__":
  # Start from the raw command line so that the generic error handler below
  # can always name the action, even when the failure happens before flag
  # parsing has produced the real positional arguments.
  args = list(sys.argv)
  try:
    args_list = ["start", "restart", "--foreground=true", "--helpxml"]
    if any(arg in sys.argv for arg in args_list):
      # For above genesis commands, GenesisServer is needed as a server code to
      # make gflags of all services available for gflag ingestion.
      from cluster.genesis.server.server import GenesisServer
    # Set before parsing, so values given on the command line still win.
    FLAGS.debug_greenlet_overrun_secs = 5
    # abspath() collapses "<this file>/.." to the directory of this script.
    FLAGS.modify_fd_and_exec_binary_path = os.path.abspath(
      os.path.join(__file__, "..", "modify_fd_and_exec"))
    args = FLAGS(sys.argv)
    if FLAGS.version:
      print_version_and_exit()
    main(args)
  except gflags.FlagsError as ex:
    sys.stderr.write("%s\n%s %s\n" % (str(ex), __doc__, FLAGS))
    sys.exit(1)
  except KeyboardInterrupt:
    sys.stderr.write("Exiting on Ctrl-C\n")
    sys.exit(1)
  except Exception:
    # Interactive runs get the traceback on the terminal; otherwise it goes
    # to the log.
    if sys.stdout.isatty():
      traceback.print_exc()
      sys.stderr.write("Failed to execute action %s, exiting...\n" %
                       " ".join(args))
    else:
      log.PRINT_EXC("Genesis Exception")
    sys.exit(1)

ChatGPTに聞いてみた

あくまでも参考程度でお願いします。

コメント

タイトルとURLをコピーしました