NodeJS Server 进程自动重启
背景:
NodeJS的特点是单进程,事件驱动,非阻塞式IO编程,当主进程抛异常挂掉,整个NodeJS Server就会停止。
因此,对当前的NodeJS进程进行监控非常重要:当NodeJS进程停止时,要能在指定时间内重新启动,继续提供服务。
思路:
1.起一个守护进程,用于与各子进程(虚拟进程)进行心跳通信,守护进程监测子进程是否有回应,若连续三次没有回应,则将该子进程重启。
2.子进程与守护进程进行心跳通信,若守护进程不存在,则子进程自动退出。
示例图:
守护进程:bootstrap.js
/**
 * @author wubocao
 * Entry module that starts the node application.
 * 1. Makes the directory of the main script the current working directory.
 * 2. Starts one daemon per requested module; each daemon runs that module
 *    as a child process.
 * 3. Listens for shutdown events, stops the children and exits.
 *
 * Usage: node bootstrap.js -m main1.js -m main2.js
 * Several modules may also be given to a single -m, separated by "|".
 */
console.log("start bootstrap");

var path = require("path");
var addDeamon = require("./deamon.js").addDeamon;

(function main() {
  // Run from the directory that contains the main script.
  process.chdir(path.dirname(require.main.filename));

  var modulesNames = [];
  var args = [];
  var deamons = [];

  // argv[0] is the node executable and argv[1] is this script; neither is
  // an application argument, so parsing starts at index 2 and they are not
  // forwarded to the child processes.
  var argv = process.argv || [];
  for (var i = 2; i < argv.length; i++) {
    if (argv[i] == '-m') {
      // The value after -m holds one or more module names ("a.js|b.js").
      var names = argv[++i];
      if (names) {
        modulesNames = modulesNames.concat(names.split("|").filter(Boolean));
      }
    } else if (argv[i] == '-ppid') {
      // Drop the -ppid flag together with its value.
      i++;
    } else {
      args.push(argv[i]);
    }
  }

  // A default module could be configured here instead of giving up.
  if (modulesNames.length == 0) {
    console.log('please defined the modules like: node bootstrap.js -m main1.js -m main2.js');
    return;
  }
  console.log(modulesNames);

  modulesNames.forEach(function(moduleName) {
    deamons.push(addDeamon(moduleName, args));
  });

  // Stops every child process started above.
  function stopAll() {
    deamons.forEach(function(deamon) {
      deamon.stop();
    });
  }

  process.on("exit", function() {
    console.log("parent exit");
    stopAll();
  });

  process.on("SIGQUIT", function() {
    console.log("request for exit");
    stopAll();
    process.exit(0);
  });
})();
守护进程新建一个或者多个daemon对象,每一个daemon启动一个新的业务进程:daemon.js(注意:bootstrap.js中是以 require("./deamon.js") 引入该模块的,保存文件时文件名需与之保持一致)
/** * @author wubocao * 守护进程模块 * 使用addDeamon(model,args,option)来添加一个守护进程 * 该函数返回一个守护进程对象,通过调用该对象的stop和init来停止和重新启动该进程 * */ var cp = require("child_process"); var util = require("util"); //对象深拷贝 function copyObj(obj, stack) { stack = stack || []; var t; if (obj == null) { return t; } if (util.isArray(obj)) {// 数组 var instance = copyObj.getStack(obj, stack); if (instance) { return instance; } var len = obj.length; t = new Array(len); stack.push([ obj, t ]); for ( var i = 0; i < len; i++) { t[i] = copyObj(obj[i]); } } else if (typeof obj == "object") { var instance = copyObj.getStack(obj, stack); if (instance) { return instance; } t = {}; stack.push([ obj, t ]); for ( var k in obj) { t[k] = copyObj(obj[k]); } } else { t = obj; } return t; } copyObj.getStack = function(obj, stack) { for ( var i = stack.length; i--;) { if (stack[i][0] === obj) { return stack[i][1]; } } return null; }; // 守护进程对象 function deamon(model, args, option) { if (!model || typeof model != "string") { throw new Error("illegal model argument"); } var __args; if (args) { if (util.isArray(args)) { __args = copyObj(args); } else { __args = [ args ]; } } var __opt; if (typeof option == "object") { __opt = copyObj(option); } else { __opt = {}; } this.__model = model; this.__args = __args; this.__opt = __opt; this.__cpr = null; this.__cprid = 0; this.__heartbeat = 0; this.init(); } deamon.prototype = { init : function() { if (this.__cpr) { return; } this.__kill = false; console.log("deamon init"); var exeTime = this.__opt.timeout; var start = new Date().getTime(); var context = this; (function run() { console.log("process start"); context.__cpr = cp.fork(context.__model, context.__args, context.__opt); context.__cprid = context.__cpr.pid; context.__cpr.on("exit", function(e) { console.log("process exit"); if (context.__kill) { return; } if (exeTime > 0) { var end = new Date().getTime(); if (end - start < exeTime) { run(); } else { context.__cpr = null; context.__cprid = 0; } } else { run(); } }); 
context.__cpr.on("message", function(message) { if (typeof message == "object") { switch (message.name) { case "proccessInfo":// 进程信息(心跳检查) context.__messageCall && context.__messageCall(message.value); break; case "broadcast":// 经常广播消息 try { context.__cpr.send(message.value); } catch (e) { console.error("broadcast message error:", e); } break; } } }); })(); // 开始监控心跳 this.startHeartbeat(); }, stop : function() { if (this.__cpr) { console.log("deamon stop"); this.__kill = true; this.__cpr.disconnect(); this.__cpr.kill('SIGQUIT'); this.__cpr = null; this.__cprid = 0; } }, stopForce : function() { if (this.__cpr) { console.log("deamon stop force"); this.__kill = true; // this.__cpr.kill('SIGKILL'); cp.exec("kill -9 " + this.__cprid); this.__cpr = null; this.__cprid = 0; } }, getInfo : function(callback, msg) { if (this.__cpr) { this.__messageCall = callback; try { if (msg) { console.log("try get child process info with message[" + msg + "]"); } this.__cpr.send({ name : "proccessInfo", msg : msg || "" }); } catch (e) { console.error("send message 'proccessInfo' error:", e); } } else { console.error("no child process when get child process info"); } }, //开始心跳 startHeartbeat : function() { var deamon = this; //先停掉原来的心跳 this.stopHeartbeat(); //times为监控心跳连续失败次数 var times = 0; //心跳检查 function checkDeamon() { //做1500毫秒等待,判断deamon子进程是否挂掉 var t = setTimeout(function() { times++; t = 0; if (times >= 3) { console.log("heart check with no response more then 3 times,restart now"); times = 0; deamon.stopHeartbeat(); deamon.stopForce(); setTimeout(function() { deamon.init(); }, 1000); } }, 1500); deamon.getInfo(function(memInfo) { if (t != 0) { clearTimeout(t); t = 0; } times = 0; //console.log(memInfo); }, times > 0 ? 
"retry with times:" + times : ""); } //每5秒获取一下 this.__heartbeat = setInterval(checkDeamon, 5000); }, //停止心跳 stopHeartbeat : function() { this.__heartbeat = this.__heartbeat && clearInterval(this.__heartbeat); } }; exports.addDeamon = function(model, args, option) { args = args || []; // 过滤掉ppid参数 for ( var i = 0, len = args.length; i < len; i++) { if (args[i] == '-ppid') { i++; } } return new deamon(model, args.concat([ '-ppid', process.pid ]), option); }
监控进程monitor.js,此JS由业务JS引入,用于和daemon进行心跳通信,确保进程是活动进程:
require('./monitor/module_listener.js');

/**
 * Heartbeat client. When the process was started with a -ppid argument it
 * answers "proccessInfo" requests from the parent, sends a "heartbeat"
 * broadcast every 5 seconds and exits when the parent stops answering.
 */
(function() {
  // A -ppid argument means the process was started by the parent daemon.
  if (process.argv && process.argv.indexOf('-ppid') !== -1) {
    console.log('startHB');
    startHB();
  }

  // Installs the signal/message handlers and starts the periodic check.
  function startHB() {
    // Number of consecutive heartbeats without a reply.
    var times = 0;
    // Timer that waits for the reply to the current heartbeat.
    var heartbeatTimer;

    // Exit signal.
    process.on("SIGQUIT", function() {
      console.log("request for exit");
      process.exit(0);
    });

    // The IPC channel to the parent was closed.
    process.on("disconnect", function() {
      console.log("request for exit");
      process.exit(-1);
    });

    // Messages from the parent.
    process.on("message", function(message) {
      console.log('child receive msg: ' + message);
      if (typeof message == "object") {
        if (message && message.name == "proccessInfo") {
          process.send({
            name : "proccessInfo",
            value : process.memoryUsage()
          });
        }
      } else if (typeof message === "string") {
        switch (message) {
        case "heartbeat":// reply to our heartbeat
          if (heartbeatTimer) {
            times = 0;
            clearTimeout(heartbeatTimer);
            heartbeatTimer = 0;
          }
          break;
        }
      }
    });

    // Sends one heartbeat and counts it as failed when no reply arrives
    // within 1500 ms; the third consecutive failure ends the process.
    function checkParent() {
      heartbeatTimer = setTimeout(function() {
        times++;
        if (times >= 3) {
          times = 0;
          console.log("heart check with no response more then 3 times,exit now");
          process.exit(-1);
        }
      }, 1500);
      times > 0 && console.log("try get parent heartbeat " + times + " times");
      try {
        // process.send is undefined when the process has no IPC channel,
        // for example when it was not started with fork().
        process.send({
          name : "broadcast",
          value : "heartbeat"
        });
      } catch (e) {
        console.error("send heartbeat error:", e);
        process.exit(-1);
      }
    }

    // Check every 5 seconds.
    setInterval(checkParent, 5000);
  }
})();
业务示例:
main1.js:
var http = require('http');

console.log('init main1: pid = ' + process.pid);

// Load the heartbeat client that talks to the parent daemon.
require('./monitor.js');

/**
 * Answers every request with a fixed plain-text greeting.
 */
function handleRequest(req, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.end('Hello World, main1 \n');
}

var server = http.createServer(handleRequest);
server.listen('8938');

console.log('main1 server running at http://127.0.0.1:8938');
main2.js
var http = require('http');

console.log('init main2: pid = ' + process.pid);

// Load the heartbeat client that talks to the parent daemon.
require('./monitor.js');

/**
 * Answers every request with a fixed plain-text body written in two parts.
 */
function handleRequest(req, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.write('afdfadfdafdas');
  res.end('Hello World main 2\n');
}

var server = http.createServer(handleRequest);
server.listen('8937');

console.log('main2 server running at http://127.0.0.1:8937');
注意:需要在main1.js和main2.js中引入monitor.js(即 require('./monitor.js')),否则业务进程不会与守护进程进行心跳通信。
启动进程:
node bootstrap.js -m main1.js -m main2.js
源码中,还包含了一个node.sh,用于管理start或者是restart, stop 等操作:
sudo chmod +x node.sh
./node.sh bootstrap.js -m main1.js -m main2.js start