Changeset - 95a492462291
[Not reviewed]
0 2 0
HanzZ - 13 years ago 2012-07-28 06:04:10
hanzz.k@gmail.com
Give backend some time to start before spawning new one
2 files changed with 9 insertions and 1 deletions:
0 comments (0 inline, 0 general)
include/transport/networkpluginserver.h
Show inline comments
 
@@ -107,51 +107,52 @@ class NetworkPluginServer {
 
		void handleFTStartPayload(const std::string &payload);
 
		void handleFTFinishPayload(const std::string &payload);
 
		void handleFTDataPayload(Backend *b, const std::string &payload);
 
		void handleQueryPayload(Backend *b, const std::string &payload);
 

	
 
		void handleUserCreated(User *user);
 
		void handleRoomJoined(User *user, const Swift::JID &who, const std::string &room, const std::string &nickname, const std::string &password);
 
		void handleRoomLeft(User *user, const std::string &room);
 
		void handleUserReadyToConnect(User *user);
 
		void handleUserPresenceChanged(User *user, Swift::Presence::ref presence);
 
		void handleUserDestroyed(User *user);
 

	
 
		void handleBuddyUpdated(Buddy *buddy, const Swift::RosterItemPayload &item);
 
		void handleBuddyRemoved(Buddy *buddy);
 
		void handleBuddyAdded(Buddy *buddy, const Swift::RosterItemPayload &item);
 

	
 
		void handleBlockToggled(Buddy *buddy);
 

	
 
		void handleVCardUpdated(User *user, boost::shared_ptr<Swift::VCard> vcard);
 
		void handleVCardRequired(User *user, const std::string &name, unsigned int id);
 

	
 
		void handleFTStateChanged(Swift::FileTransfer::State state, const std::string &userName, const std::string &buddyName, const std::string &fileName, unsigned long size, unsigned long id);
 
		void handleFTAccepted(User *user, const std::string &buddyName, const std::string &fileName, unsigned long size, unsigned long ftID);
 
		void handleFTRejected(User *user, const std::string &buddyName, const std::string &fileName, unsigned long size);
 
		void handleFTDataNeeded(Backend *b, unsigned long ftid);
 

	
 
		void send(boost::shared_ptr<Swift::Connection> &, const std::string &data);
 

	
 
		void pingTimeout();
 
		void sendPing(Backend *c);
 
		Backend *getFreeClient(bool acceptUsers = true, bool longRun = false);
 

	
 
		UserManager *m_userManager;
 
		VCardResponder *m_vcardResponder;
 
		RosterResponder *m_rosterResponder;
 
		BlockResponder *m_blockResponder;
 
		Config *m_config;
 
		boost::shared_ptr<Swift::ConnectionServer> m_server;
 
		std::list<Backend *>  m_clients;
 
		Swift::Timer::ref m_pingTimer;
 
		Swift::Timer::ref m_collectTimer;
 
		Component *m_component;
 
		std::list<User *> m_waitingUsers;
 
		bool m_isNextLongRun;
 
		std::map<unsigned long, FileTransferManager::Transfer> m_filetransfers;
 
		FileTransferManager *m_ftManager;
 
		std::vector<std::string> m_crashedBackends;
 
		AdminInterface *m_adminInterface;
 
		// True while a backend spawned by getFreeClient() has not connected yet.
		// Set in getFreeClient() right before exec_(); cleared in
		// handleNewClientConnection() and on every pingTimeout() tick, so a backend
		// that fails to start can not block spawning forever.
		bool m_startingBackend;
 
};
 

	
 
}
src/networkpluginserver.cpp
Show inline comments
 
@@ -184,192 +184,195 @@ static void SigCatcher(int n) {
 
	pid_t result;
 
	int status;
 
	// Read exit code from all children to not have zombies arround
 
	// WARNING: Do not put LOG4CXX_ here, because it can lead to deadlock
 
	while ((result = waitpid(-1, &status, WNOHANG)) > 0) {
 
		if (result != 0) {
 
			if (WIFEXITED(status)) {
 
				if (WEXITSTATUS(status) != 0) {
 
// 					LOG4CXX_ERROR(logger, "Backend can not be started, exit_code=" << WEXITSTATUS(status));
 
				}
 
			}
 
			else {
 
// 				LOG4CXX_ERROR(logger, "Backend can not be started");
 
			}
 
		}
 
	}
 
}
 
#endif
 

	
 
// Applies the fields of a Buddy payload received from a backend to 'buddy'.
// Name, status, icon hash and blocked flag are always applied; alias and groups
// are applied only when the payload carries a non-empty value.
static void handleBuddyPayload(LocalBuddy *buddy, const pbnetwork::Buddy &payload) {
	buddy->setName(payload.buddyname());

	// Backends may send an empty alias to say "not changed", so keep the
	// current one in that case.
	const std::string &alias = payload.alias();
	if (!alias.empty()) {
		buddy->setAlias(alias);
	}

	// Groups follow the same rule: an empty list leaves the groups untouched.
	const int groupCount = payload.group_size();
	if (groupCount > 0) {
		std::vector<std::string> groups;
		groups.reserve(groupCount);
		for (int idx = 0; idx != groupCount; ++idx) {
			groups.push_back(payload.group(idx));
		}
		buddy->setGroups(groups);
	}

	const Swift::StatusShow::Type showType = static_cast<Swift::StatusShow::Type>(payload.status());
	buddy->setStatus(Swift::StatusShow(showType), payload.statusmessage());
	buddy->setIconHash(payload.iconhash());
	buddy->setBlocked(payload.blocked());
}
 

	
 
// Creates the backend server: wires user-manager signals, starts the ping and
// (optional) memory-collector timers, starts the vCard/roster/block responders,
// starts listening for backend connections on service.backend_host/backend_port
// and spawns the first backend process.
//
// component   - transport component; its m_factory is replaced by a NetworkFactory.
// config      - configuration the "service.*" options are read from.
// userManager - source of onUserCreated/onUserDestroyed signals.
// ftManager   - file transfer manager, stored for later use.
NetworkPluginServer::NetworkPluginServer(Component *component, Config *config, UserManager *userManager, FileTransferManager *ftManager) {
	m_ftManager = ftManager;
	m_userManager = userManager;
	m_config = config;
	m_component = component;
	m_isNextLongRun = false;
	m_adminInterface = NULL;
	m_startingBackend = false;
	m_component->m_factory = new NetworkFactory(this);
	m_userManager->onUserCreated.connect(boost::bind(&NetworkPluginServer::handleUserCreated, this, _1));
	m_userManager->onUserDestroyed.connect(boost::bind(&NetworkPluginServer::handleUserDestroyed, this, _1));

	m_pingTimer = component->getNetworkFactories()->getTimerFactory()->createTimer(20000);
	m_pingTimer->onTick.connect(boost::bind(&NetworkPluginServer::pingTimeout, this));
	m_pingTimer->start();

	// The collector timer is optional; a value of 0 disables it and leaves
	// m_collectTimer unset.
	if (CONFIG_INT(m_config, "service.memory_collector_time") != 0) {
		m_collectTimer = component->getNetworkFactories()->getTimerFactory()->createTimer(CONFIG_INT(m_config, "service.memory_collector_time"));
		m_collectTimer->onTick.connect(boost::bind(&NetworkPluginServer::collectBackend, this));
		m_collectTimer->start();
	}

	m_vcardResponder = new VCardResponder(component->getIQRouter(), component->getNetworkFactories(), userManager);
	m_vcardResponder->onVCardRequired.connect(boost::bind(&NetworkPluginServer::handleVCardRequired, this, _1, _2, _3));
	m_vcardResponder->onVCardUpdated.connect(boost::bind(&NetworkPluginServer::handleVCardUpdated, this, _1, _2));
	m_vcardResponder->start();

	m_rosterResponder = new RosterResponder(component->getIQRouter(), userManager);
	m_rosterResponder->onBuddyAdded.connect(boost::bind(&NetworkPluginServer::handleBuddyAdded, this, _1, _2));
	m_rosterResponder->onBuddyRemoved.connect(boost::bind(&NetworkPluginServer::handleBuddyRemoved, this, _1));
	m_rosterResponder->onBuddyUpdated.connect(boost::bind(&NetworkPluginServer::handleBuddyUpdated, this, _1, _2));
	m_rosterResponder->start();

	m_blockResponder = new BlockResponder(component->getIQRouter(), userManager);
	m_blockResponder->onBlockToggled.connect(boost::bind(&NetworkPluginServer::handleBlockToggled, this, _1));
	m_blockResponder->start();

	m_server = component->getNetworkFactories()->getConnectionServerFactory()->createConnectionServer(Swift::HostAddress(CONFIG_STRING(m_config, "service.backend_host")), boost::lexical_cast<int>(CONFIG_STRING(m_config, "service.backend_port")));
	m_server->onNewConnection.connect(boost::bind(&NetworkPluginServer::handleNewClientConnection, this, _1));
	m_server->start();

	LOG4CXX_INFO(logger, "Listening on host " << CONFIG_STRING(m_config, "service.backend_host") << " port " << CONFIG_STRING(m_config, "service.backend_port"));

	unsigned long pid = exec_(CONFIG_STRING(m_config, "service.backend"), CONFIG_STRING(m_config, "service.backend_host").c_str(), CONFIG_STRING(m_config, "service.backend_port").c_str(), m_config->getConfigFile().c_str());
	LOG4CXX_INFO(logger, "Tried to spawn first backend with pid " << pid);
	LOG4CXX_INFO(logger, "Backend should now connect to Spectrum2 instance. Spectrum2 won't accept any connection before backend connects");

#ifndef _WIN32
	// Wait one second and check whether the backend process has already exited.
	sleep(1);
	int status = 0;
	const pid_t result = waitpid(-1, &status, WNOHANG);
	// Only a positive return value means a child was reaped and 'status' is
	// valid. 0 means the child is still running and -1 means waitpid() itself
	// failed, in which case 'status' must not be inspected.
	if (result > 0) {
		if (WIFEXITED(status)) {
			if (WEXITSTATUS(status) != 0) {
				LOG4CXX_ERROR(logger, "Backend can not be started, exit_code=" << WEXITSTATUS(status) << ", possible error: " << strerror(WEXITSTATUS(status)));
			}
		}
		else {
			LOG4CXX_ERROR(logger, "Backend can not be started");
		}
	}

	// From now on exited children are reaped asynchronously by SigCatcher.
	signal(SIGCHLD, SigCatcher);
#endif
}
 

	
 
// Asks every connected backend to exit, stops the timers and the listening
// server, and frees the factory and the responders created by the constructor.
NetworkPluginServer::~NetworkPluginServer() {
	// The EXIT message is the same for every backend, so serialize it once.
	std::string message;
	pbnetwork::WrapperMessage wrap;
	wrap.set_type(pbnetwork::WrapperMessage_Type_TYPE_EXIT);
	wrap.SerializeToString(&message);

	for (std::list<Backend *>::const_iterator it = m_clients.begin(); it != m_clients.end(); ++it) {
		Backend *c = *it;
		LOG4CXX_INFO(logger, "Stopping backend " << c);
		// Same guard as in sendPing(): skip backends without a connection.
		if (c->connection) {
			send(c->connection, message);
		}
	}

	m_pingTimer->stop();
	// The collector timer exists only when service.memory_collector_time != 0.
	// Its tick is bound to 'this', so it has to be stopped as well.
	if (m_collectTimer) {
		m_collectTimer->stop();
	}
	m_server->stop();
	m_server.reset();
	delete m_component->m_factory;
	delete m_vcardResponder;
	delete m_rosterResponder;
	delete m_blockResponder;
}
 

	
 
// Registers a freshly connected backend: allocates its Backend record, hooks up
// the connection signals and sends the initial PING.
void NetworkPluginServer::handleNewClientConnection(boost::shared_ptr<Swift::Connection> c) {
	const bool isLongRun = m_isNextLongRun;

	Backend *backend = new Backend;
	backend->pongReceived = -1;
	backend->connection = c;
	backend->res = 0;
	backend->init_res = 0;
	backend->shared = 0;
	// The backend stays in willDie state until its first PONG arrives.
	backend->willDie = true;
	// Long-running backends do not accept new users automatically.
	backend->acceptUsers = !isLongRun;
	backend->longRun = isLongRun;

	// A backend has connected, so another one may be spawned again.
	m_startingBackend = false;

	const std::string kind = backend->longRun ? " long-running" : "";
	LOG4CXX_INFO(logger, "New" + kind + " backend " << backend << " connected. Current backend count=" << (m_clients.size() + 1));

	m_clients.push_front(backend);

	c->onDisconnected.connect(boost::bind(&NetworkPluginServer::handleSessionFinished, this, backend));
	c->onDataRead.connect(boost::bind(&NetworkPluginServer::handleDataRead, this, backend, _1));
	sendPing(backend);

	// sendPing() resets pongReceived to 0. Put it back to -1 so the first
	// ::pingTimeout call, which may come right after this function, ignores
	// this backend instead of treating the not-yet-answered ping as a timeout.
	backend->pongReceived = -1;
}
 

	
 
// Tears down backend 'c': disconnects the users still attached to it, sends it
// an EXIT message, closes its connection, removes it from m_clients and frees it.
void NetworkPluginServer::handleSessionFinished(Backend *c) {
	LOG4CXX_INFO(logger, "Backend " << c << " (ID=" << c->id << ") disconnected. Current backend count=" << (m_clients.size() - 1));

	// This backend will die, so users must not be reconnected to it from the
	// User::handleDisconnected call below.
	c->willDie = true;

	// Users still attached to the backend mean it must have crashed: remember
	// its ID, report every affected user and disconnect them.
	const bool hadUsers = !c->users.empty();
	if (hadUsers) {
		m_crashedBackends.push_back(c->id);
	}

	BOOST_FOREACH(User *user, c->users) {
		LOG4CXX_ERROR(logger, "Backend " << c << " (ID=" << c->id << ") disconnected (probably crashed) with active user " << user->getJID().toString());
		user->setData(NULL);
		user->handleDisconnected("Internal Server Error, please reconnect.");
	}

	pbnetwork::WrapperMessage exitRequest;
	exitRequest.set_type(pbnetwork::WrapperMessage_Type_TYPE_EXIT);
	std::string serialized;
	exitRequest.SerializeToString(&serialized);
	send(c->connection, serialized);

	// Detach our slots before disconnecting so no callback fires for this
	// backend any more.
	c->connection->onDisconnected.disconnect_all_slots();
	c->connection->onDataRead.disconnect_all_slots();
	c->connection->disconnect();
	c->connection.reset();

	m_clients.remove(c);
	delete c;
}
 

	
 
@@ -885,96 +888,99 @@ void NetworkPluginServer::handleDataRead(Backend *c, boost::shared_ptr<Swift::Sa
 
				handleQueryPayload(c, wrapper.payload());
 
				break;
 
			default:
 
				return;
 
		}
 
	}
 
}
 

	
 
// Writes 'data' to connection 'c', prefixed with a 4-byte header holding the
// payload size in network byte order. Does nothing except logging an error
// when 'c' is empty.
void NetworkPluginServer::send(boost::shared_ptr<Swift::Connection> &c, const std::string &data) {
	// An empty shared_ptr must not be dereferenced; sendPing() guards against
	// the same situation.
	if (!c) {
		LOG4CXX_ERROR(logger, "Can not send data to backend, connection is not set");
		return;
	}

	// generate header - size of wrapper message
	const uint32_t size = htonl(static_cast<uint32_t>(data.size()));
	const char *header = reinterpret_cast<const char *>(&size);

	// send header together with wrapper message
	c->write(Swift::createSafeByteArray(std::string(header, sizeof(size)) + data));
}
 

	
 
void NetworkPluginServer::pingTimeout() {
 
	// TODO: move to separate timer, those 2 loops could be expensive
 
	// Some users are connected for weeks and they are blocking backend to be destroyed and its memory
 
	// to be freed. We are finding users who are inactive for more than "idle_reconnect_time" seconds and
 
	// reconnect them to long-running backend, where they can idle hapilly till the end of ages.
 
	time_t now = time(NULL);
 
	std::vector<User *> usersToMove;
 
	unsigned long diff = CONFIG_INT(m_config, "service.idle_reconnect_time");
 
	if (diff != 0) {
 
		for (std::list<Backend *>::const_iterator it = m_clients.begin(); it != m_clients.end(); it++) {
 
			// Users from long-running backends can't be moved
 
			if ((*it)->longRun) {
 
				continue;
 
			}
 

	
 
			// Find users which are inactive for more than 'diff'
 
			BOOST_FOREACH(User *u, (*it)->users) {
 
				if (now - u->getLastActivity() > diff) {
 
					usersToMove.push_back(u);
 
				}
 
			}
 
		}
 

	
 
		// Move inactive users to long-running backend.
 
		BOOST_FOREACH(User *u, usersToMove) {
 
			LOG4CXX_INFO(logger, "Moving user " << u->getJID().toString() << " to long-running backend");
 
			if (!moveToLongRunBackend(u))
 
				break;
 
		}
 
	}
 

	
 
	// We have to remove startingBackend flag otherwise 1 broken backend start could
 
	// block the backend.
 
	m_startingBackend = false;
 

	
 
	// check ping responses
 
	std::vector<Backend *> toRemove;
 
	for (std::list<Backend *>::const_iterator it = m_clients.begin(); it != m_clients.end(); it++) {
 
		// pong has been received OR backend just connected and did not have time to answer the ping
 
		// request.
 
		if ((*it)->pongReceived || (*it)->pongReceived == -1) {
 
			// Don't send another ping if pongReceived == -1, because we've already sent one
 
			// when registering backend.
 
			if ((*it)->pongReceived) {
 
				sendPing((*it));
 
			}
 
		}
 
		else {
 
			LOG4CXX_INFO(logger, "Disconnecting backend " << (*it) << " (ID=" << (*it)->id << "). PING response not received.");
 
			toRemove.push_back(*it);
 
		}
 

	
 
		if ((*it)->users.size() == 0) {
 
			LOG4CXX_INFO(logger, "Disconnecting backend " << (*it) << " (ID=" << (*it)->id << "). There are no users.");
 
			toRemove.push_back(*it);
 
		}
 
	}
 

	
 
	BOOST_FOREACH(Backend *b, toRemove) {
 
		handleSessionFinished(b);
 
	}
 

	
 
	m_pingTimer->start();
 
}
 

	
 
void NetworkPluginServer::collectBackend() {
 
	// Stop accepting new users to backend with the biggest memory usage. This prevents backends
 
	// which are leaking to eat whole memory by connectin new users to legacy network.
 
	LOG4CXX_INFO(logger, "Collect backend called, finding backend which will be set to die");
 
	unsigned long max = 0;
 
	Backend *backend = NULL;
 
	for (std::list<Backend *>::const_iterator it = m_clients.begin(); it != m_clients.end(); it++) {
 
		if ((*it)->res > max) {
 
			max = (*it)->res;
 
			backend = (*it);
 
		}
 
	}
 

	
 
	if (backend) {
 
		if (m_collectTimer) {
 
			m_collectTimer->start();
 
		}
 
@@ -1427,57 +1433,58 @@ void NetworkPluginServer::handleFTRejected(User *user, const std::string &buddyN
 

	
 
// Reacts to a file transfer state change: a transfer entering the Transferring
// state is forwarded to handleFTAccepted(), a Canceled one to handleFTRejected().
// Other states, and transfers of users which are not known to the user manager,
// are ignored.
void NetworkPluginServer::handleFTStateChanged(Swift::FileTransfer::State state, const std::string &userName, const std::string &buddyName, const std::string &fileName, unsigned long size, unsigned long id) {
	User *user = m_userManager->getUser(userName);
	if (user == NULL) {
		// TODO: FIXME We have to remove filetransfer when user disconnects
		return;
	}

	if (state.state == Swift::FileTransfer::State::Transferring) {
		handleFTAccepted(user, buddyName, fileName, size, id);
		return;
	}

	if (state.state == Swift::FileTransfer::State::Canceled) {
		handleFTRejected(user, buddyName, fileName, size);
	}
}
 

	
 
// Sends a PING message to backend 'c' and marks its pong as not received.
// Backends without a connection are left untouched.
void NetworkPluginServer::sendPing(Backend *c) {
	if (!c->connection) {
		return;
	}

	pbnetwork::WrapperMessage ping;
	ping.set_type(pbnetwork::WrapperMessage_Type_TYPE_PING);
	std::string serialized;
	ping.SerializeToString(&serialized);

	LOG4CXX_INFO(logger, "PING to " << c << " (ID=" << c->id << ")");
	send(c->connection, serialized);
	// Cleared here; pingTimeout() reads it on the next tick.
	c->pongReceived = false;
}
 

	
 
// Returns a connected backend which can take one more user, or NULL when there
// is none.
//
// acceptUsers - required value of the backend's acceptUsers flag.
// longRun     - whether a long-running backend is requested.
//
// When no backend matches, a new backend process is spawned unless one is
// already starting (m_startingBackend); NULL is returned in both cases.
NetworkPluginServer::Backend *NetworkPluginServer::getFreeClient(bool acceptUsers, bool longRun) {
	NetworkPluginServer::Backend *c = NULL;

	// Check all backends and find free one
	for (std::list<Backend *>::const_iterator it = m_clients.begin(); it != m_clients.end(); ++it) {
		Backend *candidate = *it;
		if (candidate->willDie || !candidate->connection) {
			continue;
		}
		if (candidate->acceptUsers != acceptUsers || candidate->longRun != longRun) {
			continue;
		}
		if (!(candidate->users.size() < CONFIG_INT(m_config, "service.users_per_backend"))) {
			continue;
		}

		c = candidate;
		// if we're not reusing all backends and backend is full, stop accepting new users on this backend
		if (!CONFIG_BOOL(m_config, "service.reuse_old_backends")) {
			if (c->users.size() + 1 >= CONFIG_INT(m_config, "service.users_per_backend")) {
				c->acceptUsers = false;
			}
		}
		break;
	}

	// There's no free backend, so spawn one - but only if no other backend is
	// starting right now, to give that one some time to connect.
	if (c == NULL && !m_startingBackend) {
		m_isNextLongRun = longRun;
		m_startingBackend = true;
		exec_(CONFIG_STRING(m_config, "service.backend"), CONFIG_STRING(m_config, "service.backend_host").c_str(), CONFIG_STRING(m_config, "service.backend_port").c_str(), m_config->getConfigFile().c_str());
	}

	return c;
}
 

	
 
}
0 comments (0 inline, 0 general)