// DLC_MasterServer.cpp : Defines the entry point for the console application.
//

/*
	Implemented RPCs:
		//RPC_ADD
		RPC_CLEAR							-	Clear the job list
		RPC_WAITSIGNAL				- wait for all jobs submitted before that signal
*/


#include "stdafx.h"
#include "DJS_Server.h"
#include <set>

// Version number printed in the console banner.
#define  DLC_MASTERSERVER_VERSION 1.0f
// Minimum GetTime() difference between two redraws of the console status
// screen (see ShowInfo). Comments elsewhere in this file treat GetTime()
// differences as seconds.
#define  INFO_INTERVAL 2
// Minimum GetTime() difference between two status broadcasts to the clients
// (see SendServerStatus, "every second or so").
#define  SERVERSTATUS_INTERVAL 1

// Command strings sent to clients: "update" goes to clients with an unexpected
// version when the server runs in update mode, "abort" tells a client to stop
// working on a job.
#define COMMAND_UPDATE "update"
#define COMMAND_ABORT  "abort"

void Log( const char* szFormat,... )
{
	FILE *f;
	fopen_s( &f, "DJS_Server.log", "at" );
	if( f )
	{
		char timestr[32];
		_strtime_s( timestr,sizeof(timestr) );

		char datestr[128];
		_strdate_s( datestr,sizeof(datestr) );

		va_list arglist;
		va_start(arglist, szFormat);
		char szBuf[1024];
		vsprintf_s( szBuf, 1024, szFormat, arglist);
		fprintf_s( f, "[%s][%s] %s\n",datestr,timestr,szBuf );
		fclose(f);
	}
}

//
// Clears the screen
//
void clrscr()
{
	COORD coordScreen = { 0, 0 };
	DWORD cCharsWritten;
	CONSOLE_SCREEN_BUFFER_INFO csbi;
	DWORD dwConSize;
	HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);

	GetConsoleScreenBufferInfo(hConsole, &csbi);
	dwConSize = csbi.dwSize.X * csbi.dwSize.Y;
	FillConsoleOutputCharacter(hConsole, TEXT(' '),
		dwConSize,
		coordScreen,
		&cCharsWritten);
	GetConsoleScreenBufferInfo(hConsole, &csbi);
	FillConsoleOutputAttribute(hConsole,
		csbi.wAttributes,
		dwConSize,
		coordScreen,
		&cCharsWritten);
	SetConsoleCursorPosition(hConsole, coordScreen);
}

//
// Initialises the server: starts the network layer, caches the host name,
// arms the info/status timers and truncates the log file.
//
//   bUpdateMode - stored in m_bUpdateMode; SendServerStatus() uses it to decide
//                 whether clients with an unexpected version get COMMAND_UPDATE.
//
// Returns 1 on success, or -1 (after printing an error banner) when NetInit()
// fails.
//
int CDistributedJobService_Server::Init( bool bUpdateMode )
{
	m_bUpdateMode = bUpdateMode;
	if( false == NetInit( NULL ) )
	{
		printf("###############################################################################\n");
		printf("### Crytek's distributed job service - MASTER SERVER  Version %2.2f          ###\n", DLC_MASTERSERVER_VERSION);
		printf("###############################################################################\n\n");
		printf(">>>>  Can't create the server! Terminate the program. <<<<\n");
		return -1;
	}

	m_szClientName = Client_GetClientName();
	// Start the timers one interval in the past - presumably so that the first
	// frame already prints the info screen and sends a status.
	m_dLastInfoTime		= -INFO_INTERVAL;
	m_dLastServerStatusTime = -SERVERSTATUS_INTERVAL;
	// NOTE(review): m_dLastSendJobTime is read by DistributeJobs() but is not
	// set here - confirm that it is initialised elsewhere (e.g. constructor).

	//clear the log file
	FILE* f = NULL;
	fopen_s( &f, "DJS_Server.log", "wt" );
	if( f )						// fopen_s can fail (read-only dir, locked file); fclose(NULL) is invalid
		fclose(f);

	Log( "Server Started\n");
	return 1;
}

//
// Shuts the server down: announces the shutdown on the console, stops the
// network layer via NetShutdown(), says good bye and writes a final log entry.
//
void CDistributedJobService_Server::Done()
{
	fputs( "      Master server shutdown.\n", stdout );

	NetShutdown();

	fputs( "      Good bye.\n", stdout );
	Log( "Shutdown.\n");
}

//
// Main loop of the server. The loop has no exit condition, so this function
// does not return.
//
// Each iteration runs one "frame" in a fixed order: wait, service the network
// layer, take in newly requested jobs, hand jobs out to clients, broadcast the
// server status and refresh the console screen.
//
void CDistributedJobService_Server::Run()
{
	for(;;)
	{
		// Pause between frames (100 ms, assuming this is the Win32 Sleep).
		Sleep(100);
		NetFrame();
		ReciveJobs();
		DistributeJobs();
		// The next two calls throttle themselves with their own timers.
		SendServerStatus();
		ShowInfo();
	}
}

//
// Handles the clear request: empties the list of commands already sent to
// clients and then asks the job manager to drop all of its jobs.
//
void CDistributedJobService_Server::Command_ClearJobList()
{
	// Forget every command that is tracked as sent ...
	Server_GetAllSentCommands().clear();

	// ... and every job the job manager still holds.
	QuelleManager.ClearAllJob();
}

void CDistributedJobService_Server::ShowInfo()
{
	//Show info if neccesary
	double dTime = GetTime();
	if( dTime - m_dLastInfoTime >  INFO_INTERVAL )
	{
//		system("cls");
		clrscr();
		printf("###############################################################################\n");
		printf("### Crytek's distributed job system - MASTER SERVER  Version %2.2f           ###\n", DLC_MASTERSERVER_VERSION);
		printf("###############################################################################\n");
		printf("HostName: %s \n\n", m_szClientName);
		m_dLastInfoTime = dTime;

		//client info:
		int nCC = Server_GetClientCount();
		int nActiveClientNumber = 0;
		int nConnectedClientNumber = 0;
		printf("Free clients:\n");
		for( int i = 0; i < nCC; ++i )
		{
			if( Server_IsClientConnected(i) )
			{
				nConnectedClientNumber++;
				if( Server_GetClientBusy(i) == false && Server_GetClientUnusable(i) == false )
				{
					++nActiveClientNumber;
					printf("%s  ", Server_GetClientName(i) );
				}
			}
		}

		printf("\n\n");
		printf("Unusable clients:\n");
		for( int i = 0; i < nCC; ++i )
			if( Server_IsClientConnected(i) )
			{
				if (Server_GetClientUnusable(i) == true)
					printf("%s  ", Server_GetClientName(i) );
			}

		std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
		__w64 int nSentCommandSize = sentCommands.size();
		
		int nNotDone = GetNotDoneCommandsCount();

		printf("\n\nActive clients:\n");
		int nAClientWithJob = 0;
		for( int i = 0; i < nCC; ++i )
			if( Server_IsClientConnected(i) && Server_GetClientVersion(i) == Server_GetExpectedClientVersion() ) // && Server_GetClientBusy(i) )
			{
				printf("%s ", Server_GetClientName(i) );
/*
				__w64 int j;
				for( j = 0; j < nSentCommandSize; ++j )
					if( false == sentCommands[j].done)
					{
						if( sentCommands[j].iClient == i )
							break;
					}
					if( j != nSentCommandSize )
					{
						//printf("%s -> %s\n", Server_GetClientName(i), sentCommands[j].command.str );
					}
				++nAClientWithJob;
				*/
			}

			{
				bool bAnyBadVersion = false;
				for( int i = 0; i < nCC; ++i )
					if( Server_IsClientConnected(i) && Server_GetClientVersion(i) != Server_GetExpectedClientVersion() )
						bAnyBadVersion = true;

				printf("\n\nWrong Client Version:\n");
				for( int i = 0; i < nCC; ++i )
				{
					if( Server_IsClientConnected(i) && Server_GetClientVersion(i) != Server_GetExpectedClientVersion() )
					{
						printf("%s ", Server_GetClientName(i) );
					}
				}
			}
/*
		if( nNotDone != nAClientWithJob )
		{
			printf("\n\nClient(s) with not normal work:\n");
			for( int i = 0; i < nCC; ++i )
				if( Server_IsClientConnected(i) ) //&& Server_GetClientBusy(i) )
				{
					__w64 int j;
					for( j = 0; j < nSentCommandSize; ++j )
						if( false == sentCommands[j].done)
						{
							if( sentCommands[j].iClient == i )
								break;
						}
					if( j == nSentCommandSize )
						printf("%s -> %s  ", Server_GetClientName(i), Server_GetClientCommand(i) );
				}
		}
*/

		printf("\n\nClient number: %d/%d, %d job distributed. Jobs Pending(%d), Left (%d): \n\n", nActiveClientNumber,nConnectedClientNumber, nNotDone, QuelleManager.GetPendingJobNumber(),QuelleManager.GetLeftJobNumber() );
		//printf("\nNote: clients run more than 1 thread, it can be in the free and the active list too in the same time.\n\nDebug:\n");

		nSentCommandSize = sentCommands.size();
		for( __w64 int i = 0; i < nSentCommandSize; ++i )
		{
			int minutes,seconds;
			GetTimeDiff( sentCommands[i].timeStart,minutes,seconds );
			printf("%6s -> %3d %3d %s  (%2d:%02d)\n",Server_GetClientName(sentCommands[i].iClient),i,
				sentCommands[i].guid, sentCommands[i].command.str, minutes,seconds );
		}

		QuelleManager.ShowDebugInfo();
	}
}

void CDistributedJobService_Server::ReciveJobs()
{
	FixedString Command;
	while( Server_GetRemoteCommandRequest( &Command ) )
	{
		if ( strcmp(Command.str,"RPC_CLEAR") == 0 )
		{
			Log("RPC: CLEAR JOB LIST\n");
			Command_ClearJobList();
		}
		else
			QuelleManager.InsertNotRegisteredJob( &Command );
	}
}

//////////////////////////////////////////////////////////////////////////
void CDistributedJobService_Server::JobFinished( int nCommandIndex,const char *reason )
{
	std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
	if (nCommandIndex < 0 || nCommandIndex >= (int)sentCommands.size())
		return;

	CommandInfo &cmd = sentCommands[nCommandIndex];

	int guid = cmd.guid;
	int iClient = cmd.iClient;

	char cmdstr[1024];
	strcpy_s(cmdstr,cmd.command.str);
	
	if (!cmd.done)
		cmd.success = false;

	bool bJobSuccess = cmd.success;

	bool bJobStillRunningOnOtherClients = false;
	{
		for( unsigned int j = 0; j < sentCommands.size(); j++ )
		{
			if (j != nCommandIndex && (sentCommands[j].guid == guid && guid >= 0))
			{
				// there`re other commands doing the same job.
				bJobStillRunningOnOtherClients = true;
				break;
			}
		}
	}

	if (bJobSuccess || !bJobStillRunningOnOtherClients)
	{
		int minutes,seconds;
		GetTimeDiff( sentCommands[nCommandIndex].timeStart,minutes,seconds );

		Log( "<%s>\t Job Finished: %s : %s, (%s) (Time: %d:%d)",Server_GetClientName(iClient),cmdstr,(cmd.success?"ok":"fail"),reason,minutes,seconds );

		// If we properly finished job, or if it is only was running on one client.
		QuelleManager.JobFinished( sentCommands[nCommandIndex] );
	}
	sentCommands.erase( sentCommands.begin() + nCommandIndex );


	// Abort all others who may be processing the same command.
	if (bJobSuccess)
	{
		for( unsigned int j = 0; j < sentCommands.size(); )
		{
			if (sentCommands[j].guid == guid && guid >= 0)
			{
				// This command is for the same Job, tell client to quit it.
				char str[1024];
				sprintf_s( str,"%s %s",COMMAND_ABORT,cmdstr );
				Server_SendSimpleCommandToClient( sentCommands[j].iClient,str ); // Send abort for command for specific job
				// Delete same command from sent commands list.
				sentCommands.erase(sentCommands.begin()+j);
			}
			else
				j++;
		}
	}
}

//////////////////////////////////////////////////////////////////////////
void CDistributedJobService_Server::DistributeJobs()
{
	//Check the clients
	int nCC = Server_GetClientCount();
	double dDropTime = GetTime() - 180;
	for( int i = 0; i < nCC; ++i )
	{
		int iClient = i;
		if( Server_IsClientConnected(iClient) && Server_GetClientStatusRecivedTime(iClient) < dDropTime )
		{
			Log("<%s>\t Drop a client because of time out.\n", Server_GetClientName(iClient) );
			Server_TerminateClient(i);

			//redistribute the commands.. (the client id can be taken over by other client..
			std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
			for( unsigned int j = 0; j < sentCommands.size();  )
			{
				//not this client
				if( sentCommands[j].iClient != i )
				{
					++j;
					continue;
				}

				if( !sentCommands[j].done )
					sentCommands[j].success = false;

				JobFinished( j,"Client gone Offline" );
			}
		}
	}

	//Check old unfinished jobs
	std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
	for( unsigned int i = 0; i < sentCommands.size(); ++i )
	{
		int iClient = sentCommands[i].iClient;
		if( sentCommands[i].done )
		{
			//have problem...
			if( !sentCommands[i].success )
			{
				if( Server_IsClientConnected(iClient) )
					Server_SetClientUnusable(iClient);
				
				JobFinished( i,"Job Failed" );
			}
			else
				JobFinished( i,"" );
			--i;
			continue;
		}

		//Job losted.. put back to my joblist..
		if (!Server_IsClientConnected(iClient))
		{
			//put the job into the first place - because it guaranteed that it will be finished before the wait signal
			if( ! sentCommands[i].done )
			{
				sentCommands[i].success = false;
			}
			JobFinished(i,"Client not connected");

			--i;
			continue;
		}
	}

	//search a free client
	int iFreeClient = Server_GetAvailableClient();
	if( -1 == iFreeClient )
		return;										//no available client
	if (Server_GetClientUnusable(iFreeClient))
		return;

	//search a job to send
	CommandInfo* pInfo;
	if( QuelleManager.GetTheNextJobToSent( &pInfo,false ) )
	{
		if( NULL != pInfo )
		{
			m_dLastSendJobTime = GetTime();
			Server_SendCommandToClient(iFreeClient,pInfo);
		}
	}
	else
	{
		// No jobs to left, but we have free clients and some pending jobs, so try to execute it on free clients maybe they will finish it sooner.
		if (QuelleManager.GetPendingJobNumber() > 0 && QuelleManager.GetLeftJobNumber() == 0 && 
				(GetTime() - m_dLastSendJobTime) > 5) // Wait at least 5 seconds.
		{
			if( QuelleManager.GetTheNextJobToSent( &pInfo,true ) )
			{
				if (NULL != pInfo)
				{
					bool bSentToThisClientAlready = false;
					std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
					int nSentCommandSize = (int)sentCommands.size();
					for(int i = 0; i < nSentCommandSize; ++i )
					{
						if (pInfo->guid == sentCommands[i].guid && iFreeClient == sentCommands[i].iClient)
						{
							bSentToThisClientAlready = true;
							break;
						}
					}
					// Check that this command was already sent to the same client, prevents same machine executing same command on multiple threads.
					if (!bSentToThisClientAlready)
					{
						m_dLastSendJobTime = GetTime();
						Server_SendCommandToClient(iFreeClient,pInfo);
					}
				}
			}
		}
	}
}

//////////////////////////////////////////////////////////////////////////
int CDistributedJobService_Server::GetNotDoneCommandsCount()
{
	std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
	int nSentCommandSize = (int)sentCommands.size();
	int nNotDone = 0;
	std::set<int> command_guids;
	for(int i = 0; i < nSentCommandSize; ++i )
	{
		if (command_guids.find(sentCommands[i].guid) != command_guids.end()) 
			continue; // already counted this command.
		command_guids.insert(sentCommands[i].guid);

		if( false == sentCommands[i].done ) //|| false == sentCommands[i].success)
			++nNotDone;
	}
	return nNotDone;
}

//////////////////////////////////////////////////////////////////////////
void CDistributedJobService_Server::SendServerStatus()
{
	double dTimeDiff = GetTime() - m_dLastServerStatusTime;
	if (dTimeDiff > SERVERSTATUS_INTERVAL) // Send status every second or so.
	{
		m_dLastServerStatusTime = GetTime();

		int i;
		ServerStatus status;
		memset(&status,0,sizeof(status));

		//int nNotDone = GetNotDoneCommandsCount();

		status.nJobsLeft = QuelleManager.GetPendingJobNumber() + QuelleManager.GetLeftJobNumber();

		status.nClients = 0;
		status.nFreeClients = 0;
		int nCC = Server_GetClientCount();

		for(i = 0; i < nCC; ++i )
		{
			if( Server_IsClientConnected(i) )
			{
				status.nClients++;
				if( Server_GetClientBusy(i) == false )
					status.nFreeClients++;
			}
		}

		for(i = 0; i < nCC; ++i )
		{
			if( Server_IsClientConnected(i) )
			{
				Server_SendServerStatusToClient(i,status);

				if (m_bUpdateMode)
				{
					// Send update messages.
					if (!Server_GetClientUpdated(i) && Server_GetClientVersion(i) != Server_GetExpectedClientVersion())
					{
						Server_SetClientUpdated(i);
						Server_SendCommandToClient(i,COMMAND_UPDATE,-2,0,1000);
					}
				}
			}
		}

		std::vector<int> clientsToAbort;
		std::vector<CommandInfo> &sentCommands = Server_GetAllSentCommands();
		if (status.nJobsLeft == 0 && sentCommands.size() > 0)
		{
			for (int i = 0; i < (int)sentCommands.size(); i++)
			{
				if (!sentCommands[i].done && sentCommands[i].guid >= 0)
				{
					Server_SendSimpleCommandToClient( sentCommands[i].iClient,COMMAND_ABORT );
				}
			}
		}
	}
}
