/* 
	libarticulator.h

	Copyright (C) 2004 Annosoft, LLC. Garland, Texas. All rights reserved.     
	This program and header file contains the confidential trade secret 
	information of Annosoft, LLC.  Use, disclosure, or copying without 
	written consent is strictly prohibited. 

*/
/**
@file libarticulator.h
@brief
		This header file declares interfaces and functions for performing cleanup
		on Lipsync results. 

		This module provides the ability to convert phoneme synchronization results into
		frames which are composed of more than one articulation. Each frame can
		have more than one phoneme contributing to the rendering.

		The phoneme_mixture articulator architecture takes as input a phoneme list
		generated by the Lipsync SDK and creates phoneme mixtures (percentages)
		on a quantized boundary (frame rate). At a given frame/time t the best mouth 
		positions are approximated by 1 or more phonemes with specified percentages.
		The resulting visual display can then be morphed based on a mixture of the
		phonemes and the percentage of contribution.

  @date 
  20060129 - added SetMaxFrameDelta to IPhnMixtureArticulator
*/



#ifndef _H_LIBARTICULATOR
#define _H_LIBARTICULATOR


#include "liblipsync.h"


////////////////////////////////////////////////////////////////////////////////////
// phoneme based articulator
//
// The phoneme based articulator. This represents the best visual unit for
// a given time value as the weighted sum of different phonemes
// display systems may use this data to generate  a morph target which represents
// a mixture of all the phonemes (and their associated weights)
//
////////////////////////////////////////////////////////////////////////////////////

/// Maximum number of phoneme constituents in a single articulation frame.
/// This is the fixed capacity of CPhonemeMixtureArticulation::m_constituents.
#define MAX_PHN_ARTICULATORS 10		// the maximum number of articulators in a given frame

///////////////////////////////////////////////////////////////////////////////
// Name: CPhonemeMixtureArticulation
/**@ingroup sdk_structures
   @brief
    This class is the data for one articulation frame. It is generated by and returned from
    IPhnMixtureArticulator.
   
   This class represents a single articulation, which can be rendered by
   mixing the visemes for the phoneme list specified in this structure.
  
   Morph target animation can be used. Simply morph in the individual constituents at
   the specified weights, and a smooth animation will be produced.
  
   These multi morphs may have a cleaner look than simple, phoneme-time display
   
   @see IPhnMixtureArticulator, _PrintArticulationMarkers
*/
class CPhonemeMixtureArticulation
{

public:
	/**@brief The t_phoneme structure is a single constituent of the overall phoneme mixture.

	    The t_phoneme type is used to represent a single {phoneme, weight} tuple.

	    @note This is declared as a plain nested struct. The previous
	    `typedef struct t_phoneme { ... };` form had no typedef name, so the
	    `typedef` keyword was ignored and only produced compiler warnings.
	    The type name and layout are unchanged.
	    @see CPhonemeMixtureArticulation
	 */
	struct t_phoneme
	{
		/// the phoneme label (3-byte buffer)
		char strPhoneme[3];
		/// weight of the phoneme in the articulation (0..1]
		float weight;
	};
	/// number of constituent phonemes making up the articulation
	/// (number of entries of m_constituents in use)
	long	  m_nConstituents;
	/// the constituent records; capacity is MAX_PHN_ARTICULATORS
	t_phoneme m_constituents[MAX_PHN_ARTICULATORS];
	/// start time of the articulation
	long	  msStart;
	/// end time of the articulation
	long	  msEnd;
};

///////////////////////////////////////////////////////////////////////////////
// Name: CPhonemeArticulationEnvelop
/**@ingroup sdk_structures
   @brief This structure provides a different view of the articulation data. 
   The phoneme articulation envelop represents a single phoneme (not a mixture of phonemes per frame), 
   and its articulation changes.
   
   This class provides a structure representing a different way to look at the
   articulation information. It is generated along with the CPhonemeMixtureArticulation
   records and provides a different view of approximately the same data.
  
   Each envelop gives the start and stop time for the whole envelop, as an array representing
   each of the envelop steps (quantized to the frame rate). Envelops are sorted by msStart time.
  
   Instead of viewing individual frames as consisting of multiple phonemes, the phonemes
   are channelized into envelops so that each phoneme can be seen in isolation.
  
   This is useful for visualization or other processes where we are more interested in the
   contributions of each phoneme than the individual frame realization of the data as in
   CPhonemeMixtureArticulation.
   @see
   IPhnMixtureArticulator::GenerateEnvelopsFromArticulations
*/
class CPhonemeArticulationEnvelop
{
public:
    /** @brief One step of the unrolled envelop for the phoneme, covering a
        slice of the time between the envelop's msStart and msEnd.

        The envelop items are quantized to the frame rate of the articulator.

        The peak of the envelop can be found by looking for the item with
        the highest weight. */
    struct t_env_item
    {
        /// millisecond time at which this envelop item starts
        long  msStart;
        /// millisecond time at which this envelop item ends
        long  msEnd;
        /// weight (0..1]
        float weight;
    };

    /// short alias for t_env_item
    typedef t_env_item t_env;

    /// phoneme label
    char strPhoneme[3];

    /// onset point: start time of the articulation
    long msStart;

    /// finish point: end time of the articulation
    long msEnd;

    /// number of array items in CPhonemeArticulationEnvelop::env
    long n_env;

    /// the envelop itself: an array of n_env t_env structures
    t_env* env;
};
///////////////////////////////////////////////////////////////////////////////
// Name: articulator_flags_t
/**@brief bitfield options for IPhnMixtureArticulator processing

These flags are bitfield options for the IPhnMixtureArticulator.
The flags can be set by calling IPhnMixtureArticulator::SetFlags.

Currently the only flag supported is af_phn_smooth, this controls
whether or not additional smoothing is enabled in the articulator

@see IPhnMixtureArticulator::SetFlags
**/ 
enum articulator_flags_t
{
	/// No option bits set: the articulator performs its normal processing.
	af_none       = 0,

	/**@brief Enable phoneme smoothing. <P>
		When set, a second process is added to prevent excess jumping
		in articulation values. **/
	af_phn_smooth = 1 << 0
};

///////////////////////////////////////////////////////////////////////////////
// Name:IPhnMixtureArticulator
/**@ingroup sdk_interfaces
   @brief
	This class is used to generate smooth articulations from phoneme
	synchronization (ISyncResultsCollection)

	IPhnMixtureArticulator is an abstract interface that implements an articulator. 
    This class takes as 
	input an ISyncResultsCollection and renders articulation units for the 
	recognized phonemes. The articulation results are accessed using methods 
	provided by this interface.

	The PhnMixtureArticulator generates for each frame a list of 
	phonemes and energies (normalized to 1.0) which when combined 
	create a visually appealing result for each frame. It uses 
	theoretically sound methods to create realistic mouth movements. 
	It can be used with both Textbased and Textless recognition results 
	and will improve the look and feel of both.<emit \<br/\>>

	To create an Articulator interface, use ::CreatePhonemeArticulator. 
	Optionally, it can be given a rule file (examples included in the SDK) 
	and this rule file can be used. If no rule file is specified, the default 
	rules are used.

	The interface can be destroyed by calling its "Release" method, 
	or by calling DestroyPhonemeArticulator.

    The ::articulator_flags_t::af_phn_smooth flag and/or IPhnMixtureArticulator::SetMaxFrameDelta 
    can be used to control the rate of change between articulations. 

	@see 
    -::CreatePhonemeArticulator
    - CPhonemeMixtureArticulation
    - CPhonemeArticulationEnvelop
    - _PrintArticulationMarkers
*/
class IPhnMixtureArticulator
{
protected:
	// The destructor is protected so that clients cannot delete the object
	// through the interface pointer.
	virtual ~IPhnMixtureArticulator() {} // use Release();
public:

    ///////////////////////////////////////////////////////////////////////////////
	// Name: LoadArticulatorRules
	/**@brief
	This method will load an articulation rules formatted buffer from the specified string. 
    The buffer may be loaded from disk, resource, or otherwise. 
	
	This allows applications to override default articulation rules 
	provided by IPhnMixtureArticulator. For more information about 
	the format of the articulation rule file, please contact Annosoft.
    @note Information about this format is available only to licensed SDK customers and
    is available on request.
	@param	szRuleFileBuffer - [in] rule buffer. loaded from file or resource, etc.
	@param	numBufferBytes	 - [in] size, in bytes, of szRuleFileBuffer
	@return
		serror - or appropriate \ref error_codes
*/
	virtual serror LoadArticulatorRules(const char *szRuleFileBuffer, long numBufferBytes) = 0;
	
    ///////////////////////////////////////////////////////////////////////////////
	// Name: SetFrameRate
	/**@brief
	   This sets the articulator frame rate and controls the number of CPhonemeMixtureArticulation
       records generated.
	   
	   The articulator will output articulations at intervals 
	   of the specified frame rate. It is possible for some articulations
	   to be larger than this, but they will always be even multiples of
	   the frame rate.
	   default [20 fps]
	   
	   @param framesPerSec - [in] new frame rate - units : frames per second
       
       @see _PrintArticulationMarkers */

	virtual void SetFrameRate(float framesPerSec) = 0;


    ////////////////////////////////////////////////////////////////////////////////
    // Name: SetMaxFrameDelta
    /**@brief This method sets the smoothing factor to control frame to frame differences<P>
        
        When articulator_flags_t::af_phn_smooth is turned on, the differences from frame
        to frame of an individual phoneme weight are controlled. The default is .25.
        This specifies the maximum allowed transition for a phoneme from one frame to the next.

        This method can be called to modify this value. Note that the behavior is dependent
        on the frame rate; higher frame rates might need proportionally lower values.
        
        This method turns on the af_phn_smooth flag automatically. 
        
        @param maxFrameDelta - [in] maximum phn_env or phn_vis weight change from frame to frame. 
            Values between (0-1), where small values generate smoother data at the expense
            of crispness. Higher values increase 'crispness'.

        @see IPhnMixtureArticulator::SetFlags.
    **/
    virtual void SetMaxFrameDelta(float maxFrameDelta) = 0;

	//////////////////////////////////////////////////////////////
	// Name: SetMaxPhonemesPerArticulation 
	/**@brief
	   This method changes the number of phonemes allowable in a single frame.
	   
	   This method allows applications to control the maximum
	   number of phoneme mixtures that make up a single articulation.
	   If the animation system requires it (or for performance reasons), the
	   number of articulators can be constrained by setting the maximum here.
	   
       When in use, the articulator will apply heuristic rules to determine
       the best phonemes to include in the mixture. 
	   
	   @param maxArticulators - [in] maximum to allow [1.. MAX_PHN_ARTICULATORS)
	  
	   @note
	   If set to one, a single best articulation will be returned.
	   This doesn't short circuit the calculation but rather, after processing the data,
	   does a post process step to pick out the best articulator for each frame.

	   @return
	   An error only if maxArticulators is out of range.
       
       @see _PrintArticulationMarkers */
	virtual serror SetMaxPhonemesPerArticulation(long maxArticulators) = 0;

    ///////////////////////////////////////////////////////////////////////////
	// Name: GenerateArticulations
	/**@brief
	This method will create the articulation list given an ISyncResultsCollection
	
	 This method, given an ISyncResultsCollection, will generate articulations 
	 based on linguistic rules and then make accessible the articulation results 
	 as a series of CPhonemeMixtureArticulation records.

	 The result is accessed by calling the object's IPhnMixtureArticulator::begin 
	 and IPhnMixtureArticulator::end methods to retrieve iterators to the resultant data.

	 @param pCollection - [in] sync results. These can either be Text Based results or results generated from a textless process. The returned articulations will be an improvement in both cases.
	 @param pProgress - [in] progress meter. Currently not used. Can be NULL.
	
	 @return
		kNoError if successful, otherwise an appropriate error code
	
	@see IPhnMixtureArticulator::begin, IPhnMixtureArticulator::end
    @see _PrintArticulationMarkers */
	virtual serror GenerateArticulations(ISyncResultsCollection* pCollection, CProgress *pProgress) = 0;
    
    ///////////////////////////////////////////////////////////////////////////
	// Name: GenerateArticulations (time-ranged overload)
    /**@brief
	    This method will create an articulation list based on an ISyncResultsCollection, 
        given a start and end time.
		
	    This method, given an ISyncResultsCollection, will generate articulations 
	    based on linguistic rules and then make accessible the articulation results 
	    as a series of CPhonemeMixtureArticulation records.

	    It only generates articulations for the CSyncMarker records in the range of 
	    startMs and endMs. 

	    The result is accessed by calling the object's IPhnMixtureArticulator::begin 
	    and IPhnMixtureArticulator::end methods to retrieve iterators to the resultant data.

	    @param pCollection - [in] sync results. These can either be Text Based results or results generated from a textless process. The returned articulations will be an improvement in both cases.
	    @param pProgress - [in] progress meter. Currently not used. Can be NULL.
	    @param startMs - [in] start time. markers before this time won't be included in the result
	    @param endMs - [in] end time. markers after this time won't be included in the results

	 @note
	 If startMs and endMs are both -1, the entire ISyncResultsCollection is processed.
	
	 @return
		kNoError if ok, otherwise an appropriate error code
	
	@see IPhnMixtureArticulator::begin, IPhnMixtureArticulator::end
    @see _PrintArticulationMarkers
*/
	virtual serror GenerateArticulations(ISyncResultsCollection* pCollection, CProgress* pProgress,
					long startMs, long endMs) = 0;

    ///////////////////////////////////////////////////////////////////////////
    // Name: begin
    /**@brief
			STL style access to the generated articulation records.
		
			After articulations are generated through IPhnMixtureArticulator::GenerateArticulations, the 
			results are accessed through the same object by calling IPhnMixtureArticulator::begin to 
			retrieve the first entry and by calling IPhnMixtureArticulator::end to retrieve the element 
			beyond the last entry. This is fashioned in the same way that STL 
			iterators are fashioned. 
		@return
			The first articulation record.

		@see IPhnMixtureArticulator::end, IPhnMixtureArticulator::GenerateArticulations
        @see _PrintArticulationMarkers
	*/
	 virtual CPhonemeMixtureArticulation* begin() = 0;

	///////////////////////////////////////////////////////////////////////////
    // Name: end
    /**@brief
			STL style access to the generated articulation records.
		
			After articulations are generated through IPhnMixtureArticulator::GenerateArticulations, the 
			results are accessed through the same object by calling IPhnMixtureArticulator::begin to 
			retrieve the first entry and by calling IPhnMixtureArticulator::end to retrieve the element 
			beyond the last entry. This is fashioned in the same way that STL 
			iterators are fashioned. 
		@return
			The pointer to the end of the array, STL style. This is not a valid record but rather
            a marker 1 past the last valid record; it must not be dereferenced.

		@see IPhnMixtureArticulator::begin, IPhnMixtureArticulator::GenerateArticulations,
        CPhonemeMixtureArticulation
    */
	virtual CPhonemeMixtureArticulation* end() = 0;

    ///////////////////////////////////////////////////////////////////////////
    // Name: add_artic
	/**@brief
		Utility method to add an articulation record back into this object. 
        It is helpful when creating envelope records from previous results.
		
			This method can be called iteratively to push
			articulation records back into the articulation system.

			This can be used to recreate the articulations so
			that the envelops can be regenerated. It is not
			trivial to do partial regeneration of env records, and
			since the env generation code is fast, this is an easier method.

			Note: it is assumed that the articulations are added in order;
			no sorting will be done.
		
			@param  in	- [in] the articulation to add to the list
			@param bFirst -	[in] is this the first time. When set to true,
                this will clear the CPhonemeMixtureArticulation list in this object.
	*/
	virtual void  add_artic(CPhonemeMixtureArticulation& in, bool bFirst) = 0;


	///////////////////////////////////////////////////////////////////////////
	// Name: Release 
    /**@brief This method destroys this object. Alternative to DestroyPhonemeArticulator
    
       @see _PrintArticulationMarkers */
	virtual void Release() = 0;		


	///////////////////////////////////////////////////////////////////////////
	// Name: SetFlags
	/**@brief This method changes the processing features of the articulator<P>
		Valid values are bitfields of ::articulator_flags_t
		@param flags - [in] bitfield of ::articulator_flags_t or 0 to restore defaults.
	**/
	virtual serror SetFlags(ulong flags) = 0;

    ///////////////////////////////////////////////////////////////////////////
    // Name: GenerateEnvelopsFromArticulations
    /**@brief
        This method is used to generate CPhonemeArticulationEnvelop records.

        It is a post processing step, so GenerateArticulations must be called
        first. To adjust the frame rate, use the SetFrameRate method and then
        GenerateArticulations again, followed by a call to this method.

        To access the results, applications should call env_begin and env_end after
        this method is called.
        @see IPhnMixtureArticulator::env_begin, IPhnMixtureArticulator::env_end
        @version 2.0.8.0 or higher
    */

    virtual serror GenerateEnvelopsFromArticulations() = 0;

    
    ///////////////////////////////////////////////////////////////////////////
    // Name: env_begin
	/**@brief
		STL style access to the generated envelops.
	
		After envelops are generated through IPhnMixtureArticulator::GenerateEnvelopsFromArticulations, 
        the results are accessed through the same object by calling IPhnMixtureArticulator::env_begin to 
		retrieve the first entry and by calling IPhnMixtureArticulator::env_end to retrieve the element 
		beyond the last entry. This is fashioned in the same way that STL 
		iterators are fashioned. 
	@return
		The first env record.

	@see
        IPhnMixtureArticulator::env_end, IPhnMixtureArticulator::GenerateEnvelopsFromArticulations,
        CPhonemeArticulationEnvelop
    @version 2.0.8.0 or higher
	*/
	 virtual CPhonemeArticulationEnvelop* env_begin() = 0;

	///////////////////////////////////////////////////////////////////////////
    // Name: env_end 
    /**@brief
			STL style access to the generated envelops.
		
			After envelops are generated through IPhnMixtureArticulator::GenerateEnvelopsFromArticulations, 
            the results are accessed through the same object by calling IPhnMixtureArticulator::env_begin to 
			retrieve the first entry and by calling IPhnMixtureArticulator::env_end to retrieve the element 
			beyond the last entry. This is fashioned in the same way that STL 
			iterators are fashioned. 
		@return
			The pointer to the end of the array, STL style. This is not a valid record but rather
            a marker 1 past the last valid record; it must not be dereferenced.

		@see
            IPhnMixtureArticulator::env_begin, IPhnMixtureArticulator::GenerateEnvelopsFromArticulations,
            CPhonemeArticulationEnvelop
        @version 2.0.8.0 or higher
	*/
	virtual CPhonemeArticulationEnvelop* env_end() = 0;

     //////////////////////////////////////////////////////////////////////////
    // Name: Quantize
    /**@brief This method quantizes the internal articulation list to generate
       the best single CPhonemeMixtureArticulation for the data.
       It uses the articulations within the specified frame to summarize
       the information into a single articulation.
       
        This method, given a start and stop time as well as a cap on the number of phonemes
        generated, will calculate the best single mixture articulator that represents
        the specified window.
       
        @param msStart - [in] start time. if -1, use the beginning of our inner data
        @param msEnd - [in] end time. if -1 use the end of our inner data
        @param maxPhonemes - [in] maximum number of phonemes to be returned. -1 will use the 
       value specified by SetMaxPhonemesPerArticulation, or the default class value
       @return
        a pointer to the new articulation. This pointer points to an object allocated
        and managed by this IPhnMixtureArticulator. The application does not need to dispose
        of this object. Subsequent calls to this method will replace the data behind the pointer
        with new data, so an application that calls this method repeatedly should copy the
        pointed-to record into a local structure if it needs to keep an earlier result.
       @see
       CPhonemeMixtureArticulation
    */
    virtual CPhonemeMixtureArticulation* Quantize(long msStart, long msEnd, long maxPhonemes) = 0;

};


///////////////////////////////////////////////////////////////////////////
// Name: CreatePhonemeArticulator
/**@ingroup sdk_functions 
   @brief This DLL function creates a new IPhnMixtureArticulator
   
   The CreatePhonemeArticulator function is used to create an
   IPhnMixtureArticulator object that can be used to generate
   articulation records based on synchronization results.

   The created object is destroyed by calling IPhnMixtureArticulator::Release
   or DestroyPhonemeArticulator.
   
   @param ppArticulator :  [out] instantiated articulator
   @param flags :          [in] must be zero
   @return
   an serror status code (presumably kNoError on success, as documented for
   IPhnMixtureArticulator::GenerateArticulations -- TODO confirm)
   @see
   IPhnMixtureArticulator, DestroyPhonemeArticulator
   @see
    _PrintArticulationMarkers
*/
LIBLIP_API serror
CreatePhonemeArticulator(IPhnMixtureArticulator** ppArticulator, ulong flags);

///////////////////////////////////////////////////////////////////////////////
// Name: DestroyPhonemeArticulator
/**@ingroup sdk_functions
   @brief This DLL function is used to destroy an IPhnMixtureArticulator.

   Calling IPhnMixtureArticulator::Release on the object is the documented
   alternative to this function.
   
   @param pArticulator - [in] the object to destroy
   @see IPhnMixtureArticulator, CreatePhonemeArticulator
*/
LIBLIP_API void
DestroyPhonemeArticulator(IPhnMixtureArticulator* pArticulator);





#endif	
