Realtime Audio Tracking with Unity

Add-on: Keijiro has once again released a very low-latency, minimal audio analysis tool as open source 🙂 – check it out: https://github.com/keijiro/jp.keijiro.libsoundio

With a few scripts you can capture live audio input from your microphone and process it with a Fast Fourier Transform (FFT) to extract nice, smooth frequency bands to play with in Unity. This package works on Android too 🙂

audio analysis tools in C#

This first script does the audio analysis magic 🙂 – It tracks 8 audio bands and some smoothed ticking values for later usage.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Android;

/// <summary>
/// Analyses the audio played by the attached <see cref="AudioSource"/> (optionally fed
/// by the first microphone) and publishes eight frequency bands through static arrays:
/// <see cref="mapped_band"/> (each band divided by its decaying peak),
/// <see cref="freq_band_highest"/> (the decaying peak per band) and
/// <see cref="smoothed_ticks"/> (a per-band accumulator that advances faster the
/// louder the band is).
/// </summary>
public class the_audio_visualizer : MonoBehaviour
{
    // Number of frequency bands derived from the spectrum.
    const int BandCount = 8;

    // Number of FFT bins requested from the AudioSource.
    const int SpectrumSize = 512;

    // Lower bound for the per-band peak. Without it the peak decays towards zero
    // during long silence and the normalisation below turns into 0/0 = NaN, which
    // would then stick in smoothed_ticks forever.
    const float MinBandPeak = 1e-6f;

    // How long to wait for the microphone to deliver its first samples.
    const float MicStartTimeoutSeconds = 5f;

    AudioSource my_audiosource;

    public bool use_mic = true;

    string selectedDeviceName = "";

    // True once this component has started a microphone recording it must stop again.
    bool mic_started = false;

    float[] spectrum = new float[SpectrumSize];

    float[] freq_band = new float[BandCount];

    float[] smoothed_freq_band = new float[BandCount];

    float[] band_buffer = new float[BandCount];
    float[] buffer_decr = new float[BandCount];

    // continuous ticking time vars based on bands!
    public static float[] smoothed_ticks = new float[BandCount];

    public static float[] freq_band_highest = new float[BandCount];
    public static float[] mapped_band = new float[BandCount];
    float[] mapped_buffer = new float[BandCount];

    // Per-frame multiplier applied to each band's peak; values just below 1 make
    // the analysis slowly more sensitive while the input stays quiet.
    public float sensitizer = .999f;

    void Start()
    {
        Permission.RequestUserPermission(Permission.Microphone);
        my_audiosource = GetComponent<AudioSource>();

        // Seed the peaks before anything else so Update never normalises by zero.
        doAudioProfile(1f);

        if (my_audiosource == null)
        {
            Debug.LogError("the_audio_visualizer needs an AudioSource on the same GameObject");
            enabled = false;
            return;
        }

        if (use_mic)
        {
            // Started as a coroutine so that waiting for the first microphone
            // samples cannot freeze the main thread.
            StartCoroutine(startMicrophone());
        }
    }

    void OnDestroy()
    {
        // Stop the recording this component started.
        if (mic_started && Microphone.IsRecording(selectedDeviceName))
        {
            Microphone.End(selectedDeviceName);
        }
    }

    /// <summary>
    /// Starts recording from the first microphone, waits (bounded by
    /// <see cref="MicStartTimeoutSeconds"/>) until it delivers data and then plays
    /// the looping microphone clip through the AudioSource.
    /// </summary>
    IEnumerator startMicrophone()
    {
        if (Microphone.devices.Length == 0)
        {
            // could not init mic
            Debug.Log("SORRY DUDE - MIC IS GONE :)");
            yield break;
        }

        selectedDeviceName = Microphone.devices[0];
        Debug.Log("starting up :" + selectedDeviceName);

        AudioClip mic_clip = null;
        try
        {
            mic_clip = Microphone.Start(selectedDeviceName, true, 1, 22050);
        }
        catch (System.Exception e)
        {
            Debug.Log("SORRY DUDE - MIC IS GONE :) " + e.Message);
        }

        if (mic_clip == null)
        {
            yield break;
        }
        mic_started = true;

        // wait for the mic to actually get real mic data - query the device that
        // was started, and give up after a timeout instead of spinning forever.
        float deadline = Time.realtimeSinceStartup + MicStartTimeoutSeconds;
        while (Microphone.GetPosition(selectedDeviceName) <= 0)
        {
            if (Time.realtimeSinceStartup > deadline)
            {
                Microphone.End(selectedDeviceName);
                mic_started = false;
                Debug.Log("SORRY DUDE - MIC IS GONE :)");
                yield break;
            }
            yield return null;
        }

        my_audiosource.clip = mic_clip;
        my_audiosource.Play();
    }

    void Update()
    {
        // Nothing to analyse until a clip (microphone or assigned) is present.
        if (my_audiosource == null || my_audiosource.clip == null) { return; }

        getSpectrumData();
        makeFrequencyBands();
        operateBandBuffer();
        createMappedBands();
    }

    /// <summary>
    /// Resets every band's peak to <paramref name="_v"/> and every tick accumulator to 0.1.
    /// </summary>
    void doAudioProfile(float _v)
    {
        for (int i = 0; i < BandCount; i++)
        {
            freq_band_highest[i] = _v;
            smoothed_ticks[i] = .1f;
        }

        Debug.Log("initalized audio profile");
    }

    /// <summary>Fills <c>spectrum</c> from channel 0 using a Blackman window.</summary>
    void getSpectrumData()
    {
        my_audiosource.GetSpectrumData(spectrum, 0, FFTWindow.Blackman);
    }

    /// <summary>
    /// Collapses the spectrum into bands of 2, 4, 8, ... 256 consecutive bins
    /// (510 bins in total), weighting each bin by its 1-based index.
    /// </summary>
    void makeFrequencyBands()
    {
        int count = 0;

        for (int i = 0; i < BandCount; i++)
        {
            float average = 0;
            int sampleCount = 2 << i; // 2 * 2^i

            for (int j = 0; j < sampleCount; j++)
            {
                average += spectrum[count] * (count + 1);
                count++;
            }

            // NOTE(review): this divides by the running bin total, not by this
            // band's own sampleCount. Kept as in the original because the result is
            // re-normalised per band in createMappedBands - confirm it is intended.
            average /= count;
            freq_band[i] = average * 8;
        }
    }

    /// <summary>
    /// Low-pass filters each band and maintains a buffer value that snaps to the
    /// smoothed band when the raw band exceeds it and otherwise falls at an
    /// accelerating rate.
    /// </summary>
    void operateBandBuffer()
    {
        for (int i = 0; i < BandCount; i++)
        {
            smoothed_freq_band[i] = Mathf.Lerp(smoothed_freq_band[i], freq_band[i], .1f);

            if (freq_band[i] > band_buffer[i])
            {
                band_buffer[i] = smoothed_freq_band[i];
                buffer_decr[i] = .015f;
            }

            // Deliberately a second independent test (not an else): it is evaluated
            // against the buffer value that may just have been replaced above.
            if (freq_band[i] < band_buffer[i])
            {
                band_buffer[i] -= buffer_decr[i];
                buffer_decr[i] *= 1.1f;
            }
        }
    }

    /// <summary>
    /// Tracks each band's peak, divides the smoothed band and the buffer by it,
    /// advances the tick accumulators and lets the peak decay by <see cref="sensitizer"/>.
    /// </summary>
    void createMappedBands()
    {
        for (int i = 0; i < BandCount; i++)
        {
            if (smoothed_freq_band[i] > freq_band_highest[i])
            {
                freq_band_highest[i] = smoothed_freq_band[i];
            }

            // Clamp the divisor so a fully decayed (or externally zeroed) peak can
            // never produce NaN/Infinity.
            float peak = Mathf.Max(freq_band_highest[i], MinBandPeak);

            mapped_band[i] = smoothed_freq_band[i] / peak;
            mapped_buffer[i] = band_buffer[i] / peak;

            smoothed_ticks[i] += mapped_band[i] * .01f;

            // slowly become more sensitive but not too much!
            freq_band_highest[i] = Mathf.Max(peak * sensitizer, MinBandPeak);
        }
    }
}

This script pipes all valuable data to the shader itself.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Copies the analysis results published by <c>the_audio_visualizer</c> into the
/// float properties of <see cref="shadermat"/> once per frame:
/// <c>_b0.._b7</c> (mapped bands), <c>_buff0.._buff7</c> (band peaks) and
/// <c>_lowtick</c>/<c>_midtick</c>/<c>_hitick</c> (averaged tick accumulators).
/// </summary>
public class fft_to_shader_pipeline : MonoBehaviour
{
    const int BandCount = 8;

    public Material shadermat;

    // Shader property ids, resolved once instead of hashing the names every frame.
    int[] band_ids;
    int[] buff_ids;
    int lowtick_id;
    int midtick_id;
    int hitick_id;

    // Ensures the "no material" warning is only logged once.
    bool warned_missing_material = false;

    void Awake()
    {
        band_ids = new int[BandCount];
        buff_ids = new int[BandCount];

        for (int i = 0; i < BandCount; i++)
        {
            band_ids[i] = Shader.PropertyToID("_b" + i);
            buff_ids[i] = Shader.PropertyToID("_buff" + i);
        }

        lowtick_id = Shader.PropertyToID("_lowtick");
        midtick_id = Shader.PropertyToID("_midtick");
        hitick_id = Shader.PropertyToID("_hitick");
    }

    // Update is called once per frame
    void Update()
    {
        if (shadermat == null)
        {
            // Warn once instead of throwing an exception every frame.
            if (!warned_missing_material)
            {
                Debug.LogWarning("fft_to_shader_pipeline: no material assigned to 'shadermat'");
                warned_missing_material = true;
            }
            return;
        }

        for (int i = 0; i < BandCount; i++)
        {
            shadermat.SetFloat(band_ids[i], the_audio_visualizer.mapped_band[i]);
            shadermat.SetFloat(buff_ids[i], the_audio_visualizer.freq_band_highest[i]);
        }

        float[] ticks = the_audio_visualizer.smoothed_ticks;

        // NOTE(review): band 2 is not part of any group below; kept exactly as in
        // the original - confirm whether it was meant to belong to low or mid.
        float _lowticks = (ticks[0] + ticks[1]) * .5f;
        float _midticks = (ticks[3] + ticks[4] + ticks[5]) * .3333f; // ~ 1/3
        float _hiticks = (ticks[6] + ticks[7]) * .5f;

        shadermat.SetFloat(lowtick_id, _lowticks);
        shadermat.SetFloat(midtick_id, _midticks);
        shadermat.SetFloat(hitick_id, _hiticks);
    }
}

basic unlit shader in HLSL to visualize the audio output

This basic unlit shader draws several rings that react to the band input. The three smaller rings visualize a continuous band sum, which can be used later on to control more complex shader graphics.

// Unlit debug visualiser for the audio analysis values.
// Draws eight large rings in a row (radius driven by _buffN, stroke width by _bN)
// and three small rings whose vertical position oscillates with
// _lowtick / _midtick / _hitick. All inputs are set from script every frame.
Shader "trxy/audio_vis_2"
{
    Properties
    {
        _MainTex ("maintex for experiments", 2D) = "white" {}

        // mapped band values
        _b0 ("b0", Float) = 0
        _b1 ("b1", Float) = 0
        _b2 ("b2", Float) = 0
        _b3 ("b3", Float) = 0
        _b4 ("b4", Float) = 0
        _b5 ("b5", Float) = 0
        _b6 ("b6", Float) = 0
        _b7 ("b7", Float) = 0

        // per-band peak values
        _buff0 ("buffermax0", Float) = 0
        _buff1 ("buffermaxb1", Float) = 0
        _buff2 ("buffermaxb2", Float) = 0
        _buff3 ("buffermaxb3", Float) = 0
        _buff4 ("buffermaxb4", Float) = 0
        _buff5 ("buffermaxb5", Float) = 0
        _buff6 ("buffermaxb6", Float) = 0
        _buff7 ("buffermaxb7", Float) = 0

        // continuously growing tick accumulators
        _lowtick ("lowtick", Float) = 0
        _midtick ("midtick", Float) = 0
        _hitick ("hitick", Float) = 0
    }
    SubShader
    {
        Tags { "RenderType"="Opaque" }
        LOD 100

        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag

            #include "UnityCG.cginc"

            struct appdata
            {
                float4 vertex : POSITION;
                float2 uv : TEXCOORD0;
            };

            struct v2f
            {
                float2 uv : TEXCOORD0;
                float4 vertex : SV_POSITION;
                float4 screenPos : TEXCOORD1;
            };

            sampler2D _MainTex;
            float4 _MainTex_ST;

            float _b0;
            float _b1;
            float _b2;
            float _b3;
            float _b4;
            float _b5;
            float _b6;
            float _b7;

            float _buff0;
            float _buff1;
            float _buff2;
            float _buff3;
            float _buff4;
            float _buff5;
            float _buff6;
            float _buff7;

            float _lowtick;
            float _midtick;
            float _hitick;

            v2f vert (appdata v)
            {
                v2f o;
                o.vertex = UnityObjectToClipPos(v.vertex);
                o.uv = TRANSFORM_TEX(v.uv, _MainTex);      // uv position of texture
                o.screenPos = ComputeScreenPos(o.vertex);  // uv position of screen
                return o;
            }

            // Glow of a ring: _strokeWidth divided by the distance between _uv and
            // the circle of radius _radius around _pos. Full float precision is
            // used because the stroke widths are far below half precision.
            float ringLight(float2 _uv, float2 _pos, float _radius, float _strokeWidth)
            {
                float dist = length(_pos - _uv);
                // abs() is what length() of a scalar computes; the lower bound
                // avoids a division by zero for pixels exactly on the ring.
                float ringDist = max(abs(dist - _radius), 1e-5);
                return _strokeWidth / ringDist;
            }

            fixed4 frag (v2f i) : SV_Target
            {
                // half2 object_uv = i.uv;

                // screen-space uv in 0..1
                float2 fragCoord = i.screenPos.xy / i.screenPos.w;
                float2 uv = fragCoord;

                // fix ratio - assumes a fixed 16:9 widescreen target
                uv.y *= 9.0 / 16.0;
                uv.y += 0.2;

                float shade = 0.0;

                // ring radius ----------------
                float br = 0.14;     // base radius
                float brr = 0.08;    // radius range scaled by _buffN

                // stroke width ---------------
                float ba = 0.002;    // width range scaled by _bN
                float brrr = 0.0002; // base width

                shade += ringLight(uv, float2(0.15, 0.5), _buff0 * brr + br, _b0 * ba + brrr);
                shade += ringLight(uv, float2(0.25, 0.5), _buff1 * brr + br, _b1 * ba + brrr);
                shade += ringLight(uv, float2(0.35, 0.5), _buff2 * brr + br, _b2 * ba + brrr);
                shade += ringLight(uv, float2(0.45, 0.5), _buff3 * brr + br, _b3 * ba + brrr);
                shade += ringLight(uv, float2(0.55, 0.5), _buff4 * brr + br, _b4 * ba + brrr);
                shade += ringLight(uv, float2(0.65, 0.5), _buff5 * brr + br, _b5 * ba + brrr);
                shade += ringLight(uv, float2(0.75, 0.5), _buff6 * brr + br, _b6 * ba + brrr);
                shade += ringLight(uv, float2(0.85, 0.5), _buff7 * brr + br, _b7 * ba + brrr);

                // oscillating vertical offsets for the three small rings
                float lowpulse = sin(_lowtick * 22.0) * 0.5 + 0.1;
                float midpulse = sin(_midtick * 22.0) * 0.5 + 0.1;
                float hipulse  = sin(_hitick  * 22.0) * 0.5 + 0.1;

                shade += ringLight(uv, float2(0.15, lowpulse * 0.02 + 0.5), 0.04, 0.004);
                shade += ringLight(uv, float2(0.5,  midpulse * 0.02 + 0.5), 0.04, 0.004);
                shade += ringLight(uv, float2(0.85, hipulse  * 0.02 + 0.5), 0.04, 0.004);

                // Inverted threshold: bright glow becomes black, background white.
                // Written as 1 - smoothstep(lo, hi, x) because smoothstep with
                // edge0 > edge1 is undefined on GLSL targets; the curve is identical.
                shade = 1.0 - smoothstep(0.4, 0.6, shade);

                half4 col = half4(shade, shade, shade, 1.0);

                /*
                 optional colouring experiment:
                 col.x +=  ((sin(_lowtick*22. + uv.y*12.) +1. )*.2);

                 col.y +=  ((sin(_midtick*22. + uv.y*62. )+1.) *.2);

                 col.z +=  ((sin(_hitick*22. + uv.y*192. )+1.) *.2);
                */

                return col;
            }
            ENDCG
        }
    }
}

For convenience, you can use this Unity package (just import it into an empty project) to get a very basic setup to play with.
https://zukunft.burg-halle.de/downloads/live_audio_to_fft_to_shader.unitypackage