//------------------------------------------------------------------------------
// name: mosaic-extract.ck (v1.3)
// desc: perform feature extraction over an audio file
//
// version: need chuck version 1.4.2.1 or higher
// sorting: part of ChAI (ChucK for AI)
//
// USAGE: this is purely for feature extraction on training data, so...
//        run chuck in non-real-time mode (this actually can be much
//        faster than real-time mode, since it doesn't synch to audio):
//
//        extract from INPUT and print
//            > chuck --silent mosaic-extract.ck:INPUT
//
//        extract from INPUT audio file and write to OUTPUT model file
//            > chuck --silent mosaic-extract.ck:INPUT:OUTPUT
//
// INPUT: an audio file to be extracted (.wav)
//        a text file listing audio files to be extracted (.txt)
// OUTPUT: a model file containing feature vectors
// ------------------------------------------------------------
// output model file format; each VALUE is a feature value
// (feel free to adapt and modify the file format as needed)
// ------------------------------------------------------------
// filePath windowStartTime VALUE VALUE ... VALUE
// filePath windowStartTime VALUE VALUE ... VALUE
// ...
// ------------------------------------------------------------
//
// date: Spring 2023
// authors: Ge Wang (https://ccrma.stanford.edu/~ge/)
//          Yikai Li
//------------------------------------------------------------------------------

// input audio file
"" => string INPUT;
// output file (if empty, will print to console)
"" => string OUTPUT_FILE;
// get from arguments
if( me.args() > 0 ) me.arg(0) => INPUT;
// get from arguments
if( me.args() > 1 ) me.arg(1) => OUTPUT_FILE;

// print usage
if( me.args() == 0 )
{
    <<< "usage: chuck --silent mosaic-extract.ck:INPUT:OUTPUT", "" >>>;
    <<< " |- INPUT: audio file (.wav), or text file (.txt) listing audio files", "" >>>;
    <<< " |- OUTPUT: model file (.txt) to contain extracted feature vectors", "" >>>;
    me.exit();
}

// detect; print helpful message
if( Machine.silent() == false )
{
    <<< "-----------------", "" >>>;
    <<< "[mosaic-extract]: chuck is currently running in REAL-TIME mode;", "" >>>;
    <<< "[mosaic-extract]: this step has no audio output; may run faster in SILENT mode!", "" >>>;
    <<< "[mosaic-extract]: to run in SILENT mode, restart chuck with --silent flag", "" >>>;
    <<< "-----------------", "" >>>;
}


//------------------------------------------------------------------------------
// analysis network -- this determines which feature will be extracted
// NOTE: see examples/ai/features for examples of different features
//------------------------------------------------------------------------------
// audio input into a FFT
SndBuf audioFile => FFT fft;
// a thing for collecting multiple features into one vector
FeatureCollector combo => blackhole;
// add spectral feature: Centroid
fft =^ Centroid centroid =^ combo;
// add spectral feature: Flux
fft =^ Flux flux =^ combo;
// add spectral feature: RMS
fft =^ RMS rms =^ combo;
// add spectral feature: MFCC
fft =^ MFCC mfcc =^ combo;


//------------------------------------------------------------------------------
// analysis parameters -- useful for tuning your extraction
//------------------------------------------------------------------------------
// set number of coefficients in MFCC (how many we get out)
20 => mfcc.numCoeffs;
// set number of mel filters in MFCC (internal to MFCC)
10 => mfcc.numFilters;

// do one .upchuck() so FeatureCollector knows how many total dimension
combo.upchuck();
// get number of total feature dimensions
combo.fvals().size() => int NUM_DIMENSIONS;

// set FFT size
4096 => fft.size;
// set window type and size
Windowing.hann(fft.size()) => fft.window;
// our hop size (how often to perform analysis)
(fft.size()/2)::samp => dur HOP;
// how many frames to aggregate before averaging?
4 => int NUM_FRAMES;


//------------------------------------------------------------------------------
// OUTPUT: prepare for output
//------------------------------------------------------------------------------
// a feature frame (accumulator for one averaged feature vector)
float featureFrame[NUM_DIMENSIONS];
// how many input files have been processed so far; starts at 0 because
// extractTrajectory() increments it BEFORE logging, so the first file is [1]
0 => int NUM_FILES;

// output reference, default is error stream (cherr)
cherr @=> IO @ theOut;
// instantiate
FileIO fout;
// output file
if( OUTPUT_FILE != "" )
{
    // print
    <<< "opening file for output:", OUTPUT_FILE >>>;
    // open
    fout.open( OUTPUT_FILE, FileIO.WRITE );
    // test
    if( !fout.good() )
    {
        <<< " |- cannot open file for writing...", "" >>>;
        me.exit();
    }
    // override
    fout @=> theOut;
}


//------------------------------------------------------------------------------
// INPUT: prepare for iterating over input data and extract features
//------------------------------------------------------------------------------
// array input audio files
string filenames[0];
// parse INPUT, which may be an audio file (.wav) or a list of filenames (.txt)
if( !parseInput( INPUT, filenames ) ) me.exit();

// loop over filenames
for( int i; i < filenames.size(); i++ )
{
    // extract the file (paths are resolved relative to this script's directory)
    if( !extractTrajectory( me.dir()+filenames[i], filenames[i], i, theOut ) )
    {
        // issue warning
        cherr <= "[mosaic-extract]: problem extracting (and skipping): " <= filenames[i] <= IO.newline();
        // skip
        continue;
    }
}

// flush output
theOut.flush();
// close
theOut.close();


//------------------------------------------------------------------------------
// extractTrajectory() -- extracts and outputs feature vectors from a single file
//
// inputFilePath: path handed to SndBuf.read
// shortName:     label written at the start of every output line
// fileIndex:     index of the file in the input list (currently unused)
// out:           stream receiving the feature vectors
//
// writes one line per analysis window:
//     shortName windowStartTime(seconds) VALUE VALUE ... VALUE
// each window consumes fft.size() samples of buffering plus NUM_FRAMES hops
//
// returns true on success; false if the file yielded no samples to analyze
//------------------------------------------------------------------------------
fun int extractTrajectory( string inputFilePath, string shortName, int fileIndex, IO out )
{
    // increment
    NUM_FILES++;
    // log
    cherr <= "[" <= NUM_FILES <= "] extracting features: " <= inputFilePath <= IO.newline();

    // load by block to speed up IO
    fft.size() => audioFile.chunks;
    // read the audio file
    inputFilePath => audioFile.read;

    // nothing to analyze (empty, missing, or unreadable file); report failure
    // so the caller can warn and skip
    // NOTE(review): assumes SndBuf reports 0 samples after a failed read -- confirm
    if( audioFile.samples() <= 0 ) return false;

    // start position of the current window (in samples)
    int pos;

    while( audioFile.pos() < audioFile.samples() )
    {
        // remember the starting pos of each vector
        audioFile.pos() => pos;
        // let one FFT-size of time pass (to buffer)
        fft.size()::samp => now;
        // zero out
        featureFrame.zero();
        // loop over frames
        for( int i; i < NUM_FRAMES; i++ )
        {
            //-------------------------------------------------------------
            // a single upchuck() will trigger analysis on everything
            // connected upstream from combo via the upchuck operator (=^)
            // the total number of output dimensions is the sum of
            // dimensions of all the connected unit analyzers
            //-------------------------------------------------------------
            combo.upchuck();
            // for each dimension
            for( int d; d < NUM_DIMENSIONS; d++ )
            {
                // accumulate
                combo.fval(d) +=> featureFrame[d];
            }
            // advance time
            HOP => now;
        }

        // print label name and window start time (converted to seconds)
        out <= shortName <= " " <= (pos::samp)/second <= " ";

        //-------------------------------------------------------------
        // average the accumulated frames into a single feature vector
        // for this window (NUM_FRAMES frames per vector)
        //-------------------------------------------------------------
        for( int d; d < NUM_DIMENSIONS; d++ )
        {
            // average by total number of frames
            NUM_FRAMES /=> featureFrame[d];
            // print the feature value
            out <= featureFrame[d] <= " ";
        }
        out <= IO.newline();

        // print . (progress indicator, only when features go elsewhere)
        if( out != cherr ) { cherr <= "."; cherr.flush(); }
    }

    // print newline to screen
    if( out != cherr ) cherr <= IO.newline();

    // done
    return true;
}


//------------------------------------------------------------------------------
// parse INPUT argument -- either single audio file or a text file containing a list
//
// input:   the INPUT argument; treated as a single audio file if it contains
//          ".wav" or ".aiff", otherwise as a text file (relative to me.dir())
//          listing one audio file per line
// results: cleared, then filled with the audio file names
//
// returns true on success; false if the list file cannot be opened
//------------------------------------------------------------------------------
fun int parseInput( string input, string results[] )
{
    // clear results
    results.clear();
    // see if input is a file name
    if( input.rfind( ".wav" ) > 0 || input.rfind( ".aiff" ) > 0 )
    {
        // make new string (since << current appends by reference)
        input => string sss;
        // append
        results << sss;
    }
    else
    {
        // load data
        FileIO fio;
        if( !fio.open( me.dir() + input, FileIO.READ ) )
        {
            // error
            <<< "cannot open file:", me.dir() + input >>>;
            // close
            fio.close();
            // return done
            return false;
        }

        // read each filename
        while( fio.more() )
        {
            // read each line
            fio.readLine().trim() => string line;
            // if not empty
            if( line != "" )
            {
                results << line;
            }
        }

        // done reading; release the file handle
        fio.close();
    }

    return true;
}