Hello everyone,

for my thesis I want to extract some voice features from audio data recorded during psychotherapy sessions. For this I am using the openSMILE toolkit. For the fundamental frequency and jitter I already get good results, but the extraction of center frequencies and bandwidths of the formants 1-3 is puzzling me. For some reason there appears to be just one formant (the first one) with a frequency range up to 6kHz. Formants 2 and 3 are getting values of 0. I expected the formants to be within a range of 500 to 2000 Hz.

I tried to fix the problem myself but could not find the issue here. Does anybody have experience with openSMILE, especially formant extraction, and could help me out?

For testing purposes I am using various audio files recorded by myself or extracted from youtube. My config file looks like this:

///////////////////////////////////////////////////////////////////////////

// openSMILE configuration template file generated by SMILExtract binary //

///////////////////////////////////////////////////////////////////////////

[componentInstances:cComponentManager]

instance[dataMemory].type = cDataMemory

instance[waveSource].type = cWaveSource

instance[framer].type = cFramer

instance[vectorPreemphasis].type = cVectorPreemphasis

instance[windower].type = cWindower

instance[transformFFT].type = cTransformFFT

instance[fFTmagphase].type = cFFTmagphase

instance[melspec].type = cMelspec

instance[mfcc].type = cMfcc

instance[acf].type = cAcf

instance[cepstrum].type = cAcf

instance[pitchAcf].type = cPitchACF

instance[lpc].type = cLpc

instance[formantLpc].type = cFormantLpc

instance[formantSmoother].type = cFormantSmoother

instance[pitchJitter].type = cPitchJitter

instance[lld].type = cContourSmoother

instance[deltaRegression1].type = cDeltaRegression

instance[deltaRegression2].type = cDeltaRegression

instance[functionals].type = cFunctionals

instance[arffSink].type = cArffSink

printLevelStats = 1

nThreads = 1

[waveSource:cWaveSource]

writer.dmLevel = wave

basePeriod = -1

filename = \cm[inputfile(I):name of input file]

monoMixdown = 1

[framer:cFramer]

reader.dmLevel = wave

writer.dmLevel = frames

copyInputName = 1

frameMode = fixed

frameSize = 0.0250

frameStep = 0.010

frameCenterSpecial = center

noPostEOIprocessing = 1

buffersize = 1000

[vectorPreemphasis:cVectorPreemphasis]

reader.dmLevel = frames

writer.dmLevel = framespe

k = 0.97

de = 0

[windower:cWindower]

reader.dmLevel=framespe

writer.dmLevel=winframe

copyInputName = 1

processArrayFields = 1

winFunc = ham

gain = 1.0

offset = 0

[transformFFT:cTransformFFT]

reader.dmLevel = winframe

writer.dmLevel = fftc

copyInputName = 1

processArrayFields = 1

inverse = 0

zeroPadSymmetric = 0

[fFTmagphase:cFFTmagphase]

reader.dmLevel = fftc

writer.dmLevel = fftmag

copyInputName = 1

processArrayFields = 1

inverse = 0

magnitude = 1

phase = 0

[melspec:cMelspec]

reader.dmLevel = fftmag

writer.dmLevel = mspec

nameAppend = melspec

copyInputName = 1

processArrayFields = 1

htkcompatible = 1

usePower = 0

nBands = 26

lofreq = 0

hifreq = 8000

usePower = 0

inverse = 0

specScale = mel

[mfcc:cMfcc]

reader.dmLevel=mspec

writer.dmLevel=mfcc1

copyInputName = 0

processArrayFields = 1

firstMfcc = 0

lastMfcc = 12

cepLifter = 22.0

htkcompatible = 1

[acf:cAcf]

reader.dmLevel=fftmag

writer.dmLevel=acf

nameAppend = acf

copyInputName = 1

processArrayFields = 1

usePower = 1

cepstrum = 0

acfCepsNormOutput = 0

[cepstrum:cAcf]

reader.dmLevel=fftmag

writer.dmLevel=cepstrum

nameAppend = acf

copyInputName = 1

processArrayFields = 1

usePower = 1

cepstrum = 1

acfCepsNormOutput = 0

oldCompatCepstrum = 1

absCepstrum = 1

[pitchAcf:cPitchACF]

reader.dmLevel=acf;cepstrum

writer.dmLevel=pitchACF

copyInputName = 1

processArrayFields = 0

maxPitch = 500

voiceProb = 0

voiceQual = 0

HNRdB = 0

F0 = 1

F0raw = 0

F0env = 1

voicingCutoff = 0.550000

[lpc:cLpc]

reader.dmLevel = fftc

writer.dmLevel = lpc1

method = acf

p = 8

saveLPCoeff = 1

lpGain = 0

saveRefCoeff = 0

residual = 0

forwardFilter = 0

lpSpectrum = 0

[formantLpc:cFormantLpc]

reader.dmLevel = lpc1

writer.dmLevel = formants

copyInputName = 1

nFormants = 3

saveFormants = 1

saveIntensity = 0

saveNumberOfValidFormants = 1

saveBandwidths = 1

minF = 400

maxF = 6000

[formantSmoother:cFormantSmoother]

reader.dmLevel = formants;pitchACF

writer.dmLevel = forsmoo

copyInputName = 1

medianFilter0 = 0

postSmoothing = 0

postSmoothingMethod = simple

F0field = F0

formantBandwidthField = formantBand

formantFreqField = formantFreq

formantFrameIntensField = formantFrameIntens

intensity = 0

nFormants = 3

formants = 1

bandwidths = 1

saveEnvs = 0

no0f0 = 0

[pitchJitter:cPitchJitter]

reader.dmLevel = wave

writer.dmLevel = jitter

writer.levelconf.nT = 1000

copyInputName = 1

F0reader.dmLevel = pitchACF

F0field = F0

searchRangeRel = 0.250000

jitterLocal = 1

jitterDDP = 1

jitterLocalEnv = 0

jitterDDPEnv = 0

shimmerLocal = 0

shimmerLocalEnv = 0

onlyVoiced = 0

inputMaxDelaySec = 2.0

[lld:cContourSmoother]

reader.dmLevel=mfcc1;pitchACF;forsmoo;jitter

writer.dmLevel=lld1

writer.levelconf.nT=10

writer.levelconf.isRb=0

writer.levelconf.growDyn=1

nameAppend = sma

copyInputName = 1

noPostEOIprocessing = 0

smaWin = 3

[deltaRegression1:cDeltaRegression]

reader.dmLevel=lld1

writer.dmLevel=lld_de

writer.levelconf.isRb=0

writer.levelconf.growDyn=1

nameAppend = de

copyInputName = 1

noPostEOIprocessing = 0

deltawin=2

blocksize=1

[deltaRegression2:cDeltaRegression]

reader.dmLevel=lld_de

writer.dmLevel=lld_dede

writer.levelconf.isRb=0

writer.levelconf.growDyn=1

nameAppend = de

copyInputName = 1

noPostEOIprocessing = 0

deltawin=2

blocksize=1

[functionals:cFunctionals]

reader.dmLevel = lld1;lld_de;lld_dede

writer.dmLevel = statist

copyInputName = 1

frameMode = full

// frameListFile =

// frameList =

frameSize = 0

frameStep = 0

frameCenterSpecial = left

noPostEOIprocessing = 0

functionalsEnabled=Extremes;Moments;Means

Extremes.max = 1

Extremes.min = 1

Extremes.range = 1

Extremes.maxpos = 0

Extremes.minpos = 0

Extremes.amean = 0

Extremes.maxameandist = 0

Extremes.minameandist = 0

Extremes.norm = frame

Moments.doRatioLimit = 0

Moments.variance = 1

Moments.stddev = 1

Moments.skewness = 0

Moments.kurtosis = 0

Moments.amean = 0

Means.amean = 1

Means.absmean = 1

Means.qmean = 0

Means.nzamean = 1

Means.nzabsmean = 1

Means.nzqmean = 0

Means.nzgmean = 0

Means.nnz = 0

[arffSink:cArffSink]

reader.dmLevel = statist

filename = \cm[outputfile(O):name of output file]

append = 0

relation = smile

instanceName = \cm[inputfile]

number = 0

timestamp = 0

frameIndex = 1

frameTime = 1

frameTimeAdd = 0

frameLength = 0

// class[] =

printDefaultClassDummyAttribute = 0

// target[] =

// ################### END OF openSMILE CONFIG FILE ######################

More Edwin Wald's questions See All
Similar questions and discussions