-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathagSegmentSf.py
executable file
·75 lines (62 loc) · 5.79 KB
/
agSegmentSf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python3
############################################################################
## This software is distributed for free, without warranties of any kind. ##
## Send bug reports or suggestions to [email protected] ##
############################################################################
import sys, os, audioguide
from optparse import OptionParser
parser = OptionParser(usage="usage: %prog [option flags] soundfile")
parser.set_defaults(TRIGGER_THRESHOLD=-40)
parser.set_defaults(RISERATIO=1.3)
parser.set_defaults(MULTIRISE=False)
parser.set_defaults(MINIMUM_DB_OFFSET_BOOST=+12)
parser.set_defaults(DB_OFFSET_ABSOLUTE=-80)
parser.set_defaults(MINIMUM_SEG=0.1)
parser.set_defaults(MAXIMUM_SEG=1000)
parser.set_defaults(SEGMENTATION_INFO='logic')
parser.set_defaults(OUTPUT_FILE='')
parser.add_option("-t", "--triggerthreshold", action="store", dest="TRIGGER_THRESHOLD", type="float", help="set the threshold for detecting segment onsets. a value from -100 to 0 where a lower value make onsets happen more frequently. default=-40")
parser.add_option("-r", "--riseratio", action="store", dest="RISERATIO", type="float", help="set the rise-ratio used in determining segment offsets. the rise-ratio is the ratio of a frame's amplitude to the next frame's amplitude. in an active segment, if this ratio is greater than user-supplied rise-ratio, it will cause an offset. it must be greater than 1. default=1.3")
parser.add_option("-m", "--multirise", action="store_true", dest="MULTIRISE", help="If True, corpus soundfile segmentation will be done in several passes where the rise ratio will vary from -20% to +20% of the user supplied riseratio. This results in more segments which will likely overlap each other at times. A good optipon to use for long corpus files.")
parser.add_option("-d", "--minoffsetdbboost", action="store", dest="MINIMUM_DB_OFFSET_BOOST", type="float", help="set the minimum-db-offset-boost. this value in dB is added to the soundfile's minimum amplitude. in an active segment, if a frame's amplitude is below this threshold, it causes a segment offset. default=+12")
parser.add_option("-a", "--offsetabsolute", action="store", dest="DB_OFFSET_ABSOLUTE", type="float", help="set the minimum-db-offset-boost. this value in dB is added to the soundfile's minimum amplitude. in an active segment, if a frame's amplitude is below this threshold, it causes a segment offset. default=-80")
parser.add_option("-s", "--minimum", action="store", dest="MINIMUM_SEG", type="float", help="set the minimum segment length in seconds. segments will be forced to be this duration in seconds or greater. default=0.1")
parser.add_option("-l", "--maximum", action="store", dest="MAXIMUM_SEG", type="float", help="set the maximum segment length in seconds. segments will be forced to be this duration in seconds or shorter. default=1000")
parser.add_option("-f", "--file", action="store", dest="OUTPUT_FILE", type="str", help="set the name of the output file. by default it is the soundfile name + '.txt'")
parser.add_option("-p", "--plot", action="store_true", dest="PLOT_OUTPUT", default=False, help="If added, this flag will create a plot of this file's segmentation saved to the file name plus '.jpg'")
parser.add_option("-i", "--info", action="store", dest="SEGMENTATION_INFO", default='logic', help="If logic, will add some info to the segmentation file about segmentation. If a segmented descriptor name, will add that for each segment")
(options, args) = parser.parse_args()
from audioguide.userclasses import TargetOptionsEntry as tsf
from audioguide.userclasses import SingleDescriptor as d
createdSegFiles = 0
for file in args:
# test if its an audio file
file = os.path.abspath(file)
if not audioguide.util.verifyPathIsValidSoundfile(file): continue
if options.PLOT_OUTPUT: plotMe = file+'.jpg'
else: plotMe = None
segopsdict = {
'TARGET': eval("tsf('%s', thresh=%f, offsetRise=%f, offsetThreshAdd=%f, offsetThreshAbs=%f, minSegLen=%f, maxSegLen=%f, multiriseBool=%s)"%(file, options.TRIGGER_THRESHOLD, options.RISERATIO, options.MINIMUM_DB_OFFSET_BOOST, options.DB_OFFSET_ABSOLUTE, options.MINIMUM_SEG, options.MAXIMUM_SEG, options.MULTIRISE)),
'VERBOSITY': 1,
'TARGET_SEGMENT_LABELS_INFO': options.SEGMENTATION_INFO,
}
ag = audioguide.main()
ag.parse_options_dict(segopsdict)
ag.initialize_analysis_interface()
ag.load_target()
minamp = audioguide.util.ampToDb(min(ag.tgt.whole.desc.get('power')))
#ag.p.pprint("Evaluating %s from %.2f-%.2f"%(ag.tgt.filename, ag.tgt.whole.segmentStartSec, ag.tgt.whole.segmentEndSec), colour="BOLD")
ag.p.pprint("\nAN ONSET HAPPENS when", colour="BOLD")
ag.p.pprint("The amplitude crosses the Relative Onset Trigger Threshold: ${YELLOW}"+"%.2f"%options.TRIGGER_THRESHOLD+" ${NORMAL}(-t option)\n", colour="NORMAL")
ag.p.pprint("\nAN OFFSET HAPPENS when", colour="BOLD")
ag.p.pprint("1. Offset Rise Ratio: when next-frame's-amplitude/this-frame's-amplitude >= ${YELLOW}"+"%.2f"%options.RISERATIO+" ${NORMAL}(-r option)", colour="NORMAL")
ag.p.pprint("\t...or...", colour="BOLD")
ag.p.pprint("2. Offset dB above minimum: when this frame's absolute amplitude <= "+"%.2f"%audioguide.util.ampToDb(ag.tgt.powerOffsetValue)+" (minimum found amplitude of "+"%.2f"%minamp+" plus the offset dB boost of ${YELLOW}"+"%.2f"%options.MINIMUM_DB_OFFSET_BOOST+"${NORMAL} (-d option))\n", colour="NORMAL")
if options.OUTPUT_FILE == '': segFile = file+'.txt'
else: segFile = os.path.abspath(options.OUTPUT_FILE)
ag.tgt.writeSegmentationFile(segFile)
ag.p.pprint("\nFound ${RED}"+str(len(ag.tgt.segs))+"${NORMAL} segments", colour="NORMAL")
print( "Wrote file %s\n"%(segFile) )
createdSegFiles += 1
if (createdSegFiles == 0):
print("\n\nError: I didn't create any segmentation textfiles because you didn't give me any soundfiles in your input arguments! You wrote: %s\n\n"%(' '.join(sys.argv)))