Edinburgh Speech Tools  2.4-release
ch_wave_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : April 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Change EST_Wave utility main */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <cmath>
42 #include "EST_Wave.h"
43 #include "EST_cmd_line.h"
44 #include "EST_cmd_line_options.h"
45 #include "EST_sigpr.h"
46 #include "EST_wave_aux.h"
47 #include "EST.h"
48 
/* Sign of x: 1 if x > 0, -1 if x is non-zero but not > 0, 0 if x is zero.
   Every use of the argument is parenthesised so that expressions containing
   low-precedence operators (e.g. sgn(a & mask), sgn(c ? x : y)) expand
   correctly.  The argument may be evaluated twice, so it must not have
   side effects. */
#define sgn(x) (((x) > 0) ? 1 : ((x) ? -1 : 0))
50 
// Forward declaration only; no definition and no call appears in this file.
// NOTE(review): presumably provided by the wave utility library - confirm.
51 void wave_extract_channel(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
52 
53 
// Forward declaration only; the definition is not in this file.
// main() calls it for the -c option with an empty output wave, the loaded
// input wave and the list of channel numbers parsed from the option value,
// then replaces the input wave with the result.
54 void extract_channels(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
55 
56 /** @name <command>ch_wave</command> <emphasis>Audio file manipulation</emphasis>
57  @id ch_wave_manual
58  * @toc
59  */
60 
61 //@{
62 
63 
64 /**@name Synopsis
65  */
66 //@{
67 
68 //@synopsis
69 
70 /**
71 ch_wave is used to manipulate the format of a waveform
72 file. Operations include:
73 
74 <itemizedlist>
75 <listitem><para>file format conversion</para></listitem>
76 <listitem><para>resampling (changing the sampling frequency)</para></listitem>
77 <listitem><para>byte-swapping</para></listitem>
78 <listitem><para>making multiple input files into a single multi-channel output file</para></listitem>
79 <listitem><para>making multiple input files into a single single-channel output file</para></listitem>
80 <listitem><para>extracting a single channel from a multi-channel waveform</para></listitem>
81 <listitem><para>scaling the amplitude of the waveform</para></listitem>
82 <listitem><para>low pass and high pass filtering</para></listitem>
83 <listitem><para>extracting a time-delimited portion of the waveform</para></listitem>
84 </itemizedlist>
85 
86 ch_wave is an executable program that serves as a wrapper around the
87 EST_Wave class and the basic wave manipulation functions. More
88 advanced waveform processing is performed by the signal processing library.
89 
90 */
91 
92 //@}
93 
94 /**@name OPTIONS
95  */
96 //@{
97 
98 //@options
99 
100 //@}
101 
102 
103 int main (int argc, char *argv[])
104 {
105  EST_Wave sig, sigload;
106  EST_String in_file("-"), out_file("-"), op_file(""), test;
107  EST_Option al;
108  EST_StrList files;
109  EST_Litem *p;
110 
111 
112  parse_command_line
113  (argc, argv,
114  EST_String("[input file0] [input file1] ... -o [output file]\n")+
115  "Summary: change/copy/combine waveform files\n"+
116  "use \"-\" to make input and output files stdin/out\n"+
117  "-h Options help\n\n"+
118  options_wave_input()+
119  options_wave_output()+
120  "-scale <float> Scaling factor. Increase or descrease the amplitude\n"
121  " of the whole waveform by the factor given\n\n"
122 
123  "-scaleN <float> Scaling factor with normalization. \n"
124  " The waveform is scaled to its maximum level, after which \n"
125  " it is scaled by the factor given\n\n"
126 
127  "-lpfilter <int> Low pass filter, with cutoff frequency in Hz \n"
128  " Filtering is performed by a FIR filter which is built at run \n"
129  " time. The order of the filter can be given by -forder. The \n"
130  " default value is 199\n\n"
131 
132  "-hpfilter <int> High pass filter, with cutoff frequency in Hz \n"
133  " Filtering is performed by a FIR filter which is \n"
134  " built at run time. The order of the filter can \n"
135  " be given by -forder. The default value is 199.\n\n"
136 
137  "-forder <int> Order of FIR filter used for lpfilter and \n"
138  " hpfilter. This must be ODD. Sensible values range \n"+
139  " from 19 (quick but with a shallow rolloff) to 199 \n"
140  " (slow but with a steep rolloff). The default is 199.\n\n"
141 
142  "-fafter Do filtering after other operations such as \n"
143  " resampling (default : filter before other operations)\n\n"
144 
145  "-info Print information about file and header. \n"
146  " This option gives useful information such as file \n"
147  " length, sampling rate, number of channels etc\n"
148  " No output is produced\n\n"
149 
150  "-add A new single channel waveform is created by adding \n"
151  " the corresponding sample points of each input waveform\n\n"
152 
153  "-pc <string> Combine input waveforms to form a single \n"
154  " multichannel waveform. The argument to this option controls \n"
155  " how long the new waveform should be. If the option \n"
156  " is LONGEST, the output wave if the length of the \n"
157  " longest input wave and shorter waves are padded with \n"
158  " zeros at the end. If the option is FIRST, the length \n"
159  " of the new waveform is the length of the first file \n"
160  " on the command line, and subsequent waves are padded \n"
161  " or cut to this length\n\n"
162 
163  "-key <ifile> Label file designating subsections, for use with \n"
164  " -divide. The KEYLAB file is a label file which specifies \n"
165  " where chunks (such as individual sentences) in \n"
166  " a waveform begin and end. See section of wave extraction.\n\n"
167 
168  "-divide Divide a single input waveform into multiple output \n"
169  " waveforms. Each output waveform is extracted from the \n"
170  " input waveform by using the KEYLAB file, which \n"
171  " specifies the start and stop times for each chunk. \n"
172  " The output files are named according to the filename \n"
173  " in the KEYLAB file, with extension given by -ext. See \n"
174  " section on wave extraction\n\n"
175 
176  "-ext <string> File extension for divided waveforms\n\n"
177 
178  "-compress <float> Apply Dynamic Range Compression by factor specified \n"
179 
180  "-extract <string> Used in conjunction with -key to extract a \n"
181  " single section of waveform from the input \n"
182  " waveform. The argument is the name of a file given \n"
183  " in the file column of the KEYLAB file.\n",
184  files, al);
185 
186  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
187 
188  // There will always be at least one (or stdin)
189  // The first is dealt specially in case its *way* big
190  if (read_wave(sig, files.first(), al) != format_ok)
191  exit(-1);
192  if (al.present("-info"))
193  wave_info(sig);
194  // concat or parallelize remaining input files
195 
196  if (files.length() > 1)
197  {
198  for (p= files.head()->next(); p != 0; p=p->next())
199  {
200  if (read_wave(sigload, files(p), al) != format_ok)
201  exit(-1);
202  if (al.present("-info"))
203  wave_info(sigload);
204  else if (al.present("-pc"))
205  {
206  if ((downcase(al.val("-pc")) == "longest") &&
207  (sig.num_samples() < sigload.num_samples()))
208  sig.resize(sigload.num_samples());
209  else /* "first" or sig is longer */
210  sigload.resize(sig.num_samples());
211  sig |= sigload;
212  }
213  else if (al.present("-add"))
214  add_waves(sig, sigload);
215  else
216  sig += sigload;
217  }
218  }
219 
220  if (al.present("-info"))
221  exit(0); // done what I've been asked to so stop
222 
223  // All input files are now in a single wave called sig
224 
225  // default is to filter before any resampling etc.
226  // (this may cause problems for multiplexed data !)
227  if(!al.present("-fafter")){
228  if(al.present("-lpfilter"))
229  FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
230  if(al.present("-hpfilter"))
231  FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
232  }
233 
234  if (al.present("-c")) // extract a channel from a multi-channel wave
235  {
236  EST_StrList s;
237  EST_IList il;
238  EST_Wave nsig;
239  StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
240  StrListtoIList(s, il);
241  extract_channels(nsig, sig, il);
242  sig = nsig;
243  }
244 
245  if (al.present("-F")) // resample
246  sig.resample(al.ival("-F"));
247 
248  if (al.present("-compress")) // Dynamic Range Compression
249  {
250  float mu = al.fval("-compress" , 0);
251  float lim = 30000.0;
252 
253  sig.compress(mu, lim);
254  }
255 
256  if (al.present("-scale")) // rescale
257  {
258  float scale = al.fval("-scale", 0);
259  sig.rescale(scale);
260  }
261  if (al.present("-scaleN")) // rescale
262  {
263  float scale = al.fval("-scaleN", 0);
264  if ((scale < 0) || (scale > 1.0))
265  {
266  cerr << "ch_wave: -scaleN must be in range 0 to 1" << endl;
267  exit(-1);
268  }
269  sig.rescale(scale,1);
270  }
271 
272  EST_Relation key;
273 
274  if (al.present("-divide"))
275  {
276  EST_WaveList wl;
277  if (!al.present("-key"))
278  {
279  cerr << "Must have key file specified when dividing waveform\n";
280  exit (-1);
281  }
282  if (key.load(al.val("-key")) != format_ok)
283  exit(-1);
284 
285  if (wave_divide(wl, sig, key, al.val("-ext", 0)) == -1)
286  exit(0);
287  for (p = wl.head(); p; p = p->next())
288  wl(p).save(wl(p).name(), al.val("-otype", 0));
289  exit(0);
290  }
291  else if (al.present("-extract"))
292  {
293  EST_Wave e;
294  if (!al.present("-key"))
295  {
296  cerr << "Must have key file specified when dividing waveform\n";
297  exit (-1);
298  }
299  if (key.load(al.val("-key")) != format_ok)
300  exit(-1);
301 
302  if (wave_extract(e, sig, key, al.val("-extract")) == -1)
303  exit (-1);
304  sig = e;
305  }
306 
307  // if we are filtering after other operations
308  if(al.present("-fafter")){
309  if(al.present("-lpfilter"))
310  FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
311  if(al.present("-hpfilter"))
312  FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
313  }
314 
315  write_wave(sig, out_file, al);
316  return 0;
317 }
318 
319 /** @name Making multiple waves into a single wave
320 
321 If multiple input files are specified, by default they are concatenated into
322 the output file.
323 </para>
324 <para>
325 <screen>
326 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -o out.wav
327 </screen>
328 </para>
329 <para>
330 In the above example, 4 single channel input files are converted to
331 one single channel output file. Multi-channel waveforms can also be
332 concatenated provided they all have the same number of input channels.
333 
334 </para><para>
335 
336 Multiple input files can be made into a multi-channel output file by
337 using the -pc option:
338 
339 </para><para>
340 <screen>
341 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -pc LONGEST -o out.wav
342 </screen>
343 </para>
344 <para>
345 The argument to -pc can be either LONGEST, in which case the output
346 waveform is the length of the longest input file, or FIRST, in which case it
347 is the length of the first input file.
348 
349 */
350 
351 //@{
352 //@}
353 
354 /** @name Extracting channels from multi-channel waves
355 
356 The -c option is used to specify channels which should be extracted
357 from the input. If the input is a 4 channel wave,
358 </para><para>
359 <screen>
360 $ ch_wave kdt_m.wav -o a.wav -c "0 2"
361 </screen>
362 </para>
363 <para>
364 will extract the 0th and 2nd channel (counting starts from 0). The
365 argument to -c can be either a single number or a list of numbers
366 (wrapped in quotes)
367 
368  */
369 //@{
370 //@}
371 
372 
373 /** @name Extracting a single region from a waveform
374 
375 There are several ways of extracting a region of a waveform. The
376 simplest way is by using the -start, -end, -from and -to options to
377 delimit a sub portion of the input wave. For example
378 </para><para>
379 <screen>
380 $ ch_wave kdt_010.wav -o small.wav -start 1.45 -end 1.768
381 </screen>
382 </para>
383 <para>
384 extracts a subwave starting at 1.45 seconds and extending to 1.768 seconds.
385 
386 alternatively,
387 </para><para>
388 <screen>
389 $ ch_wave kdt_010.wav -o small.wav -from 5000 -to 10000
390 </screen>
391 </para>
392 <para>
393 extracts a subwave starting at 5000 samples and extending to 10000
394 samples. Times and samples can be mixed in sub-wave extraction. The
395 output waveform will have the same number of channels as the input
396 waveform.
397 
398 */
399 //@{
400 //@}
401 
402 /** @name Extracting multiple regions from a waveform
403 
404 Multiple regions can be extracted from a waveform, but as it would be
405 too complicated to specify the start and end points on the command
406 line, a label file with start and end points, and file names is used.
407 
408 The file is called a key label file and in xwaves label format looks
409 like:
410 </para>
411 <para>
412 <screen>
413 separator ;
414 #
415 0.308272 121 sil ; file kdt_010.01 ;
416 0.440021 121 are ; file kdt_010.02 ;
417 0.512930 121 your ; file kdt_010.03 ;
418 0.784097 121 grades ; file kdt_010.04 ;
419 1.140969 121 higher ; file kdt_010.05 ;
420 1.258647 121 or ; file kdt_010.06 ;
421 1.577145 121 lower ; file kdt_010.07 ;
422 1.725516 121 than ; file kdt_010.08 ;
423 2.315186 121 nancy's ; file kdt_010.09 ;
424 </screen>
425 </para>
426 <para>
427 Each line represents one region. The first column is the end time of
428 that region and the start time of the next. The next two columns are
429 colour and an arbitrary name, and the filename in which the output
430 waveform is to be stored is kept as a field called file in the last column.
431 In this example, each region corresponds to a single word in the file.
432 
433 If the above file is called "kdt_010.words.keylab", the command:
434 </para>
435 <para>
436 <screen>
437 $ ch_wave kdt_010.wav -key kdt_010.words.keylab -ext .wav -divide
438 </screen>
439 </para>
440 <para>
441 will divide the input waveform into 9 output waveforms called
442 kdt_010.01.wav, kdt_010.02.wav ... kdt_010.09.wav. The -ext option
443 specifies the extension of the new waveforms, and the -divide command
444 specifies that division of the entire waveform is to take place.
445 
446 If only a single file is required the -extract option can be used, in
447 which case its argument is the filename required.
448 </para>
449 <para>
450 <screen>
451 $ ch_wave kdt_010.wav -key kdt_010.words.keylab -ext .wav -extract kdt_010.03 \
452  -o kdt_010.03.wav
453 </screen>
454 </para>
455 <para>
456 Note that an output filename should be specified with this option.
457 */
458 //@{
459 //@}
460 
461 /** @name Adding headers and format conversion
462 
463 It is usually a good idea for all waveform files to have headers as
464 this way different byte orders, sampling rates etc can be handled
465 safely. ch_wave provides a means of adding headers to raw files.
466 
467 The following adds a header to a file of 16 bit shorts
468 </para>
469 <para>
470 <screen>
471 $ ch_wave kdt_010.raw1 -o kdt_010.h1.wav -otype nist -f 16000 -itype raw
472 </screen>
473 </para>
474 <para>
475 The following downsamples the input to 8 kHz
476 </para>
477 <para>
478 <screen>
479 $ ch_wave kdt_010.raw1 -o kdt_010.h2.wav -otype nist -f 16000 \
480  -F 8000 -itype raw
481 </screen>
482 </para>
483 <para>
484 The following takes an 8 kHz ulaw input file and produces a 16-bit, 20 kHz output file:
485 </para>
486 <para>
487 <screen>
488 $ ch_wave kdt_010.raw2 -o kdt_010.h3.wav -otype nist -istype ulaw \
489  -f 8000 -F 20000 -itype raw
490 </screen>
491 */
492  //@{
493  //@}
494 
495 //@}
EST_Option
Definition: EST_Option.h:50
EST_Relation::load
EST_read_status load(const EST_String &filename, const EST_String &type="esps")
Definition: EST_Relation.cc:620
EST_Wave
Definition: EST_Wave.h:64
EST_TList< int >
EST_TList::first
const T & first() const
return const reference to first item in list
Definition: EST_TList.h:146
EST_Wave::resize
void resize(int num_samples, int num_channels=EST_ALL, int set=1)
resize the waveform
Definition: EST_Wave.h:184
EST_Wave::num_samples
int num_samples() const
return the number of samples in the waveform
Definition: EST_Wave.h:143
EST_Wave::rescale
void rescale(float gain, int normalize=0)
Definition: EST_Wave.cc:517
EST_TKVL::present
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
EST_Option::fval
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
EST_UItem
Definition: EST_UList.h:51
EST_Relation
Definition: EST_Relation.h:67
EST_String
Definition: EST_String.h:70
EST_Option::ival
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:76
EST_Wave::compress
void compress(float mu, float limit)
Dynamic Range Compression - SaiKrishna May 2017.
Definition: EST_Wave.cc:503
EST_Wave::resample
void resample(int rate)
Resample waveform to rate
Definition: EST_Wave.cc:489
EST_TKVL::val
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145