mishig HF staff committed on
Commit
181830a
·
1 Parent(s): 4b11f38

Fix share btn WAV audio

Browse files
Files changed (1) hide show
  1. share_btn.py +141 -3
share_btn.py CHANGED
@@ -14,7 +14,7 @@ share_js = """async () => {
14
  const response = await fetch(UPLOAD_URL, {
15
  method: 'POST',
16
  headers: {
17
- 'Content-Type': file.type,
18
  'X-Requested-With': 'XMLHttpRequest',
19
  },
20
  body: file, /// <- File inherits from Blob
@@ -23,6 +23,119 @@ share_js = """async () => {
23
  return url;
24
  }
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  const gradioEl = document.querySelector('body > gradio-app');
27
  const audioEl = gradioEl.querySelector('audio');
28
  const resultTxt = gradioEl.querySelector('#result-textarea textarea').value;
@@ -40,8 +153,33 @@ share_js = """async () => {
40
 
41
  const res = await fetch(audioEl.src);
42
  const blob = await res.blob();
43
- const fileName = `whisper-demo-input.webm`;
44
- const audioFile = new File([blob], fileName, { type: 'audio/webm' });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  const url = await uploadFile(audioFile);
47
 
 
14
  const response = await fetch(UPLOAD_URL, {
15
  method: 'POST',
16
  headers: {
17
+ 'Content-Type': 'audio/wav',
18
  'X-Requested-With': 'XMLHttpRequest',
19
  },
20
  body: file, /// <- File inherits from Blob
 
23
  return url;
24
  }
25
 
26
/**
 * Resample an AudioBuffer to a new sample rate by rendering it through an
 * OfflineAudioContext.
 *
 * @param {AudioBuffer} buffer - Decoded source audio.
 * @param {number} sampleRate - Target sample rate in Hz.
 * @returns {Promise<AudioBuffer>} Promise returned by `startRendering()`;
 *   the offline context is always created with 2 output channels.
 */
function audioResample(buffer, sampleRate) {
  // Keep the same duration at the new rate. The product is computed in
  // floating point and can land just below the intended integer
  // (e.g. 57 / 100 * 100 === 56.99999999999999); the constructor would
  // truncate that and silently drop the last frame, so round explicitly.
  const frameCount = Math.round(
    (buffer.length / buffer.sampleRate) * sampleRate
  );
  const offlineCtx = new OfflineAudioContext(2, frameCount, sampleRate);
  const source = offlineCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(offlineCtx.destination);
  source.start();
  return offlineCtx.startRendering();
}
34
+
35
/**
 * Reduce a (at least) stereo AudioBuffer to a single channel.
 *
 * @param {AudioBuffer} buffer - Source audio.
 * @param {string} targetChannelOpt - 'both' returns the input untouched,
 *   'left' keeps channel 0, 'right' keeps channel 1, and any other value
 *   averages channels 0 and 1.
 * @returns {AudioBuffer} The original buffer when no reduction is needed
 *   ('both', or fewer than 2 channels); otherwise a new 1-channel buffer
 *   with the same length and sample rate.
 */
function audioReduceChannels(buffer, targetChannelOpt) {
  if (targetChannelOpt === 'both' || buffer.numberOfChannels < 2) {
    return buffer;
  }

  const outBuffer = new AudioBuffer({
    sampleRate: buffer.sampleRate,
    length: buffer.length,
    numberOfChannels: 1,
  });

  const left = buffer.getChannelData(0);
  const right = buffer.getChannelData(1);

  // The option does not change per sample, so pick the strategy once
  // instead of re-testing it inside the sample loop.
  let mono;
  if (targetChannelOpt === 'left') {
    mono = left;
  } else if (targetChannelOpt === 'right') {
    mono = right;
  } else {
    mono = left.map((sample, i) => (sample + right[i]) / 2);
  }

  outBuffer.copyToChannel(mono, 0);
  return outBuffer;
}
53
+
54
/**
 * Peak-normalize an AudioBuffer in place so its loudest sample reaches
 * an absolute amplitude of 1.0.
 *
 * @param {AudioBuffer} buffer - Audio to normalize; modified in place.
 * @returns {AudioBuffer} The same buffer instance. It is returned
 *   unchanged when the peak is already >= 1.0 or when the audio is
 *   entirely silent.
 */
function audioNormalize(buffer) {
  const channels = Array.from({ length: buffer.numberOfChannels }, (_, idx) =>
    buffer.getChannelData(idx)
  );

  let maxAmplitude = 0;
  for (const chan of channels) {
    for (const sample of chan) {
      const amplitude = Math.abs(sample);
      if (amplitude > maxAmplitude) maxAmplitude = amplitude;
    }
  }

  // Silence has no peak to scale to: 1 / 0 would be Infinity and turn
  // every sample into NaN, so leave the buffer as it is.
  if (maxAmplitude === 0 || maxAmplitude >= 1.0) return buffer;

  const coeff = 1.0 / maxAmplitude;
  channels.forEach((chan, channelIdx) => {
    // Write each channel back to its OWN index; writing all of them to
    // channel 0 would overwrite the left channel with the last channel.
    buffer.copyToChannel(chan.map((sample) => sample * coeff), channelIdx);
  });
  return buffer;
}
65
+
66
/**
 * Run the audio clean-up pipeline: resample, then reduce channels, then
 * peak-normalize.
 *
 * @param {AudioBuffer} audioBufferIn - Decoded input audio.
 * @param {string} targetChannelOpt - Channel option forwarded to
 *   audioReduceChannels.
 * @param {number} targetSampleRate - Sample rate forwarded to audioResample.
 * @returns {Promise<AudioBuffer>} The result of the final normalize step.
 */
async function processAudioFile(audioBufferIn, targetChannelOpt, targetSampleRate) {
  const atTargetRate = await audioResample(audioBufferIn, targetSampleRate);
  return audioNormalize(audioReduceChannels(atTargetRate, targetChannelOpt));
}
76
+
77
/**
 * Convert float sample channels into raw interleaved PCM bytes.
 *
 * @param {Float32Array[]} audioChannels - One array of samples per channel;
 *   all are read up to the length of the first. When not mixing, only the
 *   first two channels are used.
 * @param {number} bytesPerSample - 1 (8-bit unsigned) or 2 (16-bit signed,
 *   little-endian).
 * @param {boolean} [mixChannels=false] - When true, average all channels
 *   into a single mono stream instead of interleaving them.
 * @returns {Uint8Array} PCM bytes: interleaved frames, or mono when
 *   mixChannels is true.
 * @throws {Error} If bytesPerSample is neither 1 nor 2.
 */
function audioToRawWave(audioChannels, bytesPerSample, mixChannels = false) {
  if (bytesPerSample !== 1 && bytesPerSample !== 2) {
    throw new Error("Only 8, 16 bits per sample are supported");
  }

  const bufferLength = audioChannels[0].length;
  const numberOfChannels = audioChannels.length === 1 ? 1 : 2;
  // A mix-down produces ONE channel, so the output must be sized and
  // strided for one channel rather than for the input channel count.
  const outputChannels = mixChannels ? 1 : numberOfChannels;
  const reducedData = new Uint8Array(
    bufferLength * outputChannels * bytesPerSample
  );

  for (let i = 0; i < bufferLength; ++i) {
    for (let channel = 0; channel < outputChannels; ++channel) {
      const outputIndex = (i * outputChannels + channel) * bytesPerSample;
      const raw = mixChannels
        ? audioChannels.reduce((sum, chan) => sum + chan[i], 0) /
          audioChannels.length
        : audioChannels[channel][i];
      // Clamp to [-1, 1] so out-of-range input clips instead of wrapping.
      const sample = Math.max(-1, Math.min(1, raw));

      if (bytesPerSample === 2) {
        // Signed 16-bit, low byte first.
        const int16 = (sample * 32767) | 0;
        reducedData[outputIndex] = int16 & 0xff;
        reducedData[outputIndex + 1] = (int16 >> 8) & 0xff;
      } else {
        // Unsigned 8-bit: [-1, 1] maps onto 0..254; the typed-array store
        // drops the fractional part.
        reducedData[outputIndex] = (sample + 1) * 127;
      }
    }
  }
  return reducedData;
}
114
+
115
/**
 * Wrap raw PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * @param {Uint8Array} data - Raw PCM sample bytes.
 * @param {number} channels - Channel count written to the header.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} bytesPerSample - Bytes per single-channel sample.
 * @returns {Blob} Blob of type "audio/wav" holding header + data (plus one
 *   zero pad byte when the data length is odd).
 */
function makeWav(data, channels, sampleRate, bytesPerSample) {
  const headerLength = 44;
  // RIFF chunks must end on an even byte boundary: an odd-sized data chunk
  // is followed by one zero pad byte that is counted in the RIFF size but
  // not in the data chunk's own size.
  const padLength = data.length % 2;
  const wav = new Uint8Array(headerLength + data.length + padLength);
  const view = new DataView(wav.buffer);
  const blockAlign = channels * bytesPerSample;

  // Chunk identifiers are 4 ASCII characters stored in reading order.
  const writeTag = (offset, tag) => {
    for (let i = 0; i < tag.length; ++i) {
      view.setUint8(offset + i, tag.charCodeAt(i));
    }
  };

  writeTag(0, "RIFF"); // RIFF identifier
  view.setUint32(4, 36 + data.length + padLength, true); // bytes after this field
  writeTag(8, "WAVE"); // RIFF type
  writeTag(12, "fmt "); // format chunk identifier
  view.setUint32(16, 16, true); // format chunk length
  view.setUint16(20, 1, true); // sample format (raw PCM)
  view.setUint16(22, channels, true); // channel count
  view.setUint32(24, sampleRate, true); // sample rate
  view.setUint32(28, sampleRate * blockAlign, true); // byte rate
  view.setUint16(32, blockAlign, true); // block align
  view.setUint16(34, bytesPerSample * 8, true); // bits per sample
  writeTag(36, "data"); // data chunk identifier
  view.setUint32(40, data.length, true); // data chunk length (pad excluded)

  wav.set(data, headerLength);

  return new Blob([wav.buffer], { type: "audio/wav" });
}
138
+
139
  const gradioEl = document.querySelector('body > gradio-app');
140
  const audioEl = gradioEl.querySelector('audio');
141
  const resultTxt = gradioEl.querySelector('#result-textarea textarea').value;
 
153
 
154
  const res = await fetch(audioEl.src);
155
  const blob = await res.blob();
156
+
157
+ const channelOpt = "both";
158
+ const sampleRate = 48000;
159
+ const bytesPerSample = 1; // or 2
160
+ const audioBufferIn = await new AudioContext().decodeAudioData(
161
+ await blob.arrayBuffer()
162
+ );
163
+ const audioBuffer = await processAudioFile(
164
+ audioBufferIn,
165
+ channelOpt,
166
+ sampleRate
167
+ );
168
+ const rawData = audioToRawWave(
169
+ channelOpt === "both"
170
+ ? [audioBuffer.getChannelData(0), audioBuffer.getChannelData(1)]
171
+ : [audioBuffer.getChannelData(0)],
172
+ bytesPerSample
173
+ );
174
+ const blobWav = makeWav(
175
+ rawData,
176
+ channelOpt === "both" ? 2 : 1,
177
+ sampleRate,
178
+ bytesPerSample
179
+ );
180
+
181
+ const fileName = `whisper-demo-input.wav`;
182
+ const audioFile = new File([blobWav], fileName, { type: 'audio/wav' });
183
 
184
  const url = await uploadFile(audioFile);
185