π 곡λΆνλ μ§μ§μνμΉ΄λ μ²μμ΄μ§?
[μμ±]μμ± μ²λ¦¬ λΆμΌμμμ Deep Learning λ³Έλ¬Έ
[μμ±]μμ± μ²λ¦¬ λΆμΌμμμ Deep Learning
μ§μ§μνμΉ΄ 2022. 1. 24. 23:47220124 μμ±
<λ³Έ λΈλ‘κ·Έλ Kaggle μ μ°Έκ³ ν΄μ μ λ§μ νμ΄λ₯Ό μμ±νμμ΅λλ€>
https://engineering.linecorp.com/ko/blog/voice-waveform-arbitrary-signal-to-noise-ratio-python/
λ₯ λ¬λ μμ± μΈμμ νμν νλ ¨ λ°μ΄ν°λ₯Ό μ§μ λ§λ€μ΄λ³΄μ - LINE ENGINEERING
μλ νμΈμ, LINEμμ κ΄κ³ νλ«νΌ κ°λ°μ λ§‘κ³ μλ 1λ μ°¨ μ μ μ¬μ Kunihiko Satoμ λλ€. μ΄λ² λΈλ‘κ·Έμμλ Pythonμ μ¬μ©ν΄μ μμμ Signal-to-Noise ratio(SNλΉ)λ₯Ό κ°μ§ μμ± ννμ λ§λλ λ°©λ²μ μκ°
engineering.linecorp.com
1. μμ λΆλ¦¬
: μ¬λ¬ κ°μ μμμ΄ μμ¬ μλ μ λ ₯ ννμ κ°λ³ μμμ ννμΌλ‘ λΆλ¦¬νλ κ²
: μμ± κ°μ‘° or μ‘μ μ κ±°
: μμ±κ³Ό μ‘μμ΄ μμ¬ μλ μ λ ₯ ννμ μμ± ννκ³Ό μ‘μ ννμΌλ‘ κ°κ° λΆλ¦¬ν΄λ΄λ κ²
ex) μ‘μμ κ±°, νΉμ μΈλ¬Ό μμ± μΆμΆ, μ κΈ°λ³ μμ λΆλ¦¬
2. λ₯λ¬λμ νμν νλ ¨ λ°μ΄ν° μ μ
- νλ ¨ λ°μ΄ν°
: μμ±κ³Ό μ‘μμ΄ μμ¬ μλ ννμ΄ νμ
: μ΄ λ°μ΄ν°λ₯Ό ν΅ν΄ μ κ²½ νλ‘λ§μ μ‘μμ΄ μμ¬ μλ μμ± ννμμ μμ±λ§ μΆμΆνλλ‘ νλ ¨
3. Signal-to-Noise ratio ( SN비, 신호 대비 잡음 비 )
: 신호의 크기가 잡음의 크기보다 얼마나 큰지 나타내는 비율
: SN비의 단위는 dB(데시벨)
: Signal -> 음성, Noise -> 그 외 소리 (화이트 노이즈, 환경음)
: SN 비 높을수록 음성 > 잡음
: 0 dB 는 음성과 잡음 크기 동일
: - dB 는 음성 < 잡음
: 임의의 Signal-to-Noise ratio 를 가진 음성 파형을 만든다
=> 원하는 dB 비율로 음성과 잡음이 섞여 있는 음성 파형을 만든다
- κ³μ° λ°©λ²
$\mathrm{SNR}_{dB} = 20 \log_{10}\left( A_{signal} / A_{noise} \right)$
Asignal : 음성의 크기 or 세기
Anoise : 잡음의 크기 or 세기 ( 세기 : 진폭값의 평균 제곱근_Root Mean Square, RMS )
1) 음성의 진폭값이 마이너스 수치로 나올 수도 있으니 진폭값 제곱
2) 제곱한 값을 더한 뒤 그 값의 평균 구함
3) 평균한 값의 제곱근을 계산하면 소리의 세기 구함
: 파형의 무음 구간, 특정 구간만 비정상적으로 진폭값 큼 -> 소리의 세기로 사용 X
4. Python μΌλ‘ μμ Signal-to-Noise ratio μ μμ± νν μ μ
: μμ±μ μμ ν¬κΈ°μ μ‘μ μ€μ²©
1) μμ± νμΌ ν¬λ§· νμΈ
: .wav νμΌ μ¬μ©
2) wav νμΌ λ‘λ©
import argparse # νλ‘κ·Έλ¨μ μ€νμμ 컀맨λ λΌμΈμ μΈμλ₯Ό λ°μ μ²λ¦¬λ₯Ό κ°λ¨ν
import array
import math
import numpy as np
import random
import wave
def get_args(argv=None):
    """Parse the command-line options of the mixing script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the original
            behaviour.

    Returns:
        argparse.Namespace with the attributes ``clean_file``,
        ``noise_file``, ``output_mixed_file``, ``output_clean_file``,
        ``output_noise_file`` and ``snr``.
    """
    parser = argparse.ArgumentParser()
    # Required options take no default: argparse rejects the command line
    # when one is missing, so a default could never be observed.
    parser.add_argument('--clean_file', type=str, required=True,
                        help='path of the input WAV file containing only speech')
    parser.add_argument('--noise_file', type=str, required=True,
                        help='path of the input WAV file containing only noise')
    parser.add_argument('--output_mixed_file', type=str, required=True,
                        help='path where the speech+noise mixture is written')
    # The two outputs below are optional; an empty string means "not given".
    parser.add_argument('--output_clean_file', type=str, default='',
                        help='optional path for the processed speech-only waveform')
    parser.add_argument('--output_noise_file', type=str, default='',
                        help='optional path for the SNR-adjusted noise-only waveform')
    parser.add_argument('--snr', type=float, required=True,
                        help='target signal-to-noise ratio in dB')
    return parser.parse_args(argv)
# κ°κ°μ μ§νμ λν μ§νκ°μ΄ wav νμΌμ μμν bitμ, 16bit(32767)
def cal_adjusted_rms(clean_rms, snr):
    """Return the noise RMS that yields the requested SNR against the speech.

    Rearranges ``snr = 20 * log10(clean_rms / noise_rms)`` for ``noise_rms``.

    Args:
        clean_rms: RMS level of the speech signal.
        snr: Target signal-to-noise ratio in dB (anything ``float()`` accepts).

    Returns:
        ``clean_rms`` divided by ``10 ** (snr / 20)``.
    """
    amplitude_ratio = 10 ** (float(snr) / 20)
    return clean_rms / amplitude_ratio
# μμ± ννμ μ§νκ° μ·¨λνκΈ°
def cal_amp(wf):
    """Read every frame of an open wave reader as float64 amplitudes.

    Args:
        wf: Object providing ``getnframes()`` and ``readframes(n)``, such as
            the reader returned by ``wave.open(path, 'r')``.

    Returns:
        1-D ``numpy.float64`` array holding one value per 16-bit sample.
    """
    frame_count = wf.getnframes()
    # Asking for all frames at once returns the whole payload as bytes.
    raw = wf.readframes(frame_count)
    # The bytes are decoded as int16, i.e. the data is taken to be 16-bit PCM;
    # other sample widths would need a different dtype.
    samples = np.frombuffer(raw, dtype=np.int16)
    return samples.astype(np.float64)
# μ§νκ°μ νκ· μ κ³±κ·Ό(Root Mean Square, RMS) ꡬνκΈ°
# μ‘μ λ°μ΄ν° ννμ μμ± λ°μ΄ν° νν κΈΈμ΄λ‘ μλ₯΄κΈ°
# μ‘μ νμΌμμ μλΌλΈ νν, μμ± ννμ RMSλ₯Ό κ°κ° κ³μ° -> μμ SNλΉκ° λμ€λλ‘ μ€μ²©
def cal_rms(amp):
    """Return the root mean square of the amplitudes along the last axis.

    Args:
        amp: Array-like of amplitude values.

    Returns:
        ``sqrt(mean(amp ** 2))`` computed over the last axis.
    """
    # Squaring first removes the sign, so negative amplitudes do not cancel
    # positive ones when averaging.
    squared = np.square(amp)
    mean_power = np.mean(squared, axis=-1)
    return np.sqrt(mean_power)
def save_waveform(output_path, params, amp):
    """Write amplitude samples to a WAV file as 16-bit integers.

    Args:
        output_path: Destination path (or writable binary file object)
            handed to ``wave.open``.
        params: WAV parameters passed to ``setparams``: (nchannels,
            sampwidth, framerate, nframes, comptype, compname).
        amp: NumPy array of sample amplitudes. Values outside the int16
            range are saturated; in-range values are cast with ``astype``
            exactly as before.
    """
    int16_info = np.iinfo(np.int16)
    # Saturate instead of letting the int16 cast wrap around, which would
    # turn a slightly too loud sample into a full-scale click.
    samples = np.clip(amp, int16_info.min, int16_info.max).astype(np.int16)
    # The context manager closes the writer even if setparams/writeframes
    # raises, so no file handle is leaked.
    with wave.open(output_path, 'wb') as output_file:
        output_file.setparams(params)
        output_file.writeframes(array.array('h', samples).tobytes())
if __name__ == '__main__':
    # Script entry point: mix a clean speech file with a noise file at the
    # requested SNR and write the resulting waveforms as 16-bit WAV files.
    args = get_args()

    # Read all samples of both inputs; the context managers close the
    # readers even if decoding fails.
    with wave.open(args.clean_file, 'r') as clean_wav:
        clean_params = clean_wav.getparams()
        clean_amp = cal_amp(clean_wav)
    with wave.open(args.noise_file, 'r') as noise_wav:
        noise_amp = cal_amp(noise_wav)

    # A speech-length segment can only be cut from noise that is at least as
    # long as the speech; fail with a clear message otherwise.
    if len(noise_amp) < len(clean_amp):
        raise ValueError(
            'noise file is shorter than the clean file '
            '({} < {} samples)'.format(len(noise_amp), len(clean_amp))
        )

    # Pick a random start position and cut out a segment of the noise that
    # is exactly as long as the speech.
    start = random.randint(0, len(noise_amp) - len(clean_amp))
    divided_noise_amp = noise_amp[start:start + len(clean_amp)]

    clean_rms = cal_rms(clean_amp)
    noise_rms = cal_rms(divided_noise_amp)
    if noise_rms == 0:
        # A silent segment cannot be scaled to any level (division by zero).
        raise ValueError('selected noise segment is silent; '
                         'cannot scale it to the requested SNR')

    # Scale the noise so its RMS matches the level implied by the target
    # SNR, then overlay it on the speech.
    target_noise_rms = cal_adjusted_rms(clean_rms, args.snr)
    adjusted_noise_amp = divided_noise_amp * (target_noise_rms / noise_rms)
    mixed_amp = clean_amp + adjusted_noise_amp

    # Avoid clipping: if the mixture exceeds the int16 range, scale it so
    # that its largest peak lands exactly on the int16 limit.
    max_int16 = np.iinfo(np.int16).max
    min_int16 = np.iinfo(np.int16).min
    if mixed_amp.max(axis=0) > max_int16 or mixed_amp.min(axis=0) < min_int16:
        if mixed_amp.max(axis=0) >= abs(mixed_amp.min(axis=0)):
            reduction_rate = max_int16 / mixed_amp.max(axis=0)
        else:
            reduction_rate = min_int16 / mixed_amp.min(axis=0)
        # Apply the same gain to all three signals so that
        # mixed == clean + noise still holds for the saved files.
        mixed_amp = mixed_amp * reduction_rate
        clean_amp = clean_amp * reduction_rate
        adjusted_noise_amp = adjusted_noise_amp * reduction_rate

    # Save the waveforms using the format of the clean input file.
    save_waveform(args.output_mixed_file, clean_params, mixed_amp)
    # The clean/noise outputs are optional: an empty path means "skip".
    if args.output_clean_file:
        save_waveform(args.output_clean_file, clean_params, clean_amp)
    if args.output_noise_file:
        save_waveform(args.output_noise_file, clean_params, adjusted_noise_amp)
+) Signal-to-Noise ratio κ³μ°μμ μ΄μ©ν΄ μμ ν¬κΈ°λ‘ νν ν©μ±
- μμ±μ λν΄ μμμ SN λΉκ° λμ€λλ‘ μ‘μμ RMS ꡬνκΈ°
- μ‘μμ RMS
: RMS(Anoise) μ μλ³Έ μ‘μμ RMS λΉμ¨μ κ³μ°νμ¬, κ·Έ λΉμ¨λ§νΌ μλ³Έ μ‘μμ μ§νκ° μ‘°μ
: μ‘°μ ν μ‘μμ μ§νκ³Ό μμ± λ¨λ μ μ§ν λν¨
μ°Έκ³ νλ©΄ λ μ’μ μ 보μ λ°©λ²μ μ μ μμ κ²!
create_mixed_audio_file.py μ 16 bit μ©! path λν΄μ μ€ννλλ ,, μ μλ κΉ μ κΆκΈνλ€
https://github.com/Sato-Kunihiko/audio-SNR
GitHub - Sato-Kunihiko/audio-SNR: Mixing an audio file with a noise file at any Signal-to-Noise Ratio (SNR)
Mixing an audio file with a noise file at any Signal-to-Noise Ratio (SNR) - GitHub - Sato-Kunihiko/audio-SNR: Mixing an audio file with a noise file at any Signal-to-Noise Ratio (SNR)
github.com
μ 체 μ½λ
import argparse # νλ‘κ·Έλ¨μ μ€νμμ 컀맨λ λΌμΈμ μΈμλ₯Ό λ°μ μ²λ¦¬λ₯Ό κ°λ¨ν
import array
import math
import numpy as np
import random
import wave
def get_args(argv=None):
    """Parse the command-line options of the mixing script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the original
            behaviour.

    Returns:
        argparse.Namespace with the attributes ``clean_file``,
        ``noise_file``, ``output_mixed_file``, ``output_clean_file``,
        ``output_noise_file`` and ``snr``.
    """
    parser = argparse.ArgumentParser()
    # Required options take no default: argparse rejects the command line
    # when one is missing, so a default could never be observed.
    parser.add_argument('--clean_file', type=str, required=True,
                        help='path of the input WAV file containing only speech')
    parser.add_argument('--noise_file', type=str, required=True,
                        help='path of the input WAV file containing only noise')
    parser.add_argument('--output_mixed_file', type=str, required=True,
                        help='path where the speech+noise mixture is written')
    # The two outputs below are optional; an empty string means "not given".
    parser.add_argument('--output_clean_file', type=str, default='',
                        help='optional path for the processed speech-only waveform')
    parser.add_argument('--output_noise_file', type=str, default='',
                        help='optional path for the SNR-adjusted noise-only waveform')
    parser.add_argument('--snr', type=float, required=True,
                        help='target signal-to-noise ratio in dB')
    return parser.parse_args(argv)
# κ°κ°μ μ§νμ λν μ§νκ°μ΄ wav νμΌμ μμν bitμ, 16bit(32767)
def cal_adjusted_rms(clean_rms, snr):
    """Return the noise RMS that yields the requested SNR against the speech.

    Rearranges ``snr = 20 * log10(clean_rms / noise_rms)`` for ``noise_rms``.

    Args:
        clean_rms: RMS level of the speech signal.
        snr: Target signal-to-noise ratio in dB (anything ``float()`` accepts).

    Returns:
        ``clean_rms`` divided by ``10 ** (snr / 20)``.
    """
    amplitude_ratio = 10 ** (float(snr) / 20)
    return clean_rms / amplitude_ratio
# μμ± ννμ μ§νκ° μ·¨λνκΈ°
def cal_amp(wf):
    """Read every frame of an open wave reader as float64 amplitudes.

    Args:
        wf: Object providing ``getnframes()`` and ``readframes(n)``, such as
            the reader returned by ``wave.open(path, 'r')``.

    Returns:
        1-D ``numpy.float64`` array holding one value per 16-bit sample.
    """
    frame_count = wf.getnframes()
    # Asking for all frames at once returns the whole payload as bytes.
    raw = wf.readframes(frame_count)
    # The bytes are decoded as int16, i.e. the data is taken to be 16-bit PCM;
    # other sample widths would need a different dtype.
    samples = np.frombuffer(raw, dtype=np.int16)
    return samples.astype(np.float64)
# μ§νκ°μ νκ· μ κ³±κ·Ό(Root Mean Square, RMS) ꡬνκΈ°
# μ‘μ λ°μ΄ν° ννμ μμ± λ°μ΄ν° νν κΈΈμ΄λ‘ μλ₯΄κΈ°
# μ‘μ νμΌμμ μλΌλΈ νν, μμ± ννμ RMSλ₯Ό κ°κ° κ³μ° -> μμ SNλΉκ° λμ€λλ‘ μ€μ²©
def cal_rms(amp):
    """Return the root mean square of the amplitudes along the last axis.

    Args:
        amp: Array-like of amplitude values.

    Returns:
        ``sqrt(mean(amp ** 2))`` computed over the last axis.
    """
    # Squaring first removes the sign, so negative amplitudes do not cancel
    # positive ones when averaging.
    squared = np.square(amp)
    mean_power = np.mean(squared, axis=-1)
    return np.sqrt(mean_power)
def save_waveform(output_path, params, amp):
    """Write amplitude samples to a WAV file as 16-bit integers.

    Args:
        output_path: Destination path (or writable binary file object)
            handed to ``wave.open``.
        params: WAV parameters passed to ``setparams``: (nchannels,
            sampwidth, framerate, nframes, comptype, compname).
        amp: NumPy array of sample amplitudes. Values outside the int16
            range are saturated; in-range values are cast with ``astype``
            exactly as before.
    """
    int16_info = np.iinfo(np.int16)
    # Saturate instead of letting the int16 cast wrap around, which would
    # turn a slightly too loud sample into a full-scale click.
    samples = np.clip(amp, int16_info.min, int16_info.max).astype(np.int16)
    # The context manager closes the writer even if setparams/writeframes
    # raises, so no file handle is leaked.
    with wave.open(output_path, 'wb') as output_file:
        output_file.setparams(params)
        output_file.writeframes(array.array('h', samples).tobytes())
if __name__ == '__main__':
    # Script entry point: mix a clean speech file with a noise file at the
    # requested SNR and write the resulting waveforms as 16-bit WAV files.
    args = get_args()

    # Read all samples of both inputs; the context managers close the
    # readers even if decoding fails.
    with wave.open(args.clean_file, 'r') as clean_wav:
        clean_params = clean_wav.getparams()
        clean_amp = cal_amp(clean_wav)
    with wave.open(args.noise_file, 'r') as noise_wav:
        noise_amp = cal_amp(noise_wav)

    # A speech-length segment can only be cut from noise that is at least as
    # long as the speech; fail with a clear message otherwise.
    if len(noise_amp) < len(clean_amp):
        raise ValueError(
            'noise file is shorter than the clean file '
            '({} < {} samples)'.format(len(noise_amp), len(clean_amp))
        )

    # Pick a random start position and cut out a segment of the noise that
    # is exactly as long as the speech.
    start = random.randint(0, len(noise_amp) - len(clean_amp))
    divided_noise_amp = noise_amp[start:start + len(clean_amp)]

    clean_rms = cal_rms(clean_amp)
    noise_rms = cal_rms(divided_noise_amp)
    if noise_rms == 0:
        # A silent segment cannot be scaled to any level (division by zero).
        raise ValueError('selected noise segment is silent; '
                         'cannot scale it to the requested SNR')

    # Scale the noise so its RMS matches the level implied by the target
    # SNR, then overlay it on the speech.
    target_noise_rms = cal_adjusted_rms(clean_rms, args.snr)
    adjusted_noise_amp = divided_noise_amp * (target_noise_rms / noise_rms)
    mixed_amp = clean_amp + adjusted_noise_amp

    # Avoid clipping: if the mixture exceeds the int16 range, scale it so
    # that its largest peak lands exactly on the int16 limit.
    max_int16 = np.iinfo(np.int16).max
    min_int16 = np.iinfo(np.int16).min
    if mixed_amp.max(axis=0) > max_int16 or mixed_amp.min(axis=0) < min_int16:
        if mixed_amp.max(axis=0) >= abs(mixed_amp.min(axis=0)):
            reduction_rate = max_int16 / mixed_amp.max(axis=0)
        else:
            reduction_rate = min_int16 / mixed_amp.min(axis=0)
        # Apply the same gain to all three signals so that
        # mixed == clean + noise still holds for the saved files.
        mixed_amp = mixed_amp * reduction_rate
        clean_amp = clean_amp * reduction_rate
        adjusted_noise_amp = adjusted_noise_amp * reduction_rate

    # Save the waveforms using the format of the clean input file.
    save_waveform(args.output_mixed_file, clean_params, mixed_amp)
    # The clean/noise outputs are optional: an empty path means "skip".
    if args.output_clean_file:
        save_waveform(args.output_clean_file, clean_params, clean_amp)
    if args.output_noise_file:
        save_waveform(args.output_noise_file, clean_params, adjusted_noise_amp)