版权声明:尊重博主原创文章,转载请注明出处 https://blog.csdn.net/weixin_39020940/article/details/83217960
直接上代码
my_audio.py
# -*- coding: utf-8 -*-
# Created: huashan
import os
import re
import wave
import numpy as np
import pyaudio
class voice():
    """Load a WAV file, extract a spectral-peak fingerprint, and play audio.

    Attributes set by ``loaddata``:
        nchannels, sampwidth, framerate, nframes: the first four WAV
            header parameters.
        wave_data: 2-D array with one row per channel
            (``wave_data[0]`` is the first channel).
        name: base name of the loaded file.

    Attribute set by ``fft``:
        high_point: list with one tuple per analysed block; each tuple
            holds the index of the strongest FFT bin in every sub-band.
    """

    # Sub-bands, as [start, stop) FFT bin indices, searched for their peak.
    _BANDS = ((0, 40), (40, 80), (80, 120), (120, 180))

    def loaddata(self, filepath):
        """Read a 16-bit PCM WAV file into ``self.wave_data``.

        :param filepath: path of the file; must end in ``.wav``
            (case-insensitive).
        :return: True on success.
        :raises TypeError: if ``filepath`` is not a ``str``.
        :raises IOError: if the suffix is not ``.wav``, or the file cannot
            be opened or decoded as 16-bit PCM.
        """
        if not isinstance(filepath, str):
            raise TypeError('the type of filepath must be string')
        # re.findall() returns a list, never None, so the original suffix
        # test could not fail; test the suffix directly instead.
        if not filepath.lower().endswith('.wav'):
            raise IOError('the suffix of file must be .wav')
        try:
            f = wave.open(filepath, 'rb')
            try:
                (self.nchannels, self.sampwidth,
                 self.framerate, self.nframes) = f.getparams()[:4]
                raw = f.readframes(self.nframes)
            finally:
                # Close the handle even when reading fails.
                f.close()
            if self.sampwidth != 2:
                # The samples are decoded as 16-bit integers below; any
                # other width would be silently misinterpreted.
                raise ValueError('only 16-bit PCM samples are supported')
            # WAV samples are little-endian and interleaved by channel, so
            # each row of the reshaped array is one frame; transpose to get
            # one row per channel. The copy makes the result writable
            # (frombuffer returns a read-only view).
            samples = np.frombuffer(raw, dtype=np.dtype('<i2'))
            self.wave_data = samples.reshape(-1, self.nchannels).T.copy()
        except (IOError, OSError, EOFError, ValueError, wave.Error):
            raise IOError('File Error')
        self.name = os.path.basename(filepath)  # remember the file name
        return True

    def fft(self, frames=40):
        """Compute the fingerprint of the first channel into ``high_point``.

        The first channel is cut into consecutive blocks of
        ``framerate // frames`` samples. Each block is Fourier-transformed
        and, for every sub-band in ``_BANDS``, the index of the bin with
        the largest magnitude is recorded.

        The block starting at or after ``len(channel) - block_size`` is
        skipped, so the trailing block is never analysed.

        :param frames: number of blocks per second of audio.
        :return: None; the result is stored in ``self.high_point``.
        :raises ValueError: if the block size is 0, or a block is too short
            to contain every sub-band.
        """
        channel = self.wave_data[0]
        # Integer division: the block size is used as a slice/range step.
        block_size = self.framerate // frames
        self.high_point = []
        for start in range(0, len(channel) - block_size, block_size):
            spectrum = np.abs(np.fft.fft(channel[start:start + block_size]))
            # Plain ints keep str(high_point) a literal list of int tuples
            # regardless of how NumPy prints its scalar types.
            self.high_point.append(tuple(
                int(np.argmax(spectrum[lo:hi])) + lo
                for lo, hi in self._BANDS))

    def play(self, filepath):
        """Play a WAV file through the default PyAudio output device.

        :param filepath: path of the WAV file to play.
        :return: None
        """
        chunk = 1024
        wf = wave.open(filepath, 'rb')
        try:
            p = pyaudio.PyAudio()
            try:
                # Open an output stream matching the file's format.
                stream = p.open(
                    format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
                try:
                    # readframes() returns an empty value at end of file.
                    # Testing truthiness works for both str and bytes; the
                    # original `data == ""` never matches bytes.
                    data = wf.readframes(chunk)
                    while data:
                        stream.write(data)
                        data = wf.readframes(chunk)
                finally:
                    stream.close()
            finally:
                p.terminate()
        finally:
            wf.close()
if __name__ == '__main__':
    # Demo: load and play a sample file, then print its name.
    p = voice()
    # Only loaddata() sets p.name; calling play() alone left the attribute
    # undefined and the print below raised AttributeError.
    p.loaddata('the_mess.wav')
    p.play('the_mess.wav')
    print(p.name)
plar.py
# -*- coding: utf-8 -*-
# Created: huashan
import os
import MySQLdb
import my_audio
class memory():
    """Store song fingerprints in MySQL and match query clips against them.

    Fingerprints live in the ``fingerprint.musicdata`` table as the song's
    file name plus the text form of ``voice.high_point``.
    """

    def __init__(self, host, port, user, passwd, db):
        """Remember the database connection parameters.

        :param host: database host name.
        :param port: database port.
        :param user: database user.
        :param passwd: database password.
        :param db: database name.
        """
        self.host = host
        self.port = port
        self.user = user
        self.passwd = passwd
        self.db = db

    def _connect(self):
        """Open a new MySQL connection using the stored parameters."""
        return MySQLdb.connect(host=self.host, port=self.port,
                               user=self.user, passwd=self.passwd,
                               db=self.db, charset='utf8')

    def _fingerprint(self, path):
        """Return a ``voice`` for ``path`` with its fingerprint computed."""
        v = my_audio.voice()
        v.loaddata(path)
        v.fft()
        return v

    def _rank_matches(self, conn, query_fp):
        """Score ``query_fp`` against every stored song.

        :return: list of ``(score, song_name)`` tuples, best match first.
        """
        cur = conn.cursor()
        try:
            cur.execute("SELECT * FROM fingerprint.musicdata")
            rows = cur.fetchall()
        finally:
            cur.close()
        # NOTE(security): eval() executes whatever text is stored in the
        # fingerprint column. Rows written by addsong() are plain literals,
        # so ast.literal_eval would be a safe replacement; confirm no other
        # writer exists before switching.
        ranked = [(self.fp_compare(query_fp, eval(row[1])), row[0])
                  for row in rows]
        ranked.sort(reverse=True)
        return ranked

    def addsong(self, path):
        """Fingerprint a song and store it under its file name.

        Prints a message and returns without inserting when the database
        is unreachable or the song name is already stored.

        :param path: path of the song's WAV file.
        :return: None
        :raises TypeError: if ``path`` is not a ``str``.
        """
        if not isinstance(path, str):
            raise TypeError('path need string')
        basename = os.path.basename(path)
        try:
            conn = self._connect()
        except MySQLdb.Error:
            print('DataBase error')
            return None
        try:
            cur = conn.cursor()
            try:
                # Values are passed as parameters so that quotes in a file
                # name cannot alter the SQL statement.
                namecount = cur.execute(
                    "select * from fingerprint.musicdata WHERE song_name = %s",
                    (basename,))
                if namecount > 0:
                    print('the song has been record!')
                    return None
                v = self._fingerprint(path)
                cur.execute(
                    "insert into fingerprint.musicdata VALUES(%s,%s)",
                    (basename, str(v.high_point)))
                conn.commit()
            finally:
                cur.close()
        finally:
            # Runs on the early return as well, which used to leak the
            # connection.
            conn.close()

    def fp_compare(self, search_fp, match_fp):
        """Return the best alignment score of two fingerprints.

        ``search_fp`` is slid along ``match_fp``; at every offset the
        number of positions holding equal entries is counted.

        :param search_fp: fingerprint of the query clip.
        :param match_fp: fingerprint stored in the library.
        :return: the highest count found (int); 0 when ``search_fp`` is
            longer than ``match_fp``.
        """
        search_len = len(search_fp)
        match_len = len(match_fp)
        if search_len > match_len:
            return 0
        best = 0
        # "+ 1" includes the final alignment; without it two fingerprints
        # of equal length were never compared at all.
        for offset in range(match_len - search_len + 1):
            hits = sum(1 for j in range(search_len)
                       if match_fp[offset + j] == search_fp[j])
            if hits > best:
                best = hits
        return best

    def search(self, path):
        """Rank every stored song by similarity to the clip at ``path``.

        :param path: path of the query WAV file.
        :return: list of ``(score, song_name)`` tuples, best match first.
        :raises IOError: if the database connection fails.
        """
        # Compute the query fingerprint before touching the database.
        v = self._fingerprint(path)
        try:
            conn = self._connect()
        except MySQLdb.Error:
            raise IOError('DataBase error')
        try:
            # The last fingerprint block of the query is left out, as in
            # the original implementation.
            compare_res = self._rank_matches(conn, v.high_point[:-1])
        finally:
            conn.close()
        print(compare_res)
        return compare_res

    def search_and_play(self, path):
        """Rank stored songs like ``search`` and play the best match.

        The best match is played by passing its stored name to
        ``voice.play``. Prints a message and returns None when the
        database is unreachable; plays nothing when no song is stored.

        :param path: path of the query WAV file.
        :return: list of ``(score, song_name)`` tuples, best match first,
            or None on database error.
        """
        v = self._fingerprint(path)
        try:
            conn = self._connect()
        except MySQLdb.Error:
            print('DataBase error')
            return None
        try:
            compare_res = self._rank_matches(conn, v.high_point[:-1])
        finally:
            conn.close()
        print(compare_res)
        # An empty table yields no candidates; indexing [0] would raise.
        if compare_res:
            v.play(compare_res[0][1])
        return compare_res
if __name__ == '__main__':
    # Demo: register three songs, then identify and play a converted clip.
    store = memory(host='localhost', port=3306, user='root',
                   passwd='huawei', db='fingerprint')
    for song in ('60542.wav', '70715.wav', '70342.wav'):
        store.addsong(song)
    store.search_and_play('70715_Convert.wav')