As mentioned in the previous article, the odas_web interface is difficult to install and runs very sluggishly. So I wrote a small interface program in Python to receive and display the results produced by ODAS.
The interface program and ODAS communicate over sockets: the interface acts as the server and ODAS acts as the client. Because ODAS emits two data streams, there are two servers and two clients. However, what is actually drawn on the interface is the SSL (sound source localization) result, not the SST (sound source tracking) result. I did also try drawing the SST results, but subjectively they looked worse than SSL and were less responsive in real time. My understanding is that the advantage of SST is that it can track whether a sound source is active.
In addition, I also tried running this code on a Raspberry Pi 3. It performs much better than odas_web there, but it is still not as smooth as on a laptop. To use it, first install python3-opencv on the Raspberry Pi, and then run the interface program with python3.
The installation command and the commands for running the two programs are as follows:
sudo apt install python3-opencv
python3 DOA_sound.py
./bin/odaslive -c config/odaslive/shao.cfg
video:
Attached below is the interface program I wrote:
#!/usr/bin/env python
import socket
import sys
import threading
import random
import os
import time
import struct
import cv2
import signal
import json
import ast
import numpy as np
# Legacy flag kept only for backward compatibility; nothing in this file
# reads it.  The flag that is actually used for shutdown is ``running``.
stop = False

# Cooperative shutdown flag: main() sets it to True, stop_handler() clears it
# and the worker threads poll it.  It is initialised here so that reading it
# before main() has run does not raise NameError.
running = False

# Listen on every local interface; ODAS connects to this program as a client.
HOST = "0.0.0.0"

# First listening port (served by ClientThread in main()); shao.cfg sends the
# SSL "potential" output here.
PORT = 9000
SOCK_ADDR = (HOST, PORT)

# Second listening port (served by VideoThread in main()); shao.cfg sends the
# SST "tracked" output here.
PORT2 = 9001
SOCK_ADDR2 = (HOST, PORT2)
def stop_handler(signum, frame):
    """Signal callback that clears the module-level ``running`` flag.

    Both arguments are supplied by the ``signal`` machinery and are ignored.
    The loops in this file that test ``running`` stop once they next check it.
    """
    global running
    running = False


# Install the callback for SIGINT (Ctrl-C) as soon as the module is loaded.
signal.signal(signal.SIGINT, stop_handler)
# 256-row colour ramp used to colour the energy marker.  Every row is a list
# of three 0..255 integers and the table is built from three linear segments
# that move in steps of 3:
#   rows   0..85   [0, 0, v]        v rises 0 -> 255
#   rows  86..170  [0, v, 255 - v]  last column fades out, middle fades in
#   rows 171..255  [v, 255 - v, 0]  middle column fades out, first fades in
_LUT_RAMP = range(3, 256, 3)  # 3, 6, ..., 255 (85 values)

spectrum_rgb3_lut = (
    [[0, 0, 0]]
    + [[0, 0, level] for level in _LUT_RAMP]
    + [[0, level, 255 - level] for level in _LUT_RAMP]
    + [[level, 255 - level, 0] for level in _LUT_RAMP]
)
class SocketClientObject(object):
    """Bundle an accepted connection with the address of its peer.

    Both constructor arguments are stored unchanged as the public attributes
    ``socket`` and ``address``.
    """

    def __init__(self, socket, address):
        # NOTE: the ``socket`` parameter shadows the ``socket`` module inside
        # this method only; the name is part of the constructor's interface.
        self.socket, self.address = socket, address
class _JsonStreamThread(threading.Thread):
    """Receive loop shared by the two ODAS stream handlers.

    The peer sends a continuous stream of JSON objects over TCP.  A single
    ``recv()`` may contain part of an object or several objects, so received
    text is buffered and only complete objects are handed to
    ``handle_message()``.  The loop ends when the module-level ``running``
    flag is cleared, the peer closes the connection, a socket error occurs,
    or ``handle_message()`` returns False.  The socket is always closed on
    the way out.

    ``print`` is called with a single pre-formatted string so the output is
    the same whether the interpreter treats ``print`` as a function or as a
    statement.
    """

    RECV_SIZE = 1024      # bytes requested per recv() call
    MAX_PENDING = 65536   # give up on an undecodable backlog beyond this size
    FAILURE_TAG = "problem"

    def __init__(self, connection_object):
        threading.Thread.__init__(self)
        self._connection_object = connection_object

    @staticmethod
    def drain_json_stream(buffer):
        """Split *buffer* into complete JSON objects and the unread rest.

        Returns ``(documents, remainder)``.  ``documents`` holds every
        top-level JSON object that could be decoded, in order.  ``remainder``
        is the text starting at the first object that could not be decoded
        yet (normally one that is still incomplete), or ``""`` when nothing
        is pending.  Text between objects that does not start with ``{`` is
        discarded.
        """
        decoder = json.JSONDecoder()
        documents = []
        position = 0
        while True:
            start = buffer.find("{", position)
            if start < 0:
                # Only separators or junk are left; nothing worth keeping.
                position = len(buffer)
                break
            try:
                document, position = decoder.raw_decode(buffer, start)
            except ValueError:
                # Most likely an object whose tail has not arrived yet.
                position = start
                break
            documents.append(document)
        return documents, buffer[position:]

    @staticmethod
    def to_pixel(source):
        """Map the ``x``/``y`` fields of *source* to canvas pixel coordinates.

        Values are scaled by 400 and offset by 400, with ``y`` negated, so
        (0, 0) lands in the centre of the 800x800 canvas.
        """
        x = int(float(source["x"]) * 400) + 400
        y = int(-float(source["y"]) * 400) + 400
        return x, y

    @staticmethod
    def to_byte_level(value):
        """Scale *value* by 255, truncate, and clamp to 0..255.

        The clamp keeps the result usable as a colour-table index even when
        the input is outside the 0..1 range.
        """
        return max(0, min(255, int(float(value) * 255)))

    def handle_message(self, message):
        """Process one decoded message; return False to stop the thread."""
        raise NotImplementedError

    def finish(self):
        """Hook run once after the receive loop ends, before the socket closes."""

    def run(self):
        """Receive, reassemble and dispatch messages until told to stop."""
        connection = self._connection_object.socket
        pending = ""
        try:
            while running:
                try:
                    chunk = connection.recv(self.RECV_SIZE)
                except socket.timeout:
                    # Only possible if a timeout was set on the socket; use
                    # it as a chance to re-check the ``running`` flag.
                    continue
                if not chunk:
                    # An empty read means the peer closed the connection.
                    break
                pending += chunk.decode("utf-8", "replace")
                messages, pending = self.drain_json_stream(pending)
                if len(pending) > self.MAX_PENDING:
                    # The backlog never became decodable; drop it and resync
                    # on a later object.
                    pending = ""
                # Only the newest complete message is handled, so a slow
                # machine does not fall behind the live stream.
                if messages and not self.handle_message(messages[-1]):
                    break
        except socket.error as error:
            print("%s: %s" % (self.FAILURE_TAG, error))
        finally:
            self.finish()
            connection.close()


class ClientThread(_JsonStreamThread):
    """Draw the first source of each received message in the 'pu' window.

    ``client_object`` must expose the connected socket as ``.socket``.
    Each handled message produces a fresh black 800x800 frame; the source is
    drawn as a filled circle only when its scaled energy exceeds 80.
    Pressing ``q`` in the window stops the thread.

    NOTE(review): the OpenCV window calls are made from this worker thread,
    as in the original code; some GUI backends require the main thread.
    """

    FAILURE_TAG = "problem1"

    def __init__(self, client_object):
        _JsonStreamThread.__init__(self, client_object)
        self.client_object = client_object

    @classmethod
    def first_source_marker(cls, message):
        """Return ``(x, y, energy)`` for the first entry of ``message["src"]``.

        ``x`` and ``y`` are pixel coordinates and ``energy`` is the ``E``
        field scaled to 0..255.  Returns None when the message does not have
        the expected layout.
        """
        try:
            source = message["src"][0]
            x, y = cls.to_pixel(source)
            energy = cls.to_byte_level(source["E"])
        except (KeyError, IndexError, TypeError, ValueError, OverflowError):
            return None
        return x, y, energy

    def handle_message(self, message):
        """Render one message; return False when the user pressed ``q``."""
        marker = self.first_source_marker(message)
        if marker is None:
            print(self.FAILURE_TAG)
            return True
        x, y, energy = marker
        img = np.zeros((800, 800, 3), np.uint8)
        try:
            if energy > 80:
                # Higher energy selects an earlier row of the colour table.
                # The row is passed to OpenCV in table order, unchanged.
                colour = tuple(spectrum_rgb3_lut[255 - energy])
                cv2.circle(img, (x, y), 30, colour, -1)
            cv2.imshow('pu', img)
            return (cv2.waitKey(1) & 0xFF) != ord('q')
        except (cv2.error, OverflowError) as error:
            print("%s: %s" % (self.FAILURE_TAG, error))
            return True

    def finish(self):
        """Close the display window(s) opened by this thread."""
        cv2.destroyAllWindows()


class VideoThread(_JsonStreamThread):
    """Consume the second stream and validate its messages without drawing.

    ``dest_object`` must expose the connected socket as ``.socket``.
    Drawing of these sources is intentionally disabled; the thread keeps the
    connection drained and prints "problem2" for malformed messages.
    """

    FAILURE_TAG = "problem2"

    def __init__(self, dest_object):
        _JsonStreamThread.__init__(self, dest_object)
        self.dest_object = dest_object

    @classmethod
    def tracked_markers(cls, message):
        """Return ``[(x, y, activity), ...]`` for every ``message["src"]`` entry.

        ``activity`` is the entry's ``activity`` field scaled to 0..255.
        Returns None when the message does not have the expected layout.
        """
        try:
            return [
                cls.to_pixel(track) + (cls.to_byte_level(track["activity"]),)
                for track in message["src"]
            ]
        except (KeyError, TypeError, ValueError, OverflowError):
            return None

    def handle_message(self, message):
        """Validate one message; this handler never asks the thread to stop."""
        if self.tracked_markers(message) is None:
            print(self.FAILURE_TAG)
        return True
def _accept_until_stopped(listener):
    """Wait for a connection on *listener* while ``running`` stays true.

    *listener* must have a timeout set, so each ``accept()`` call returns
    control regularly and the flag is re-checked.  Returns
    ``(connection, address)``, or None once ``running`` has been cleared.
    """
    while running:
        try:
            connection, address = listener.accept()
        except socket.timeout:
            continue
        # Make sure the accepted socket blocks normally regardless of the
        # listener's polling timeout.
        connection.settimeout(None)
        return connection, address
    return None


def main():
    """Serve both listening ports and start one worker thread per connection.

    Connections are accepted alternately: first one on SOCK_ADDR (handled by
    ClientThread), then one on SOCK_ADDR2 (handled by VideoThread), repeating
    until ``running`` is cleared, for example by the SIGINT handler.  Socket
    errors are reported and end the loop; the listening sockets are always
    closed before the farewell line is printed.

    ``print`` is called with a single pre-formatted string so the output is
    the same whether the interpreter treats ``print`` as a function or as a
    statement.
    """
    global running
    running = True
    listeners = []
    try:
        for bind_address, backlog in ((SOCK_ADDR, 5), (SOCK_ADDR2, 2)):
            listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # Track the socket right away so it is closed even if bind fails.
            listeners.append(listener)
            # Allow an immediate restart while old connections are still in
            # TIME_WAIT.
            listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            listener.bind(bind_address)
            listener.listen(backlog)
            # A blocking accept() is restarted automatically after the signal
            # handler returns, so Ctrl-C would never be noticed; poll instead.
            listener.settimeout(0.5)
        sock1, sock2 = listeners

        while running:
            accepted = _accept_until_stopped(sock1)
            if accepted is None:
                break
            clientsocket, address = accepted
            print(" Accept client:  %s" % (address,))
            ct = ClientThread(SocketClientObject(clientsocket, address))
            # Daemon threads do not keep the process alive after shutdown,
            # even while they are blocked waiting for data.
            ct.daemon = True
            ct.start()

            accepted = _accept_until_stopped(sock2)
            if accepted is None:
                break
            dst, dst_addr = accepted
            print("Destination Connected by %s" % (dst_addr,))
            vt = VideoThread(SocketClientObject(dst, dst_addr))
            vt.daemon = True
            vt.start()
    except socket.error as error:
        print("#! EXC:  %s" % (error,))
    finally:
        for listener in listeners:
            listener.close()
    print("THE END! Goodbye!")


if __name__ == "__main__":
    main()
Below is the shao.cfg configuration file I use with ODAS:
# Configuration file for ReSpeaker USB 4 Mic Array (ReSpeaker USB Mic Array v2.0)
version = "2.1";
# Raw
raw:
{
fS = 16000;
hopSize = 128;
nBits = 16;
nChannels = 6;
# Input with raw signal from microphones
interface: {
type = "soundcard";
card = 2;
device = 0;
}
}
# Mapping
mapping:
{
map: (2, 3, 4, 5);
}
# General
general:
{
epsilon = 1E-20;
size:
{
hopSize = 128;
frameSize = 256;
};
samplerate:
{
mu = 16000;
sigma2 = 0.01;
};
speedofsound:
{
mu = 343.0;
sigma2 = 25.0;
};
mics = (
# Microphone 2
{
mu = ( -0.032, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 3
{
mu = ( +0.000, -0.032, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 4
{
mu = ( +0.032, +0.000, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
},
# Microphone 5
{
mu = ( +0.000, +0.032, +0.000 );
sigma2 = ( +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000, +0.000 );
direction = ( +0.000, +0.000, +1.000 );
angle = ( 80.0, 100.0 );
}
);
# Spatial filter to include only a range of direction if required
# (may be useful to remove false detections from the floor)
spatialfilters = (
{
direction = ( +0.000, +0.000, +1.000 );
angle = (80.0, 100.0);
}
);
nThetas = 181;
gainMin = 0.25;
};
# Stationary noise estimation
sne:
{
b = 3;
alphaS = 0.1;
L = 150;
delta = 3.0;
alphaD = 0.1;
}
# Sound Source Localization
ssl:
{
nPots = 4;
nMatches = 10;
probMin = 0.5;
nRefinedLevels = 1;
interpRate = 4;
# Number of scans: level is the resolution of the sphere
# and delta is the size of the maximum sliding window
# (delta = -1 means the size is automatically computed)
scans = (
{ level = 2; delta = -1; },
{ level = 4; delta = -1; }
);
# Output to export potential sources
potential: {
# format = "undefined";
format = "json";
interface: {
#type = "blackhole";
type = "socket"; ip = "127.0.0.1"; port = 9000;
#type = "terminal";
};
};
};
# Sound Source Tracking
sst:
{
# Mode is either "kalman" or "particle"
mode = "kalman";
# Add is either "static" or "dynamic"
add = "dynamic";
# Parameters used by both the Kalman and particle filter
active = (
{ weight = 1.0; mu = 0.4; sigma2 = 0.0025 }
);
inactive = (
{ weight = 1.0; mu = 0.25; sigma2 = 0.0025 }
);
sigmaR2_prob = 0.0025;
sigmaR2_active = 0.0225;
sigmaR2_target = 0.0025;
Pfalse = 0.1;
Pnew = 0.1;
Ptrack = 0.8;
theta_new = 0.9;
N_prob = 5;
theta_prob = 0.8;
N_inactive = ( 250, 250, 250, 250 );
theta_inactive = 0.9;
# Parameters used by the Kalman filter only
kalman: {
sigmaQ = 0.001;
};
# Parameters used by the particle filter only
particle: {
nParticles = 1000;
st_alpha = 2.0;
st_beta = 0.04;
st_ratio = 0.5;
ve_alpha = 0.05;
ve_beta = 0.2;
ve_ratio = 0.3;
ac_alpha = 0.5;
ac_beta = 0.2;
ac_ratio = 0.2;
Nmin = 0.7;
};
target: ();
# Output to export tracked sources
tracked: {
format = "json";
interface: {
#type = "file"; path = "tracks.txt";
type = "socket"; ip = "127.0.0.1"; port = 9001;
#type = "terminal";
};
};
}
sss:
{
# Mode is either "dds", "dgss" or "dmvdr"
mode_sep = "dds";
mode_pf = "ms";
gain_sep = 1.0;
gain_pf = 10.0;
dds: {
};
dgss: {
mu = 0.01;
lambda = 0.5;
};
dmvdr: {
};
ms: {
alphaPmin = 0.07;
eta = 0.5;
alphaZ = 0.8;
thetaWin = 0.3;
alphaWin = 0.3;
maxAbsenceProb = 0.9;
Gmin = 0.01;
winSizeLocal = 3;
winSizeGlobal = 23;
winSizeFrame = 256;
};
ss: {
Gmin = 0.01;
Gmid = 0.9;
Gslope = 10.0;
};
separated: {
fS = 16000;
hopSize = 128;
nBits = 16;
interface: {
type = "file";
path = "separated.raw";
};
};
postfiltered: {
fS = 16000;
hopSize = 128;
nBits = 16;
gain = 10.0;
interface: {
type = "file";
path = "postfiltered.raw";
};
};
};
classify:
{
frameSize = 4096;
winSize = 3;
tauMin = 88;
tauMax = 551;
deltaTauMax = 20;
alpha = 0.3;
gamma = 0.05;
phiMin = 0.5;
r0 = 0.2;
category: {
format = "undefined";
interface: {
type = "blackhole";
}
}
}