Mimicking FFmpeg: HDR Tone Mapping in GLSL (Part 2)

FFmpeg command line: HDR to SDR

ffmpeg -i planet_hdr.MP4 -vf zscale=t=linear:npl=100,format=gbrpf32le,zscale=p=bt709,tonemap=tonemap=hable:desat=0,zscale=t=bt709:m=bt709:r=tv,format=yuv420p planet_ff_hdr2sdr.MP4

Let's break the command down piece by piece; the heart of it is the long -vf (video filter) chain.

1. zscale=t=linear:npl=100

=> zscale: convert the transfer characteristics to linear, with nominal peak luminance npl=100

format=gbrpf32le

=> convert the pixel format to planar GBR, 32-bit float, little-endian

zscale=p=bt709

=> zscale: set the color primaries to bt709

2. tonemap=tonemap=hable:desat=0

=> tonemap: use the hable tone-mapping algorithm, with parameter desat=0

3. zscale=t=bt709:m=bt709:r=tv

=> zscale: convert the transfer characteristics to bt709, the matrix to bt709, and the range to tv (limited)

format=yuv420p

=> convert the pixel format to yuv420p

The zscale filter is FFmpeg's wrapper around the third-party zimg library (official docs: FFmpeg Filters Documentation). To use zscale, your FFmpeg build must be configured with --enable-libzimg; running ffmpeg -buildconf will show the configure flags. zimg source: GitHub - sekrit-twc/zimg: Scaling, colorspace conversion, and dithering library.

The filter chain runs as a serial pipeline, and the order must not be shuffled. Next, let's dig into the ffmpeg and zscale source code to see how each stage is implemented.

Step 1: the digital color signal is converted through the EOTF into a linear light signal

We can find the key function in zimg, in the source file src/zimg/colorspace/operation.cpp:

std::unique_ptr<Operation> create_gamma_to_linear_operation(const ColorspaceDefinition &in, const ColorspaceDefinition &out, const OperationParams &params, CPUClass cpu)
{
	zassert_d(in.primaries == out.primaries, "primaries mismatch");
	zassert_d((in.matrix == MatrixCoefficients::RGB || in.matrix == MatrixCoefficients::REC_2100_LMS) &&
	          (out.matrix == MatrixCoefficients::RGB || out.matrix == MatrixCoefficients::REC_2100_LMS), "must be RGB or LMS");
	zassert_d(in.transfer != TransferCharacteristics::LINEAR && out.transfer == TransferCharacteristics::LINEAR, "wrong transfer characteristics");

	if (in.transfer == TransferCharacteristics::ARIB_B67 && use_display_referred_b67(in.primaries, params))
		return create_inverse_arib_b67_operation(ncl_rgb_to_yuv_matrix_from_primaries(in.primaries), params);
	else
		return create_inverse_gamma_operation(select_transfer_function(in.transfer, params.peak_luminance, params.scene_referred), params, cpu);
}


std::unique_ptr<Operation> create_inverse_gamma_operation(const TransferFunction &transfer, const OperationParams &params, CPUClass cpu)
{
	std::unique_ptr<Operation> ret;

#if defined(ZIMG_X86)
	ret = create_inverse_gamma_operation_x86(transfer, params, cpu);
#elif defined(ZIMG_ARM)
	ret = create_inverse_gamma_operation_arm(transfer, params, cpu);
#endif
	if (!ret)
		ret = std::make_unique<GammaOperationC>(transfer.to_linear, 1.0f, transfer.to_linear_scale);

	return ret;
}

Since we're converting according to the PQ (perceptual quantizer) curve standard, in.transfer is not ARIB_B67 (that's the HLG hybrid log-gamma standard), so the else branch runs. Setting aside platform-specific acceleration for now, we follow the plain C implementation, GammaOperationC. Before GammaOperationC is configured, select_transfer_function picks and assembles the transfer function; let's keep reading.

// src/zimg/colorspace/gamma.cpp
TransferFunction select_transfer_function(TransferCharacteristics transfer, double peak_luminance, bool scene_referred)
{
	zassert_d(!std::isnan(peak_luminance), "nan detected");

	TransferFunction func{};

	func.to_linear_scale = 1.0f;
	func.to_gamma_scale = 1.0f;

	switch (transfer) {
	// ... 
    case TransferCharacteristics::REC_709:
		func.to_linear = scene_referred ? rec_709_inverse_oetf : rec_1886_eotf;
		func.to_gamma = scene_referred ? rec_709_oetf : rec_1886_inverse_eotf;
		break;
	case TransferCharacteristics::ST_2084:
		func.to_linear = scene_referred ? st_2084_inverse_oetf : st_2084_eotf;
		func.to_gamma = scene_referred ? st_2084_oetf : st_2084_inverse_eotf;
		func.to_linear_scale = static_cast<float>(ST2084_PEAK_LUMINANCE / peak_luminance);
		func.to_gamma_scale = static_cast<float>(peak_luminance / ST2084_PEAK_LUMINANCE);
		break;
	case TransferCharacteristics::ARIB_B67:
		func.to_linear = scene_referred ? arib_b67_inverse_oetf : arib_b67_eotf;
		func.to_gamma = scene_referred ? arib_b67_oetf : arib_b67_inverse_eotf;
		func.to_linear_scale = scene_referred ? 12.0f : static_cast<float>(1000.0 / peak_luminance);
		func.to_gamma_scale = scene_referred ? 1.0f / 12.0f : static_cast<float>(peak_luminance / 1000.0);
		break;
	default:
		error::throw_<error::InternalError>("invalid transfer characteristics");
		break;
	}

	return func;
}
// src/zimg/colorspace/operation_impl.cpp
class GammaOperationC final : public Operation {
	gamma_func m_func;
	float m_prescale;
	float m_postscale;
public:
	GammaOperationC(gamma_func func, float prescale, float postscale) :
		m_func{ func },
		m_prescale{ prescale },
		m_postscale{ postscale }
	{}

	void process(const float * const *src, float * const *dst, unsigned left, unsigned right) const override
	{
		EnsureSinglePrecision x87;

		for (unsigned p = 0; p < 3; ++p) {
			const float *src_p = src[p];
			float *dst_p = dst[p];

			for (unsigned i = left; i < right; ++i) {
				dst_p[i] = m_postscale * m_func(src_p[i] * m_prescale);
			}
		}
	}
};

A reminder: note what's actually passed for GammaOperationC's two scale parameters. For the to_linear direction, prescale is 1.0f and postscale is transfer.to_linear_scale.

The PQ perceptual quantizer curve is standardized as SMPTE ST 2084, and we're in the display-referred case (scene_referred is false), so the transfer function used is st_2084_eotf.
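
For example (rough numbers, assuming a video mastered with an 800-nit peak): ST2084_PEAK_LUMINANCE is fixed at 10000, so to_linear_scale = 10000 / 800 = 12.5; after the EOTF, the linear signal is rescaled so that 1.0 corresponds to the 800-nit mastering peak rather than to 10000 nits.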

constexpr float ST2084_M1 = 0.1593017578125f;
constexpr float ST2084_M2 = 78.84375f;
constexpr float ST2084_C1 = 0.8359375f;
constexpr float ST2084_C2 = 18.8515625f;
constexpr float ST2084_C3 = 18.6875f;		
#define FLT_MIN 1.17549435082228750797e-38F // smallest normalized float, from <cfloat>

float st_2084_eotf(float x) noexcept
{
	// Filter negative values to avoid NAN.
	if (x > 0.0f) {
		float xpow = zimg_x_powf(x, 1.0f / ST2084_M2);
		float num = std::max(xpow - ST2084_C1, 0.0f);
		float den = std::max(ST2084_C2 - ST2084_C3 * xpow, FLT_MIN);
		x = zimg_x_powf(num / den, 1.0f / ST2084_M1);
	} else {
		x = 0.0f;
	}

	return x;
}

That's all the key code; to summarize the flow:

create_gamma_to_linear_operation
    -> create_inverse_gamma_operation
        -> select_transfer_function
            case TransferCharacteristics::ST_2084:
                func.to_linear = scene_referred ? st_2084_inverse_oetf : st_2084_eotf;
                func.to_linear_scale = static_cast<float>(ST2084_PEAK_LUMINANCE / peak_luminance);
                break;
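
A quick sanity check with rough numbers: a PQ code value of about 0.508 decodes through st_2084_eotf to about 0.01, i.e. 100 nits on the 10000-nit ST 2084 scale; with peak_luminance = 800, the to_linear_scale of 12.5 then maps it to about 0.125 in the peak-relative linear range.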

Step 2: tone map the HDR linear light signal to an SDR linear light signal

This step's code lives in FFmpeg's video filters, at libavfilter/vf_tonemap.c, in the static function tonemap:


#define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
                    const AVPixFmtDescriptor *desc, int x, int y, double peak)
{
    int map[3] = { desc->comp[0].plane, desc->comp[1].plane, desc->comp[2].plane };
    const float *r_in = (const float *)(in->data[map[0]] + x * desc->comp[map[0]].step + y * in->linesize[map[0]]);
    const float *g_in = (const float *)(in->data[map[1]] + x * desc->comp[map[1]].step + y * in->linesize[map[1]]);
    const float *b_in = (const float *)(in->data[map[2]] + x * desc->comp[map[2]].step + y * in->linesize[map[2]]);
    float *r_out = (float *)(out->data[map[0]] + x * desc->comp[map[0]].step + y * out->linesize[map[0]]);
    float *g_out = (float *)(out->data[map[1]] + x * desc->comp[map[1]].step + y * out->linesize[map[1]]);
    float *b_out = (float *)(out->data[map[2]] + x * desc->comp[map[2]].step + y * out->linesize[map[2]]);
    float sig, sig_orig;

    /* load values */
    *r_out = *r_in;
    *g_out = *g_in;
    *b_out = *b_in;

    /* desaturate to prevent unnatural colors */
    if (s->desat > 0) {
        float luma = s->coeffs->cr * *r_in + s->coeffs->cg * *g_in + s->coeffs->cb * *b_in;
        float overbright = FFMAX(luma - s->desat, 1e-6) / FFMAX(luma, 1e-6);
        *r_out = MIX(*r_in, luma, overbright);
        *g_out = MIX(*g_in, luma, overbright);
        *b_out = MIX(*b_in, luma, overbright);
    }

    /* pick the brightest component, reducing the value range as necessary
     * to keep the entire signal in range and preventing discoloration due to
     * out-of-bounds clipping */
    sig = FFMAX(FFMAX3(*r_out, *g_out, *b_out), 1e-6);
    sig_orig = sig;

    switch(s->tonemap) {
    default:
    case TONEMAP_NONE:
        // do nothing
        break;
    case TONEMAP_LINEAR:
        sig = sig * s->param / peak;
        break;
    case TONEMAP_GAMMA:
        sig = sig > 0.05f ? pow(sig / peak, 1.0f / s->param)
                          : sig * pow(0.05f / peak, 1.0f / s->param) / 0.05f;
        break;
    case TONEMAP_CLIP:
        sig = av_clipf(sig * s->param, 0, 1.0f);
        break;
    case TONEMAP_HABLE:
        sig = hable(sig) / hable(peak);
        break;
    case TONEMAP_REINHARD:
        sig = sig / (sig + s->param) * (peak + s->param) / peak;
        break;
    case TONEMAP_MOBIUS:
        sig = mobius(sig, s->param, peak);
        break;
    }

    /* apply the computed scale factor to the color,
     * linearly to prevent discoloration */
    *r_out *= sig / sig_orig;
    *g_out *= sig / sig_orig;
    *b_out *= sig / sig_orig;
}

The logic isn't hard to follow and the comments are thorough; our command line passes desat=0, so the desaturation branch is skipped. Let's look straight at the hable and mobius tone-map implementations:

static float hable(float in)
{
    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
}

static float mobius(float in, float j, double peak)
{
    float a, b;

    if (in <= j)
        return in;

    a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
    b = (j * j - 2.0f * j * peak + peak) / FFMAX(peak - 1.0f, 1e-6);

    return (b * b + 2.0f * b * j + j * j) / (b - a) * (in + a) / (in + b);
}

Even if the math isn't obvious at a glance, a tone map is really just a compression curve: you can loosely think of it as a good way to squeeze the [0, 1024] range down into the [0, 255] range. Hable and Mobius are far from the only tone-mapping operators; for more, see the article "Tone mapping进化论 - 知乎" (Zhihu). A small numeric sketch of the hable curve follows below.
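
To get a feel for the curve, here is a minimal standalone C++ sketch; the constants and formula are copied from vf_tonemap.c above, and the peak of 540 / 100 assumes MaxCLL = 540 nits over a 100-nit reference white (the same numbers used in the shader below):

#include <cstdio>

// hable curve and the hable(sig) / hable(peak) normalization from vf_tonemap.c
static float hable(float in)
{
    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
}

int main()
{
    const float peak = 540.0f / 100.0f; // MaxCLL / REFERENCE_WHITE
    for (float sig : { 0.25f, 0.5f, 1.0f, 2.0f, 4.0f, 5.4f })
        std::printf("sig %.2f -> %.3f\n", sig, hable(sig) / hable(peak));
    return 0;
}

Running it, diffuse white (sig = 1.0) lands around 0.38 while the 5.4 peak maps exactly to 1.0: the curve compresses highlights much harder than mid-tones.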

Step 3: the linear light signal is converted by the OETF back into a digital color signal

This corresponds to zscale=t=bt709:m=bt709:r=tv on the command line: the transfer characteristics become bt709, the conversion matrix is also BT.709, and the YUV range is tv (limited). One caution here: after tone mapping, the linear light signal already holds SDR-range data, so don't apply the SMPTE ST 2084 HDR standard again; specify BT.709 and use the scene-referred opto-electrical transfer function rec_709_oetf.

The code flow closely mirrors step 1, so I won't paste all of it again; in summary:

create_linear_to_gamma_operation
    -> create_gamma_operation
        -> select_transfer_function
            case TransferCharacteristics::REC_709:
		        func.to_gamma = scene_referred ? rec_709_oetf : rec_1886_inverse_eotf;
		        break;

constexpr float REC709_ALPHA = 1.09929682680944f;
constexpr float REC709_BETA = 0.018053968510807f;
float rec_709_oetf(float x) noexcept
{
	x = std::max(x, 0.0f);

	if (x < REC709_BETA)
		x = x * 4.5f;
	else
		x = REC709_ALPHA * zimg_x_powf(x, 0.45f) - (REC709_ALPHA - 1.0f);

	return x;
}
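
A rough spot check: 18% grey, x = 0.18, is above REC709_BETA, so it encodes to 1.0993 * 0.18^0.45 - 0.0993 ≈ 0.41, the familiar gamma lift of mid-tones.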

Implementing HDR tone mapping in GLSL

The logic and flow have been analyzed thoroughly above, so without further ado, here is the GLSL fragment shader code.

#version 320 es
precision highp float;
uniform int bitMark; // shift count when 10-bit data is stored in a 16-bit (two-byte) word
uniform highp float imgWidth;  // highp: lowp cannot represent pixel dimensions
uniform highp float imgHeight;
uniform highp usampler2D tex_unsigned_y; // GL_R16UI、GL_RED_INTEGER、GL_UNSIGNED_SHORT
uniform highp usampler2D tex_unsigned_uv;// GL_RG16UI、GL_RG_INTEGER、GL_UNSIGNED_SHORT
in  vec2 vTextureCoord;
out vec4 _FragColor;
 
highp vec3 YuvConvertRGB_BT2020(highp uvec3 yuv, int normalize) {
    highp vec3 rgb;
    highp int y = int(yuv.x);
    highp int u = int(yuv.y);
    highp int v = int(yuv.z);
    // 10-bit limited range: Y in [64, 940], chroma in [64, 960]
    float r = float(y - 64) * 1.164384 + float(v - 512) * 1.67867;
    float g = float(y - 64) * 1.164384 - float(u - 512) * 0.187326 - float(v - 512) * 0.65042;
    float b = float(y - 64) * 1.164384 + float(u - 512) * 2.14177;
    rgb.r = r;
    rgb.g = g;
    rgb.b = b;
    if (normalize == 1) {
        rgb.r = r / 1024.0;
        rgb.g = g / 1024.0;
        rgb.b = b / 1024.0;
    } // normalize: 10-bit data has 1024 code values
    return rgb;
}

const highp float ST2084_M1 = 0.1593017578125f;
const float ST2084_M2 = 78.84375f;
const float ST2084_C1 = 0.8359375f;
const float ST2084_C2 = 18.8515625f;
const float ST2084_C3 = 18.6875f;
const highp float FLT_MIN = 1.17549435082228750797e-38F;
highp float st_2084_eotf(highp float x)
{
    x = max(x, 0.0); // filter negative values to avoid NaN, as in the zimg C version
    highp float xpow = pow(x, 1.0 / ST2084_M2);
    highp float num = max(xpow - ST2084_C1, 0.0);
    highp float den = max(ST2084_C2 - ST2084_C3 * xpow, FLT_MIN);
    return pow(num / den, 1.0 / ST2084_M1);
}

const float REC709_ALPHA = 1.09929682680944f;
const float REC709_BETA = 0.018053968510807f;
highp float rec_709_oetf(highp float x)
{
    x = max(x, 0.0);
    if (x < REC709_BETA )
        x = x * 4.5;
    else
        x = REC709_ALPHA * pow(x, 0.45f) - (REC709_ALPHA - 1.0);
    return x;
}

// hable curve, ported from ffmpeg's vf_tonemap.c (called as hableF in main below)
highp float hableF(highp float x)
{
    const float a = 0.15, b = 0.50, c = 0.10, d = 0.20, e = 0.02, f = 0.30;
    return (x * (x * a + b * c) + d * e) / (x * (x * a + b) + d * f) - e / f;
}

#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
void main() {
    float samplerPosX = vTextureCoord.x * imgWidth;
    float samplerPosY = vTextureCoord.y * imgHeight;
    highp uint unsigned_y = texelFetch(tex_unsigned_y, ivec2(int(samplerPosX), int(samplerPosY)), 0).x;
    highp uint unsigned_u = texelFetch(tex_unsigned_uv, ivec2(int(samplerPosX / 2.0), int(samplerPosY / 2.0)), 0).r;
    highp uint unsigned_v = texelFetch(tex_unsigned_uv, ivec2(int(samplerPosX / 2.0), int(samplerPosY / 2.0)), 0).g;
    highp uvec3 yuv10bit = uvec3(unsigned_y >> bitMark, unsigned_u >> bitMark, unsigned_v >> bitMark);
    highp vec3 rgb10bit = YuvConvertRGB_BT2020(yuv10bit, 1);

    // Step 1: electrical signal -> linear light signal
    // Per the zscale source, ST2084_PEAK_LUMINANCE is fixed at 10000; peak_luminance is the
    // mastering peak brightness from the video metadata (static metadata: one peak for the
    // whole video; dynamic metadata: one peak per frame).
    float ST2084_PEAK_LUMINANCE = 10000.0f;
    float peak_luminance = 800.0f;
    // Per zscale's to_linear GammaOperationC, to_linear_scale is the postscale factor.
    float to_linear_scale = ST2084_PEAK_LUMINANCE / peak_luminance;
    highp vec3 fragColor = to_linear_scale * vec3(st_2084_eotf(rgb10bit.r), st_2084_eotf(rgb10bit.g), st_2084_eotf(rgb10bit.b));
	
    // Step 2: tone-map HDR linear light down to SDR linear light
    // Following ffmpeg's tonemap function, using the hable algorithm.
    highp float sig;
    highp float sig_orig;
    sig = FFMAX(FFMAX3(fragColor.r, fragColor.g, fragColor.b), 1e-6);
    sig_orig = sig;
    float peak = 540.0f / 100.0f; // device peak brightness MaxCLL / REFERENCE_WHITE (fixed at 100)
    sig = hableF(sig) / hableF(peak);
    fragColor.r = fragColor.r * (sig / sig_orig);
    fragColor.g = fragColor.g * (sig / sig_orig);
    fragColor.b = fragColor.b * (sig / sig_orig);
	
    // Step 3: invert back from linear light to the electrical signal
    // Per the zscale source, rec_709 to_gamma has no prescale/postscale.
    fragColor = vec3(rec_709_oetf(fragColor.r), rec_709_oetf(fragColor.g), rec_709_oetf(fragColor.b));
    _FragColor = vec4(fragColor + vec3(0.0) /* extra exposure offset for debugging */, 1.0);
}
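
For completeness, here is a minimal sketch of how the two integer textures could be uploaded for a P010-style 10-bit frame. The helper name uploadPlanes and the plane pointers are hypothetical; it assumes tightly packed rows and uses the texture formats noted in the shader comments above. With P010 the 10 significant bits sit in the high bits of each 16-bit word, so bitMark would be set to 6 (or 0 if your decoder already packs values into the low 10 bits).

#include <GLES3/gl3.h>
#include <cstdint>

// Hypothetical helper: upload the Y plane and the interleaved UV plane of a
// 10-bit (P010-like) frame into the unsigned-integer textures the shader samples.
void uploadPlanes(GLuint texY, GLuint texUV, const uint16_t *yPlane,
                  const uint16_t *uvPlane, int width, int height)
{
    glBindTexture(GL_TEXTURE_2D, texY);
    // Integer textures require NEAREST filtering.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_R16UI, width, height, 0,
                 GL_RED_INTEGER, GL_UNSIGNED_SHORT, yPlane);

    glBindTexture(GL_TEXTURE_2D, texUV);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    // The UV plane is subsampled 2x2 (one CbCr pair per 2x2 luma block).
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16UI, width / 2, height / 2, 0,
                 GL_RG_INTEGER, GL_UNSIGNED_SHORT, uvPlane);
}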

All of the code above comes from FFmpeg and zimg. The background on the processing flow comes from the HDR in Android column.

To stress it once more: this HDR tone mapping follows the PQ perceptual quantizer approach, and it needs to be paired with HDR metadata such as peak_luminance (the mastering peak brightness); dynamic metadata works even better. Next I'll explore the HLG hybrid log-gamma mapping approach, which requires no metadata and is therefore more convenient and easier to extend.

Reposted from blog.csdn.net/a360940265a/article/details/124457544