1. Background
In the field of real-time video editing, hair recoloring and hairstyle editing are popular, in-demand features. Beyond the audio/video stack itself, they depend on AI capability, and because the scenario is latency-sensitive it is well suited to on-device deployment. In the previous article we got local real-time face detection working on top of Google's MediaPipe project; in this one we will get on-device real-time hair coloring running, step by step.
2. Requirements Analysis
In the previous article, face detection took frames as input and returned the number of detected faces together with a list of coordinates and confidence scores. By comparing each score against a configured threshold we could decide whether a face was present, and the returned coordinates let us draw a box around it.
Real-time hair coloring likewise takes frames as input, but hair has an irregular shape, so coordinate output would be hard to draw from. Instead, the model returns a full-size result that marks the recolored hair and stays position-aligned with the original frame, so we can draw the original image first and then draw the recolored hair on top.
3. Implementation
As in the previous article, running the model generally takes the following steps (a rough per-frame loop is sketched after the list):
- load the model;
- convert the camera preview texture to RGBA;
- feed the image data to the model engine for inference;
- parse and render the result.
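Here is a minimal per-frame driver sketch tying these steps together. Only invoke_segmentation() and render_segment_result() are defined later in this article; the other names are hypothetical placeholders for the steps above.

```cpp
/* Hedged per-frame sketch. Only invoke_segmentation() and
 * render_segment_result() are defined later in this article; the other
 * helpers are placeholder names for the steps listed above. */
for (;;)
{
    update_camera_texture (&camera_tex);              /* grab the preview frame */
    convert_texture_to_rgba (&camera_tex, rgba_buf);  /* step 2                 */
    feed_input_tensor (rgba_buf);                     /* step 3: feed           */

    segmentation_result_t segment_ret = {0};
    invoke_segmentation (&segment_ret);               /* step 3: inference      */
    render_segment_result (0, 0, win_w, win_h,        /* step 4                 */
                           &camera_tex, &segment_ret);
}
```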
3.1 Loading the model
When the hair_segmentation model is loaded, three custom operations are registered on tflite::ops::builtin::BuiltinOpResolver:
```cpp
tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom ("MaxPoolingWithArgmax2D",
    mediapipe::tflite_operations::RegisterMaxPoolingWithArgmax2D());
resolver.AddCustom ("MaxUnpooling2D",
    mediapipe::tflite_operations::RegisterMaxUnpooling2D());
resolver.AddCustom ("Convolution2DTransposeBias",
    mediapipe::tflite_operations::RegisterConvolution2DTransposeBias());
```
The corresponding registration functions:
```cpp
TfLiteRegistration* RegisterMaxPoolingWithArgmax2D() {
  static TfLiteRegistration reg = {
      [](TfLiteContext*, const char*, size_t) -> void* {
        return new TfLitePaddingValues();
      },
      [](TfLiteContext*, void* buffer) -> void {
        delete reinterpret_cast<TfLitePaddingValues*>(buffer);
      },
      Prepare, Eval};
  return &reg;
}

TfLiteRegistration* RegisterMaxUnpooling2D() {
  static TfLiteRegistration reg = {
      [](TfLiteContext*, const char*, size_t) -> void* {
        return new TfLitePaddingValues();
      },
      [](TfLiteContext*, void* buffer) -> void {
        delete reinterpret_cast<TfLitePaddingValues*>(buffer);
      },
      Prepare, Eval};
  return &reg;
}

TfLiteRegistration* RegisterConvolution2DTransposeBias() {
  static TfLiteRegistration reg = {nullptr, nullptr, Prepare, Eval};
  return &reg;
}
```
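For context, here is a minimal sketch of wiring this resolver into a TensorFlow Lite interpreter. The model path is illustrative, and the actual project wraps all of this in its own helper types (such as the s_interpreter seen below):

```cpp
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Minimal sketch: build an interpreter with the custom ops registered.
// The model path is illustrative; the project wraps this in helpers.
std::unique_ptr<tflite::FlatBufferModel> model =
    tflite::FlatBufferModel::BuildFromFile ("hair_segmentation.tflite");

tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom ("MaxPoolingWithArgmax2D",
    mediapipe::tflite_operations::RegisterMaxPoolingWithArgmax2D());
resolver.AddCustom ("MaxUnpooling2D",
    mediapipe::tflite_operations::RegisterMaxUnpooling2D());
resolver.AddCustom ("Convolution2DTransposeBias",
    mediapipe::tflite_operations::RegisterConvolution2DTransposeBias());

std::unique_ptr<tflite::Interpreter> interpreter;
tflite::InterpreterBuilder (*model, resolver)(&interpreter);
interpreter->AllocateTensors ();
```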
After creating the interpreter (std::unique_ptr<tflite::Interpreter>) via InterpreterBuilder, we look up the model's input and output tensors by name:
```cpp
static tflite_tensor_t s_tensor_input;
static tflite_tensor_t s_tensor_segment;

tflite_get_tensor_by_name (&s_interpreter, 0, "input_1", &s_tensor_input);
tflite_get_tensor_by_name (&s_interpreter, 1, "conv2d_transpose_4", &s_tensor_segment);
```
The tflite_tensor_t struct has a ptr member: for the input tensor it points at the buffer the image data is written into, and for the output tensor it points at the segmentation data the recolored hair is rendered from.
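The article does not list the tflite_tensor_t definition; the following is a plausible layout consistent with how it is used here (the exact fields are an assumption):

```cpp
/* Assumed layout of tflite_tensor_t, inferred from its usage in this
 * article; the real project's definition may differ. */
typedef struct tflite_tensor_t
{
    TfLiteType type;     /* e.g. kTfLiteFloat32             */
    int        io;       /* 0: input tensor, 1: output      */
    int        idx;      /* tensor index in the interpreter */
    void      *ptr;      /* tensor data buffer              */
    int        dims[4];  /* tensor shape, e.g. {1, H, W, C} */
} tflite_tensor_t;
```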
3.2 Converting the camera preview texture to RGBA
Converting the texture to RGBA works the same way as in the face detection article, so it is not covered again in detail.
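For readers without the previous article at hand, here is a minimal read-back sketch; it assumes the preview texture has already been rendered into the currently bound FBO, and the names are illustrative:

```cpp
/* Sketch: read an RGBA frame back from the FBO the camera preview was
 * rendered into. Assumes the FBO is already bound at (width x height). */
static void
texture_to_rgba (int width, int height, void *dst_rgba)
{
    glPixelStorei (GL_PACK_ALIGNMENT, 4);
    glReadPixels (0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, dst_rgba);
}
```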
3.3 Feeding image data to the model for inference
Feeding the data into the model is also the same as in the previous article, so it is not repeated either. Once the data has been fed, run the inference:
```cpp
typedef struct _segmentation_result_t
{
    float *segmentmap;
    int    segmentmap_dims[3];
} segmentation_result_t;

int
invoke_segmentation (segmentation_result_t *segment_result)
{
    if (interpreter->Invoke() != kTfLiteOk)
    {
        DBG_LOGE ("ERR: %s(%d)\n", __FILE__, __LINE__);
        return -1;
    }

    /* re-order the tensor's NHWC shape {1, H, W, C} into {W, H, C} */
    segment_result->segmentmap         = (float *)s_tensor_segment.ptr;
    segment_result->segmentmap_dims[0] = s_tensor_segment.dims[2]; /* width    */
    segment_result->segmentmap_dims[1] = s_tensor_segment.dims[1]; /* height   */
    segment_result->segmentmap_dims[2] = s_tensor_segment.dims[3]; /* channels */

    return 0;
}
```
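A usage sketch for a single frame (win_w, win_h, and camera_tex are hypothetical caller-side names):

```cpp
/* Per-frame usage sketch; win_w, win_h and camera_tex are hypothetical. */
segmentation_result_t segment_ret = {0};
if (invoke_segmentation (&segment_ret) == 0)
{
    /* segmentmap_dims = {width, height, classes} */
    render_segment_result (0, 0, win_w, win_h, &camera_tex, &segment_ret);
}
```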
The result mainly carries the segmentation map from which the dyed-hair image is rendered.
3.4 Parsing and rendering the result
At draw time, first draw the original image texture, then draw the modified content derived from the model's output on top:
```cpp
void
render_segment_result (int ofstx, int ofsty, int draw_w, int draw_h,
                       texture_2d_t *srctex, segmentation_result_t *segment_ret)
{
    float *segmap  = segment_ret->segmentmap;
    int segmap_w   = segment_ret->segmentmap_dims[0];
    int segmap_h   = segment_ret->segmentmap_dims[1];
    int segmap_c   = segment_ret->segmentmap_dims[2];
    int x, y, c;
    static unsigned int *imgbuf = NULL;
    float hair_color[4] = {0};
    float back_color[4] = {0};
    static float s_hsv_h = 0.0f;

    if (imgbuf == NULL)
    {
        imgbuf = (unsigned int *)malloc (segmap_w * segmap_h * sizeof(unsigned int));
    }

    /* advance the hue a little every frame so the hair color keeps cycling */
    s_hsv_h += 5.0f;
    if (s_hsv_h >= 360.0f)
        s_hsv_h = 0.0f;

    colormap_hsv (s_hsv_h / 360.0f, hair_color);

#if defined (RENDER_BY_BLEND)
    float lumi = (hair_color[0] * 0.299f + hair_color[1] * 0.587f + hair_color[2] * 0.114f);
    hair_color[3] = lumi;
#endif

    /* find the most confident class for each pixel. */
    for (y = 0; y < segmap_h; y ++)
    {
        for (x = 0; x < segmap_w; x ++)
        {
            int max_id = 0;
            float conf_max = 0;
            for (c = 0; c < MAX_SEGMENT_CLASS; c ++)
            {
                float confidence = segmap[(y * segmap_w * segmap_c) + (x * segmap_c) + c];
                if (c == 0 || confidence > conf_max)
                {
                    conf_max = confidence;
                    max_id = c;
                }
            }

            /* hair pixels get the current hue; background stays transparent */
            float *col = (max_id > 0) ? hair_color : back_color;
            unsigned char r = ((int)(col[0] * 255)) & 0xff;
            unsigned char g = ((int)(col[1] * 255)) & 0xff;
            unsigned char b = ((int)(col[2] * 255)) & 0xff;
            unsigned char a = ((int)(col[3] * 255)) & 0xff;
            imgbuf[y * segmap_w + x] = (a << 24) | (b << 16) | (g << 8) | (r);
        }
    }

    /* upload the mask image as a GL texture */
    GLuint texid;
    glGenTextures (1, &texid);
    glBindTexture (GL_TEXTURE_2D, texid);
    glTexParameterf (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameterf (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameterf (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glPixelStorei (GL_UNPACK_ALIGNMENT, 4);
    glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA,
                  segmap_w, segmap_h, 0, GL_RGBA,
                  GL_UNSIGNED_BYTE, imgbuf);

#if !defined (RENDER_BY_BLEND)
    draw_colored_hair (srctex, texid, ofstx, ofsty, draw_w, draw_h, 0, hair_color);
#else
    /* draw the camera image first, then alpha-blend the hair mask on top */
    draw_2d_texture_ex (srctex, ofstx, ofsty, draw_w, draw_h, 0);
    unsigned int blend_add[] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE};
    draw_2d_texture_blendfunc (texid, ofstx, ofsty, draw_w, draw_h, 0, blend_add);
#endif

    glDeleteTextures (1, &texid);

    render_hsv_circle (ofstx + draw_w - 100, ofsty + 100, s_hsv_h);
}
```
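colormap_hsv() is not listed in the article; a minimal sketch consistent with how it is called here (hue normalized to [0,1], full saturation and value, RGBA floats written out) would be:

```cpp
/* Sketch of an HSV-to-RGB colormap with s = v = 1; the hue h is
 * normalized to [0,1] and the result is written as RGBA floats. */
static void
colormap_hsv (float h, float col[4])
{
    float hh = h * 6.0f;   /* sector 0..5 around the color wheel */
    int   i  = (int)hh;
    float f  = hh - i;     /* position within the sector         */
    float r, g, b;

    switch (i % 6)
    {
    case 0:  r = 1;     g = f;     b = 0;     break;
    case 1:  r = 1 - f; g = 1;     b = 0;     break;
    case 2:  r = 0;     g = 1;     b = f;     break;
    case 3:  r = 0;     g = 1 - f; b = 1;     break;
    case 4:  r = f;     g = 0;     b = 1;     break;
    default: r = 1;     g = 0;     b = 1 - f; break;
    }

    col[0] = r; col[1] = g; col[2] = b; col[3] = 1.0f;
}
```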
4. Summary
This article walked through running an AI model for real-time hair coloring, a technique mainly used in video-effects scenarios. Loading the model makes use of BuiltinOpResolver's AddCustom method to register custom operations.