Python使用freetype渲染显示阿拉伯语

目录
一、使用场景
二、语言背景
三、环境搭建
四、程序结构
五、代码

一、使用场景

公司一直以点阵屏显示为业务。最近希望替换原有的点阵字库，转用FreeType渲染矢量字形，且需要支持阿拉伯语。验证可行性阶段因为python的各种库用起来相当舒服，所以就先用Python进行验证，通过后再转C++实现。

二、语言背景

阿拉伯语与常规语言不同，它属于复杂文本语言。它有以下3个特点：
1.阅读顺序从右往左
2.字符在词前中后有不同写法
3.带有修饰符号
这里非常感谢建国雄心大哥的文章。推荐大家对于阿拉伯文如果不理解可以去他的博客看看。
http://blog.sina.com.cn/s/articlelist_1569506881_0_1.html

因为阿拉伯文有以上特点，所以不能单纯地一个字符一个字符读取并渲染。在渲染字符串之前需要先做一次特殊处理，把它转成正确的Unicode码串，然后再使用FreeType对转换后的Unicode码串进行渲染。

三、环境搭建

这里需要用到3个Python库
1.numpy
引用原因：方便对点阵数组的操作, 而且Freetype好像也用到了
安装命令：pip install numpy
2.freetype-py
引用原因：矢量字体渲染模块
安装命令：pip install freetype-py
3.matplotlib
引用原因：显示渲染结果，如果需要在别的地方显示，可以不需要
安装命令：pip install matplotlib

四、程序结构

首先，对多语言的渲染大致分为两个模块，解析模块和渲染模块。解析模块用于处理原始Unicode码；渲染模块根据Unicode码取字模并合成点阵数组。
流程大致如下：原始字符串 -> 解析模块 -> 处理后Unicode码串 -> 渲染模块 -> 点阵数组 -> 显示。
这里的解析模块是本人自己实现的，但实际项目中更推荐使用HarfBuzz。

这里放个用matplotlib显示的效果
在这里插入图片描述

五、代码

解析模块

class ArabicText(object):
    """Convert logical-order text containing Arabic into a drawable code list.

    The result of Translate() is a list of code points that a renderer can
    draw one after another from left to right: Arabic letters are replaced
    by contextual presentation forms and the whole sequence is reversed,
    while runs of non-Arabic characters keep their own left-to-right order.
    """

    # Code point represented by row 0 of __arabic_Positions.
    __FIRST_CODE = 0x621

    # One row per code point 0x621..0x64a. Columns, as picked by
    # __GetTransform:
    #   0 - joined to the previous letter only
    #   1 - joined to the following letter only
    #   2 - joined on both sides
    #   3 - not joined at all
    __arabic_Positions = [
        [0xfe80, 0xfe80, 0xfe80, 0xfe80],  # 0x621
        [0xfe82, 0xfe81, 0xfe82, 0xfe81],
        [0xfe84, 0xfe83, 0xfe84, 0xfe83],
        [0xfe86, 0xfe85, 0xfe86, 0xfe85],
        [0xfe88, 0xfe87, 0xfe88, 0xfe87],
        [0xfe8a, 0xfe8b, 0xfe8c, 0xfe89],
        [0xfe8e, 0xfe8d, 0xfe8e, 0xfe8d],
        [0xfe90, 0xfe91, 0xfe92, 0xfe8f],
        [0xfe94, 0xfe93, 0xfe94, 0xfe93],
        [0xfe96, 0xfe97, 0xfe98, 0xfe95],
        [0xfe9a, 0xfe9b, 0xfe9c, 0xfe99],
        [0xfe9e, 0xfe9f, 0xfea0, 0xfe9d],
        [0xfea2, 0xfea3, 0xfea4, 0xfea1],
        [0xfea6, 0xfea7, 0xfea8, 0xfea5],
        [0xfeaa, 0xfea9, 0xfeaa, 0xfea9],
        [0xfeac, 0xfeab, 0xfeac, 0xfeab],
        [0xfeae, 0xfead, 0xfeae, 0xfead],
        [0xfeb0, 0xfeaf, 0xfeb0, 0xfeaf],
        [0xfeb2, 0xfeb3, 0xfeb4, 0xfeb1],
        [0xfeb6, 0xfeb7, 0xfeb8, 0xfeb5],
        [0xfeba, 0xfebb, 0xfebc, 0xfeb9],
        [0xfebe, 0xfebf, 0xfec0, 0xfebd],
        [0xfec2, 0xfec3, 0xfec4, 0xfec1],
        [0xfec6, 0xfec7, 0xfec8, 0xfec5],
        [0xfeca, 0xfecb, 0xfecc, 0xfec9],
        [0xfece, 0xfecf, 0xfed0, 0xfecd],
        [0x63b, 0x63b, 0x63b, 0x63b],
        [0x63c, 0x63c, 0x63c, 0x63c],
        [0x63d, 0x63d, 0x63d, 0x63d],
        [0x63e, 0x63e, 0x63e, 0x63e],
        [0x63f, 0x63f, 0x63f, 0x63f],
        [0x640, 0x640, 0x640, 0x640],
        [0xfed2, 0xfed3, 0xfed4, 0xfed1],
        [0xfed6, 0xfed7, 0xfed8, 0xfed5],
        [0xfeda, 0xfedb, 0xfedc, 0xfed9],
        [0xfede, 0xfedf, 0xfee0, 0xfedd],
        [0xfee2, 0xfee3, 0xfee4, 0xfee1],
        [0xfee6, 0xfee7, 0xfee8, 0xfee5],
        [0xfeea, 0xfeeb, 0xfeec, 0xfee9],
        [0xfeee, 0xfeed, 0xfeee, 0xfeed],
        [0xfef0, 0xfef3, 0xfef4, 0xfeef],
        [0xfef2, 0xfef3, 0xfef4, 0xfef1],
    ]

    # A letter joins to its predecessor when the predecessor is in this set.
    __preSet = frozenset([
        0x62c, 0x62d, 0x62e, 0x647, 0x639, 0x63a, 0x641, 0x642,
        0x62b, 0x635, 0x636, 0x637, 0x643, 0x645, 0x646, 0x62a,
        0x644, 0x628, 0x64a, 0x633, 0x634, 0x638, 0x626, 0x640,
    ])

    # A letter joins to its successor when the successor is in this set.
    __nextSet = frozenset([
        0x62c, 0x62d, 0x62e, 0x647, 0x639, 0x63a, 0x641, 0x642,
        0x62b, 0x635, 0x636, 0x637, 0x643, 0x645, 0x646, 0x62a,
        0x644, 0x628, 0x64a, 0x633, 0x634, 0x638, 0x626,
        0x627, 0x623, 0x625, 0x622, 0x62f, 0x630, 0x631, 0x632,
        0x648, 0x624, 0x629, 0x649, 0x640,
    ])

    # Ligatures for 0x644 followed by 0x622 / 0x623 / 0x625 / 0x627 (row
    # order). Column 0 is used when the pair is not joined to the previous
    # letter, column 1 when it is.
    __replaceSet = [[0xFEF5, 0xFEF6], [0xFEF7, 0xFEF8],
                    [0xFEF9, 0xFEFA], [0xFEFB, 0xFEFC]]

    @staticmethod
    def Translate(text):
        """Return the code points to draw, left to right, for *text*.

        Characters in 0x621..0x6fe are treated as Arabic: each one is
        replaced by the form chosen from its neighbours, and a 0x644
        followed by one of 0x622/0x623/0x625/0x627 collapses into a single
        ligature code. Arabic-range code points that have no row in the
        table (0x64b and above) are passed through unchanged. All other
        characters are copied as they are. The list is then reversed, and
        each run of non-Arabic characters is reversed back so it still
        reads left to right.
        """
        retArr = []
        begIdxs = []    # first index in retArr of each non-Arabic run
        endIdxs = []    # last index in retArr of each non-Arabic run
        runStart = -1   # start of the non-Arabic run in progress, -1 if none
        textLen = len(text)

        # A while loop is required: consuming a ligature advances by two
        # characters, which a for/range loop cannot do.
        i = 0
        while i < textLen:
            charCode = ord(text[i])

            if not (0x621 <= charCode < 0x6ff):
                if runStart < 0:
                    runStart = len(retArr)
                retArr.append(charCode)
                i += 1
                continue

            # An Arabic character closes any non-Arabic run in progress.
            if runStart >= 0:
                begIdxs.append(runStart)
                endIdxs.append(len(retArr) - 1)
                runStart = -1

            preCh = ord(text[i - 1]) if i > 0 else 0
            nextCh = ord(text[i + 1]) if i < textLen - 1 else 0

            # Rule 2 first: the ligature replaces both characters of the pair.
            replace = ArabicText.__GetContinuousWriting(preCh, charCode, nextCh)
            if replace > 0:
                retArr.append(replace)
                i += 2
                continue

            # Rule 1: contextual form of a single letter.
            retArr.append(ArabicText.__GetTransform(preCh, charCode, nextCh))
            i += 1

        # The text may end inside a non-Arabic run.
        if runStart >= 0:
            begIdxs.append(runStart)
            endIdxs.append(len(retArr) - 1)

        # Arabic is drawn right to left, so flip the whole sequence...
        retArr.reverse()
        # ...and flip the non-Arabic runs back.
        ArabicText.__NonArabicReverse(retArr, begIdxs, endIdxs)
        return retArr

    @classmethod
    def __NonArabicReverse(cls, charArr=None, begIdxs=None, endIdxs=None):
        """Reverse, in place, each non-Arabic run inside the reversed list.

        begIdxs / endIdxs hold the inclusive run bounds as indices into the
        list *before* it was reversed; they are mirrored here.
        """
        if not charArr or not begIdxs or not endIdxs:
            return
        lastIdx = len(charArr) - 1
        for begIdx, endIdx in zip(begIdxs, endIdxs):
            beg = lastIdx - endIdx
            end = lastIdx - begIdx
            charArr[beg:end + 1] = charArr[beg:end + 1][::-1]

    @classmethod
    def __GetContinuousWriting(cls, preCh=0, ch=0, nextCh=0):
        """Return the ligature code for the pair (ch, nextCh), or 0 if none.

        Only 0x644 followed by 0x622, 0x623, 0x625 or 0x627 forms a
        ligature; the variant depends on whether preCh is in __preSet.
        """
        nextChArr = [0x622, 0x623, 0x625, 0x627]
        if ch != 0x644 or nextCh not in nextChArr:
            return 0
        charIdx = nextChArr.index(nextCh)
        positionIdx = 1 if preCh in cls.__preSet else 0
        return cls.__replaceSet[charIdx][positionIdx]

    @classmethod
    def __GetTransform(cls, preCh=0, ch=0, nextCh=0):
        """Return the form of *ch* given its neighbours.

        Code points without a row in __arabic_Positions are returned
        unchanged instead of indexing outside the table.
        """
        charIdx = ch - cls.__FIRST_CODE
        if not (0 <= charIdx < len(cls.__arabic_Positions)):
            return ch

        preConnect = preCh in cls.__preSet
        nextConnect = nextCh in cls.__nextSet
        if preConnect and nextConnect:
            positionIdx = 2     # joined on both sides
        elif preConnect:
            positionIdx = 0     # joined to the previous letter only
        elif nextConnect:
            positionIdx = 1     # joined to the following letter only
        else:
            positionIdx = 3     # stands alone
        return cls.__arabic_Positions[charIdx][positionIdx]

入口和渲染模块

# -*- coding: utf-8 -*-
import freetype
import numpy
import matplotlib.pyplot as plt
import ArabicTextHelper as Arabic
def main():
    """Shape a sample Arabic sentence and display it rendered with FreeType."""
    sample = u'شبح ، شبح الشيوعية ، يتجول في جميع أنحاء القارة الأوروبية'
    # Turn the raw string into the list of code points to draw.
    glyphCodes = Arabic.ArabicText.Translate(text=sample)
    # Draw the converted list in the colour (0x33, 0xe4, 0xff).
    FreeTypeDisplay(glyphCodes, R=0x33, G=0xe4, B=0xff)

def FreeTypeDisplay(textArr=None, R=255, G=255, B=255):
    """Render code points on a single line with FreeType and show the image.

    textArr -- code points to draw, left to right in the given order
    R, G, B -- colour given to every pixel covered by a glyph

    The font 'Fonts/ARIALUNI.TTF' is loaded relative to the working
    directory. Nothing is shown when the text has no width or height
    (for example an empty list).
    """
    textArr = [] if textArr is None else list(textArr)

    RGB = [('R', numpy.uint8), ('G', numpy.uint8), ('B', numpy.uint8)]
    face = freetype.Face('Fonts/ARIALUNI.TTF')
    face.set_char_size(48 * 64)
    slot = face.glyph

    # Size metrics are read once after set_char_size; values are 26.6 fixed
    # point, hence the shift by 6.
    lineHeight = face.size._FT_Size_Metrics.height >> 6
    descender = (-face.size._FT_Size_Metrics.descender) >> 6

    # First pass: total advance width of the line.
    width, height = 0, 0
    previous = 0
    for c in textArr:
        face.load_char(c)
        height = max(height, lineHeight)
        kerning = face.get_kerning(previous, c)
        width += (slot.advance.x >> 6) + (kerning.x >> 6)
        previous = c

    # Without this guard the figsize computation below divides by zero.
    if width <= 0 or height <= 0:
        return

    imgBuf = numpy.zeros((height, width), dtype=numpy.ubyte)
    colorBuf = numpy.zeros((height, width), dtype=RGB)

    # Second pass: copy every glyph bitmap into imgBuf.
    xBeg = 0
    previous = 0
    for c in textArr:
        face.load_char(c)
        bitmap = slot.bitmap
        w = bitmap.width
        h = bitmap.rows
        # bitmap_top is the distance from the baseline to the bitmap's top row.
        yBeg = height - slot.bitmap_top - descender
        kerning = face.get_kerning(previous, c)
        xBeg += (kerning.x >> 6)
        newChar = numpy.array(bitmap.buffer, dtype='ubyte').reshape(h, w)

        # Clip the glyph to the buffer: a bitmap reaching past an edge would
        # otherwise make the slice shorter than the glyph and fail to broadcast.
        y0, y1 = max(yBeg, 0), min(yBeg + h, height)
        x0, x1 = max(xBeg, 0), min(xBeg + w, width)
        if y0 < y1 and x0 < x1:
            target = imgBuf[y0:y1, x0:x1]
            # maximum instead of += so overlapping glyphs cannot wrap uint8.
            numpy.maximum(target,
                          newChar[y0 - yBeg:y1 - yBeg, x0 - xBeg:x1 - xBeg],
                          out=target)

        xBeg += (slot.advance.x >> 6)
        previous = c

    FillColor(imgBuf, colorBuf, R, G, B)

    # Show the coloured buffer, keeping the figure's aspect ratio.
    plt.figure(figsize=(10, 10 * imgBuf.shape[0] / float(imgBuf.shape[1])))
    showing = colorBuf.view(dtype=numpy.uint8).reshape(colorBuf.shape[0], colorBuf.shape[1], 3)
    plt.imshow(showing, interpolation='nearest', origin='upper')
    plt.xticks([])
    plt.yticks([])
    plt.show()
    
def FillColor(srcBuf, colorBuf, R, G, B):
    """Set colorBuf to (R, G, B) wherever srcBuf is greater than zero.

    srcBuf   -- 2-D array of pixel coverage values
    colorBuf -- array written in place; pixels where srcBuf is 0 are untouched

    Only the top-left srcBuf.shape region of colorBuf is addressed.
    """
    rows = srcBuf.shape[0]
    columns = srcBuf.shape[1]
    covered = srcBuf > 0
    # Building the colour with colorBuf's dtype lets one masked assignment
    # serve both a record-typed buffer and a plain (rows, columns, 3) buffer.
    colour = numpy.array((R, G, B), dtype=colorBuf.dtype)
    colorBuf[:rows, :columns][covered] = colour

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':    
    main()

参考文章
https://www.cnblogs.com/8335IT/p/8053850.html
https://blog.csdn.net/wuxinyanzi/article/details/12912533

初夏de遐想

发布了15 篇原创文章 · 获赞 2 · 访问量 4863

私信关注