Python实现 Edmonds-Karp算法求最大流 并用Cython优化提速

原生python运行速度很慢,只要数据量大于100,求解就变得十分困难

五组测试数据

测试用例的第一行为图的节点数和边数,第二行为最大流算法的起始节点和中止节点,剩余所有行均为有向加权边,其中前两个数字代表边的两个端点,后一个数字代表边的权重。

·测试用例1(将txt压缩为xz后使用base16384转为文本)

赍薥潨一伹筫弘倀噀忀一丐臖嫎七倀栗币瀰丢礁凹僺藘熐珃箖孨舉詹瑷俶嘴奊懫羾姨羖盾眅芽贁蒎膝悢艵戧婝榬繣哧譹侽烿犎滃幗徤俊俿圐嫌綉殾穫嫻带潶宜疹讥及煷洟側廫盃渍制谳珲澼瘀侄买縐丁誀妇礜徟褂一一丑杚

·测试用例2(将txt压缩为xz后使用base16384转为文本)

赍薥潨一伹筫弘倀噀忀一丐臖嫎乃攅泗币砰丢荲湽貅桠缐剧绚嵵囉屿篶折蟤櫆尘蓇憨总昮瘓杪廍蓌冇偔窵蚣糌安挅憅婞吸痊嘳朷梨傉劈臹癭蟲汦彆匓裰嵌捚讐續抒覈极俭愰哽楔荱訠早瀤媩搿胹烷呲悹桜秾秵嶜凊孭乳厢灰眠暵缫痑攠搨茌矓竦诸侼氛堽暨禭圌筺禐瀔垚絹愙俦蟇热究葭狆弢姏簾窽瀀媃豷耳姸臁硵脰瞉炦畦帑壄藛灻猃皁樟艑蜦暸莠稼殔冸盿篷荞晪熩詥楷晘瞀聜絟务竉嘬贔勊瞵澃婞詈戼兠睆腝岪擬秱崉貁忊種椄容胖稢珫漡圏梻豃嚓脻楉櫽儒漇篠蛉狼粚抐俎敚撑曀祻潱艌譗嚘吗凿汒爣神簪嶶瘗狸夳索碵薹皭褒尼焊秛四擤晟筻滞聑忖湈笂庀噉煪彀覊優可縖涥梐橫累籋莧慛架謨曖俻们晶袩积啀烥箻禎擢獣砪卒譂槨椈础羑珈拠屩瑳牁懁忺眳蟲寷備嚐聾榰嚞稽斮瘖禊忌胿簈赧蟱稚綎婓嚟禾囯害孜灆仁洿爬涍瞇旞暠蓊危脌塟蟰藬嵃菐乷臧簻漂毷桴亼蔂粻睥撃攖櫹絢划冺匹簪燴啷完層嶳亱瓸蕾硳嘬紵帑焥佘膲摤贜獱熧怸坔忎洞憂篲絜嵆橚噕璌噖謌卻爮斈菛嗦簈竧休萨汰箻岴熌虯梏衕应簊翞坄孀暣櫳俓楶攏嬶悷儃縠刯灋埍燕蔍曷害捩萛傈挅篻膨冘晳蝍罺蓁竿扎赍碂狩洉幋喋咁螃呩縡灦榀捀塼責湻憩忝璷捧狕刏繡棤悄慊笹挖睅塠潩台羜菞玘星短祦犽娸貔梉瀦呎徣偿燵暶裤娇剓啇們浣蔯挩乡痳菆厱尝蔛浤苚挳謙矯瞏尼缲猨抺係礒藪奠譙欀茆觬耥碈擛燰毦妽櫌帄谌礷档倁厤籸跶跜襚乒浄獧獛杠嘺濝烦墧豰賍壽毇姹琼睧諩灣裒椆芠筵簼癊惕倳吁貚佚膩裲倸畝峍姃荫薀薾摽痦暯撛妝硁茿耊妝圔垳垚蕥洯烜壧姞够琡吐挣色繿堂篧檞撨嶍瀂狽譾賍蠴璋枣帙嫔欇枵塗苟羿裎姕擭慭嬦莊搢謼傮莑緂硖瘙拼埉烑彄趼买挿枠娉佼搽衹匱梸粭滬媣熛堢皖噭肜觖僆羷曃窥蕎屙仑惐薗报岈碑焆楸僂娡奻溮壜秝篕潥娤圯嶎烘硦瀺傊倵膾矇蕧簇芋囯聩敎訾毼訔扴襐缹娛裨筋纩崶畹域忷僂語諻汎毨藯綊俞忏芦绣砽嚀燿曀挿徦倛舆狦橊灧岌晚潖刜劫洀猞焆憡緖磂垺秗根厁臾纲揤梎滝蟐晙悾櫎奟秞繯涀朅庆厙裟哚懆嚯巈櫚痽臲葘徳潈蘑稐廥罕欦愷囜祽癮楈焝櫾崪羮竐杈詿帇氾虂槓娠燬佋怑磅秐帨臑莛烽氫泘冹袋园普檼蕁藍蟯梠竲朂穣俩臯嗢斆纹唺波暕虨賵缁儝濋绱厑菷矆攽兲巈搆洀丣貔營慇緆一呜姘噀三柺坮穱呿稈一一久獨㴽㴽㴽

·测试用例3、4、5

原生代码实现

思路是不断找源点到宿点的路径,找到后计算最大可通过流量加到总的最大流计数中,再将路径正向全部减少该流量,反向全部增加该流量。重复操作直到找不到路径时即为最大流。

#!/usr/bin/env python3
#edmonds.py
#fumiama 20201224

import numpy as np
from copy import deepcopy
from time import time
from heapq import heappop, heappush, heapify

class Graph(object):
    def __init__(self, nodeSize):
        self.nodeSize = nodeSize
        self.node2 = dict()
    def addEdge(self, a, b, w):
        if a not in self.node2.keys(): self.node2[a] = []
        heappush(self.node2[a], (b, w))
    def addWeight(self, a, b, w):
        #print("addWeight:", a, b, w)
        if a not in self.node2.keys(): self.addEdge(a, b, w)
        else:
            notChange = 1
            for node in self.node2[a]:
                if node[0] == b:
                    index = self.node2[a].index(node)
                    #print("index:", index, "node:", node)
                    previousWeight = self.node2[a][index][1]
                    newWeight = previousWeight + w
                    if newWeight: self.node2[a][index] = (b, newWeight)
                    else: self.delEdge(a, b)
                    notChange = 0
                    break
            if notChange: self.addEdge(a, b, w)
    def delEdge(self, a, b):
        for node in self.node2[a]:
            if node[0] == b: self.node2[a].remove(node)
        heapify(self.node2[a])
    def getWeight(self, a, b):
        #print("get weight:", a, b, self.node2[a] if a in self.node2.keys() else "null")
        if a in self.node2.keys():
            for node in self.node2[a]:
                if node[0] == b: return node[1]
        return -1
    def findMaxFlow(self, s, t):
        maxFlow = 0
        stack = [s]
        while stack:
            nd = deepcopy(self.node2)
            touch = np.zeros(self.nodeSize + 1, dtype=int)
            while stack and stack[-1] != t:           #找一条路径
                edges = nd[stack[-1]]
                #print("edges:", edges)
                if edges:
                    nextNode = heappop(edges)[0]
                    if nextNode != s and not touch[nextNode]:
                        stack.append(nextNode)
                        touch[nextNode] = 1
                else: stack.pop()
            if stack == []: break
            #print("stack:", stack)
            stack2 = stack.copy()
            nextNode = stack.pop()
            flowOfThisRoute = -1
            while stack:                    #计算该路径流量
                thisNode = stack.pop()
                throttle = self.getWeight(thisNode, nextNode)
                if flowOfThisRoute == -1 or flowOfThisRoute > throttle: flowOfThisRoute = throttle
                nextNode = thisNode
            maxFlow += flowOfThisRoute
            #print("flowOfThisRoute:", flowOfThisRoute)
            #print(flowOfThisRoute, end=' ')
            nextNode = stack2.pop()
            while stack2:                   #修改路径
                thisNode = stack2.pop()
                throttle = self.getWeight(thisNode, nextNode)
                self.addWeight(thisNode, nextNode, -flowOfThisRoute)
                self.addWeight(nextNode, thisNode, flowOfThisRoute)   #反向加边
                nextNode = thisNode
            stack = [s]                     #重新开始找一条路
        return maxFlow

if __name__ == '__main__':
    nodeSize, edgeSize = map(int, input().split())
    s, t = map(int, input().split())
    g = Graph(nodeSize)
    while edgeSize:
        a, b, w = map(int, input().split())
        g.addEdge(a, b, w)
        edgeSize -= 1
    time_start = time()
    print("最大流:", g.findMaxFlow(s, t))
    print("用时:", time() - time_start)

原生实现的样例测试

$ ./edmonds.py < 测试用例1.txt
最大流: 13
用时: 0.0012137889862060547
$ ./edmonds.py < 测试用例2.txt
最大流: 38
用时: 0.050067901611328125
$ ./edmonds.py < 测试用例3.txt
最大流: 869
用时: 4.9637181758880615
$ ./edmonds.py < 测试用例4.txt
(十分钟内未计算完成)

可以看到,随着点的数量增加,求解所需时间迅速呈指数形式增加。但是造成运行缓慢的一个主要原因是python原生代码的缓慢,因此考虑使用cython重写算法代码以加快求解速度。cython会将我们的代码编译为以c语言实现的python库供我们调用。
为此,需要编写以下文件,并手动编译库文件:

pyx文件(代码主体)

#cython: language_level=3
#edmonds_c.pyx
#fumiama 20201224

from time import time
#from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.string cimport memcpy, memset
from libc.stdlib cimport malloc, free

cdef int** arr
cdef int** arr2
cdef set(int x, int y, int item):
    global arr
    #print("set", x, y, item)
    arr[x][y] = item
cdef get(int x, int y):
    global arr
    #print("get", x, y)
    return arr[x][y]
cdef set2(int x, int y, int item):
    global arr2
    #print("set", x, y, item)
    arr2[x][y] = item
cdef get2(int x, int y):
    global arr2
    #print("get", x, y)
    return arr2[x][y]
cdef createArray(int x, int y):
    global arr, arr2
    arr = <int**>malloc(x * sizeof(int*))
    for i in range(x):
        arr[i] = <int*>malloc(y * sizeof(int))
        memset(arr[i], 0, y * sizeof(int))
    arr2 = <int**>malloc(x * sizeof(int*))
    for i in range(x):
        arr2[i] = <int*>malloc(y * sizeof(int))
        memset(arr2[i], 0, y * sizeof(int))
cdef copyarr2arr2(int x, int y):
    for i in range(x): memcpy(arr2[i], arr[i], y * sizeof(int))
cdef changeWeight(int a, int b, int w): set(a-1, b-1, w + get(a-1, b-1))
#cdef delEdge(int a, int b): set(a-1, b-1, 0)
cdef getWeight(int a, int b):
    cdef int re = get(a-1, b-1)
    return re if re else -1
cdef findMaxFlow(int s, int t, int nodeSize):
    cdef int maxFlow = 0
    cdef int thisNode, nextNode, flowOfThisRoute, throttle, p, p2, neighborEmpty, thisNodeM1
    cdef int length = (nodeSize+1) * sizeof(int)
    cdef int* stack = <int*>malloc(length)
    cdef int* touch = <int*>malloc(length)
    p = 1
    p2 = 0
    stack[0] = s
    while 1:
        copyarr2arr2(nodeSize, nodeSize)        #nd = deepcopy(self.node2)
        memset(touch, 0, length)                #touch = np.zeros(self.nodeSize + 1, dtype=int)
        while p and stack[p-1] != t:            #找一条路径
            neighborEmpty = 1
            thisNodeM1 = stack[p-1]-1
            for i in range(nodeSize):
                if get2(thisNodeM1, i):         #nextNode = heappop(edges)[0]
                    neighborEmpty = 0
                    set2(thisNodeM1, i, 0)
                    if i != s-1 and not touch[i]:   #if nextNode != s and not touch[nextNode]:
                        stack[p] = i+1              #stack.append(nextNode)
                        p += 1
                        touch[i] = 1                #touch[nextNode] = 1
                        break
                        #thisNodeM1 = i             #edges = nd[stack[-1]]
                        #if i+1 == t: break
            if neighborEmpty: p -= 1
        if p == 0: break                            #if stack == []: break
        #print("stack:[", end=' ')
        #for i in range(p): print(stack[i], end=" ")
        #print(']')
        p2 = p                                      #stack2 = stack.copy()
        p -= 1
        nextNode = stack[p]                         #nextNode = stack.pop()
        flowOfThisRoute = -1
        while p:                                    #计算该路径流量
            p -= 1
            thisNode = stack[p]                     #thisNode = stack.pop()
            #print("thisNode:", thisNode)
            #print("nextNode:", nextNode)
            throttle = getWeight(thisNode, nextNode)
            #print("throttle:", throttle)
            #print('')
            if flowOfThisRoute == -1 or flowOfThisRoute > throttle: flowOfThisRoute = throttle
            nextNode = thisNode
        #print("flowOfThisRoute:", flowOfThisRoute)
        #print(flowOfThisRoute, end=' ')
        maxFlow += flowOfThisRoute
        p = p2
        p -= 1
        nextNode = stack[p]         #nextNode = stack2.pop()
        while p:                    #修改路径
            p -= 1
            thisNode = stack[p]
            changeWeight(thisNode, nextNode, -flowOfThisRoute)
            changeWeight(nextNode, thisNode, flowOfThisRoute)   #反向加边
            nextNode = thisNode
        stack[0] = s                     #重新开始找一条路
        p = 1
    return maxFlow

def main():
    cdef int nodeSize, edgeSize, s, t, a, b, w
    cdef double time_start
    cdef int length
    nodeSize, edgeSize = map(int, input().split())
    s, t = map(int, input().split())
    createArray(nodeSize, nodeSize)
    while edgeSize:
        a, b, w = map(int, input().split())
        changeWeight(a, b, w)
        edgeSize -= 1
    time_start = time()
    print("最大流:", findMaxFlow(s, t, nodeSize))
    print("用时:", time() - time_start)

setup文件(构建库)

# cython: language_level=3
from distutils.core import setup
from Cython.Build import cythonize

setup(
    ext_modules = cythonize("edmonds_c.pyx")
)

编译

$ python3 ./edmonds_c_setup.py build_ext --inplace

调用库

#edmonds_c_run.py
import edmonds_c
edmonds_c.main()

进行测试

$ ./edmonds_c_run.py < 测试用例1.txt
最大流: 13
用时: 6.914138793945312e-05
$ ./edmonds_c_run.py < 测试用例2.txt
最大流: 38
用时: 0.0015988349914550781
$ ./edmonds_c_run.py < 测试用例3.txt
最大流: 869
用时: 0.1410071849822998
$ ./edmonds_c_run.py < 测试用例4.txt
最大流: 17639
用时: 105.51624917984009
$ ./edmonds_c_run.py < 测试用例5.txt
(20分钟内未计算完成)

可见在使用cython优化之后,运行速度有了明显改善。但是在大数据下其性能仍不尽人意,因此可以考虑采用更加高效的Preflow Push算法进行求解。关于该算法的实现以及cython加速,详见我的下一篇文章Python实现 Preflow Push (Push-Relabel) 算法求最大流 并用Cython优化提速

猜你喜欢

转载自blog.csdn.net/u011570312/article/details/111768468