File Write
def storFile(data, fileName, method='a'):
    """Write *data* to the text file *fileName*.

    Args:
        data: Text to write.
        fileName: Path of the target file.
        method: File-open mode; defaults to 'a' (append).
    """
    # newline='' disables newline translation so data is written verbatim.
    with open(fileName, method, newline='') as f:
        f.write(data)


storFile('123', '1.txt')
File Read
# Read back the file written above and echo its contents to stdout.
fh = open('1.txt', 'r')
try:
    print(fh.read())
finally:
    fh.close()
Serialization
Serialization converts in-memory objects into a byte stream so their state can be preserved and shared. cPickle is written in C and is more efficient, so prefer it when available; otherwise use pickle. Serialization is performed with pickle's dump (to a file) and dumps (to a bytes string) methods.
# cPickle is the C-accelerated pickler on Python 2; fall back to the pure
# pickle module (which is already C-accelerated on Python 3).
try:
    import cPickle as pickle
except ImportError:
    import pickle

d = dict(url='index.html', title='1', content='2')

# Serialize the dict to disk; 'with' guarantees the handle is closed even
# if dump() raises (the original closed it manually).
with open('2.txt', 'wb') as f:
    pickle.dump(d, f)

# dumps() returns the pickled bytes instead of writing them to a file.
print(pickle.dumps(d))
Deserialize
Use load to achieve deserialization
# Same cPickle/pickle fallback as the serialization example above.
try:
    import cPickle as pickle
except ImportError:
    import pickle

# Deserialize the dict written by the previous example; 'with' guarantees
# the handle is closed (the original closed it manually).
with open('2.txt', 'rb') as f:
    d = pickle.load(f)
print(d)
Create a multi-process
os.fork() creates a child process that is an exact copy of the parent: fork returns 0 in the child, and returns the child's pid in the parent. It is available only on Linux/Unix.
import os

# os.fork() is POSIX-only: it clones the current process. The child sees a
# return value of 0; the parent receives the child's pid.
if __name__ == '__main__':
    pid = os.fork()
    if pid < 0:
        print('error pid')
    elif pid == 0:
        # Child: print its own pid and the parent's pid.
        print('child ,parent pid', os.getpid(), os.getppid())
    else:
        # BUG FIX: the original passed os.getpid (the function object)
        # instead of calling os.getpid().
        print('parent pid,create child ', os.getpid(), pid)
Use the multiprocessing module's Process class to create a process: call start() to launch it and join() to wait for it to finish.
import os
from multiprocessing import Process


def run_proc(name):
    # Runs in the child process.
    print('name ,child pid running', name, os.getpid())


if __name__ == '__main__':
    print('parent pid', os.getpid())
    for index in range(5):
        worker = Process(target=run_proc, args=(str(index),))
        print('Process will start')
        worker.start()
        # join() inside the loop waits for each child before starting the next.
        worker.join()
    print('end')
Use the Pool class from the multiprocessing module to limit the number of concurrently running processes.
import os
import random
import time
from multiprocessing import Process, Pool


def run_proc(name):
    # Runs in a pool worker: announce start, sleep up to 10s, announce end.
    print('name ,child pid running ', name, os.getpid())
    time.sleep(random.random() * 10)
    print('name ,child pid running end', name, os.getpid())


if __name__ == '__main__':
    print('parent pid', os.getpid())
    pool = Pool(processes=3)  # at most 3 tasks execute concurrently
    for task_id in range(10):
        pool.apply_async(run_proc, args=(task_id,))
    print('wait')
    pool.close()  # no more tasks may be submitted
    pool.join()   # block until every queued task has finished
    print('end')
Interprocess communication
Queue Communications
Queue is suitable for communication among multiple processes, using its put and get methods.
import os
import random
import time
from multiprocessing import Process, Queue


def write_proc(q, urls):
    # Producer: push every url onto the shared queue with a short pause.
    print('w processing ', os.getpid(), 'is running')
    for u in urls:
        q.put(u)
        print('put :', u)
        time.sleep(random.random())


def read_proc(q):
    # Consumer: block on get() forever; the parent terminates this process.
    print('r processing ', os.getpid(), 'is running')
    while True:
        u = q.get(True)
        print('get:', u)


if __name__ == '__main__':
    q = Queue()
    w1 = Process(target=write_proc, args=(q, ['u1', 'u2', 'u3']))
    w2 = Process(target=write_proc, args=(q, ['u4', 'U5', 'u6']))
    r1 = Process(target=read_proc, args=(q,))
    w1.start()
    w2.start()
    r1.start()
    w1.join()
    w2.join()
    # The reader loops forever, so it must be killed once writers are done.
    r1.terminate()
Pipe communication
Pipe() returns two connection objects, conn1 and conn2. By default the pipe is full-duplex (controlled by the duplex parameter), and each end can both send and receive via the send and recv methods.
import os
import random
import time
from multiprocessing import Process, Pipe


def send_proc(conn, urls):
    # Writer end of the pipe: send every url, pausing briefly between sends.
    print('s processing ', os.getpid(), 'is running')
    for u in urls:
        conn.send(u)
        print('send :', u)
        time.sleep(random.random())


def receive_proc(conn):
    # Reader end: block on recv() forever; the parent terminates this process.
    print('r processing ', os.getpid(), 'is running')
    while True:
        u = conn.recv()
        print('receive:', u)


if __name__ == '__main__':
    ends = Pipe()  # returns a (conn1, conn2) pair; duplex by default
    sender = Process(target=send_proc, args=(ends[0], ['u1', 'u2', 'u3']))
    receiver = Process(target=receive_proc, args=(ends[1],))
    sender.start()
    receiver.start()
    sender.join()
    # The receiver loops forever, so it must be killed once sending is done.
    receiver.terminate()
Multithreading
Multithreading deserves a closer look. Create threads using the threading module.
import time
import random
import threading


def run_proc(url):
    # Worker: print each item from the thread it runs on, pausing briefly.
    print('threading name', threading.current_thread().name)
    for item in url:
        print(threading.current_thread().name, '----->', item)
        time.sleep(random.random())
    print('end ', threading.current_thread().name)


if __name__ == '__main__':
    print('running :', threading.current_thread().name)
    first = threading.Thread(target=run_proc, name='T1', args=(['u1', 'u2', 'u3'],))
    second = threading.Thread(target=run_proc, name='T2', args=(['u4', 'u5', 'u6'],))
    first.start()
    second.start()
    first.join()
    second.join()
    print('end')
Threads can also be created by subclassing threading.Thread. Source code: https://github.com/qiyeboy/SpiderBook
import random
import threading
import time


class myThread(threading.Thread):
    """Thread subclass that walks a list of urls, printing each one."""

    def __init__(self, name, urls):
        super().__init__(name=name)
        self.urls = urls

    def run(self):
        # Entry point invoked by start(); executes in the new thread.
        print('Current %s is running...' % threading.current_thread().name)
        for url in self.urls:
            print('%s ---->>> %s' % (threading.current_thread().name, url))
            time.sleep(random.random())
        print('%s ended.' % threading.current_thread().name)


print('%s is running...' % threading.current_thread().name)
t1 = myThread(name='Thread_1', urls=['url_1', 'url_2', 'url_3'])
t2 = myThread(name='Thread_2', urls=['url_4', 'url_5', 'url_6'])
t1.start()
t2.start()
t1.join()
t2.join()
print('%s ended.' % threading.current_thread().name)
Thread Synchronization
Thread synchronization protects shared data; two options are Lock and RLock. Note also that the global interpreter lock (GIL) limits how threads share the CPU, so CPU-intensive applications tend to use multiple processes, while I/O-intensive applications benefit from multithreading.
import threading

# Re-entrant lock guarding the shared counter below.
mylock = threading.RLock()
num = 0


class myThread(threading.Thread):
    """Worker that cooperatively increments the shared counter up to 100."""

    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        global num
        while True:
            mylock.acquire()
            print('%s locked, Number: %d' % (threading.current_thread().name, num))
            if num >= 100:
                # Done: release before the final report and leave the loop.
                mylock.release()
                print('%s released, Number: %d' % (threading.current_thread().name, num))
                break
            num += 1
            print('%s released, Number: %d' % (threading.current_thread().name, num))
            mylock.release()


if __name__ == '__main__':
    thread1 = myThread('Thread_1')
    thread2 = myThread('Thread_2')
    thread1.start()
    thread2.start()