街道政府网站三基建设汇报/自己如何做一个网站
1、多线程
线程何时真正开始运行由操作系统(CPU)调度决定;调用 start() 只是让线程进入就绪状态,并不保证立刻执行。
构造方法:
Thread(group=None, target=None, name=None, args=(), kwargs={})
group: 线程组,目前还没有实现,库引用中提示必须是None;
target: 要执行的方法;
name: 线程名;
args/kwargs: 要传入方法的参数;args 必须是元组(只有一个参数时写成 ("t1",)),kwargs 必须是字典
from threading import Thread#方式1
# def func():
#     for i in range(100):
#         print("func:",i)
#
# if __name__ == '__main__':
#     t = Thread(target=func)
#     t.start()
#
#     for i in range(100):
#         print("main",i)

# 方式2
# class MyThread(Thread): #继承
#     #重写run()
#     def run(self):
#         for i in range(100):
#             print("MyThread",i)
#
# if __name__ == '__main__':
#     t = MyThread()
#     t.start()
#
#     for i in range(100):
#         print("main",i)

# 方式3.传参
def func(name):
    """Print ``name`` next to each counter value from 0 to 99.

    Serves as the thread target in the demo below.

    Args:
        name: Label printed in front of every counter value so that the
            output of different threads can be told apart.
    """
    for i in range(100):
        print(name, i)


if __name__ == '__main__':
    # args must be a tuple; without the trailing comma, ("t1") is just the
    # string "t1" and would be unpacked character by character.
    t1 = Thread(target=func, args=("t1",))
    t1.start()
    t2 = Thread(target=func, args=("t2",))
    t2.start()
    # The main thread keeps printing while t1 and t2 run.
    for i in range(100):
        print("main", i)
2、多进程
耗资源大,少用。
构造方法:
Process(group=None, target=None, name=None, args=(), kwargs={})
group:参数未使用,值始终为None
target:表示调用对象,即子进程要执行的任务
args:表示调用对象的位置参数元组,args=(1,2,'egon',)
kwargs:表示调用对象的字典,kwargs={'name':'egon','age':18}
name:为子进程的名称
from multiprocessing import Process# #方式1
# def func():
#     for i in range(100):
#         print("func",i)
#
# if __name__ == '__main__':
#     p = Process(target=func)
#     p.start()
#
#     for i in range(100):
#         print("main",i)

# 方式2
class MyProcess(Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print "MyProcess" next to each counter value from 0 to 99."""
        for i in range(100):
            print("MyProcess", i)
if __name__ == '__main__':
    # start() launches the child process, which then executes MyProcess.run().
    p = MyProcess()
    p.start()
    # The parent process keeps printing while the child runs.
    for i in range(100):
        print("main", i)
3、线程池和进程池
语法:
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor#module
def func(name):
    """Print ``name`` next to each counter value from 0 to 999.

    Serves as the task submitted to the thread pool in the demo below.

    Args:
        name: Label printed in front of every counter value so that the
            output of different tasks can be told apart.
    """
    for i in range(1000):
        print(name, i)


if __name__ == '__main__':
    # Create a thread pool with a capacity of 50 workers.
    with ThreadPoolExecutor(50) as t:
        for i in range(100):  # 100 tasks
            t.submit(func, name=f"线程{i}")
    # Leaving the with-block waits until every submitted task has finished,
    # so "ok" is printed only after all tasks are done.
    print("ok")
例子:用线程池爬JD
import csv
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
from threading import Lock

# Scrape JD search results with a thread pool.
# Search-URL template; "{i}" is replaced with the page number via str.format.
# The "&" in front of "page=" is required, otherwise the page number becomes
# part of the "wq" value and every request asks for the same page.
url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&wq=%E6%89%8B%E6%9C%BA&page={i}&s=1&click=0"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
}
f = open("attachment/JD.csv", mode='w', encoding='utf-8', newline="")
csvwriter = csv.writer(f)
# csvwriter is shared by every worker thread; this lock serializes row
# writes so rows produced by different pages cannot interleave.
_csv_lock = Lock()


def _joined_text(node, path):
    """Return the strings selected by ``path`` under ``node``, space-joined."""
    return " ".join(node.xpath(path))


def load_page(url, header):
    """Download one search-result page and append its items to the CSV file.

    Each ``li`` element found under the hard-coded result-list XPath becomes
    one row ``[name, price, shop, commit]`` written through the module-level
    ``csvwriter``.

    Args:
        url: Address of the result page to download.
        header: HTTP request headers passed to ``requests.get``.

    Returns:
        None.
    """
    # A timeout keeps one stalled connection from blocking a pool worker
    # forever.
    resp = requests.get(url, headers=header, timeout=10)
    try:
        resp.encoding = "utf-8"
        # etree.HTML parses the HTML text returned by the server.
        html = etree.HTML(resp.text)
    finally:
        # Release the connection even if decoding or parsing fails.
        resp.close()

    li_list = html.xpath("/html/body/div[5]/div[2]/div[2]/div[1]/div/div[2]/ul/li")
    # Join the selected text nodes directly instead of stripping brackets and
    # quotes from str(list), which mangles values containing quotes or
    # consisting of several text nodes.
    rows = [
        [
            _joined_text(li, "./div/div[4]/a/em/text()"),        # name
            _joined_text(li, "./div/div[3]/strong/i/text()"),    # price
            _joined_text(li, "./div/div[7]/span/a/text()"),      # shop
            _joined_text(li, "./div/div[5]/strong/a/text()"),    # commit
        ]
        for li in li_list
    ]
    with _csv_lock:
        csvwriter.writerows(rows)
    print(url, "完成")


if __name__ == '__main__':
    # "with f" guarantees the CSV file is closed even if the pool section
    # raises.
    with f:
        # Pool of 5 worker threads.
        with ThreadPoolExecutor(5) as t:
            for i in range(20):  # 20 tasks shared among the 5 threads
                t.submit(load_page, url=url.format(i=i), header=header)
        print("ok")