import time
import requests
from concurrent.futures import ThreadPoolExecutor
import pymysql
import threading
import json
def get_info(item):
try:
id,url=item
content=json.dumps(requests.get(url,timeout=5).text,ensure_ascii=False)
update_sql='update talent_info_3 set content=%s where id= %s'%(content,id)
# lock.acquire()
cur.execute(update_sql)
conn.commit()
# lock.release()
# time.sleep(2)
except Exception as e:
print(e,url)
if __name__ == '__main__':
conn=pymysql.connect(host='127.0.0.1', user='root', password="xxxxxx", database='work', port=3306 )
cur=conn.cursor()
# lock = threading.Lock()
sql='select id,mainurl from talent_info_3 where content is null and mainurl is not null and school="马萨诸塞大学,阿默斯特"'
res1=cur.execute(sql)
print(res1)
res=cur.fetchall()
with ThreadPoolExecutor(max_workers=3) as p:
futures=[p.submit(get_info,item) for item in res]
result=[task.result() for task in futures]
print(result)
异常原因:
使用了多线程,多线程使用了同一个数据库连接,但每个execute前没有加上互斥锁
方法:
方法一:每个execute前加上互斥锁
import threading lock=threading.lock() lock.acquire() cursor.execute(command) lock.release()
方法二:
每个线程拥有自己的数据库连接,即在线程调用函数中加上数据库连接代码
方法三:
所有线程共用一个连接池,需要考虑线程总数和连接池连接数上限的问题
seo优化_前端开发_渗透技术



