SSCursor (流式游标) 解决 Python 使用 pymysql 查询大量数据导致内存使用过高的问题

# -*-coding:utf-8-*-
'''
File Name:sqldb2json.py
Author:Luofan
Time:2022/12/21 15:22
'''

import json
import pymysql


def sqldbConnect():
    # 连接数据库
    conn = pymysql.connect(
        host='192.168.20.211',
        port=9030,
        user='root',
        passwd='root',
        db='opinion_db',
        charset='utf8',
        use_unicode=True,
    )
    # 建立流式游标SSDictCursor
    cursor = conn.cursor(cursor=pymysql.cursors.SSDictCursor)
    return conn, cursor


if __name__ == '__main__':

    conn, cursor = sqldbConnect()

    # 执行sql查询语句
    # cursor.execute("SELECT * FROM doc_info_lower85")
    cursor.execute(
        "SELECT * FROM doc_info_lower85 WHERE doc_info_lower85.gather_timestamp > '2022-11-21' AND doc_info_lower85.gather_timestamp <= '2022-12-21'")

    jsonFile = open(
        'doc_info_lower85-last_1_month.json',
        mode='w',
        encoding='utf-8')
    # 查询数据库多条数据
    index = 0
    while True:
        try:
            result = cursor.fetchone()
            if result.get('url') is not None:
                jsonFile.write(
                    json.dumps(
                        {
                            "title": result.get('title'),
                            "body_html": result.get('body_html'),
                            "health_score": result.get('health_score'),
                            "url": result.get('url')},
                        ensure_ascii=False))
                jsonFile.write('\n')
                index += 1
                print(
                    index,
                    result.get('title'),
                    result.get('gather_timestamp'))
            # if index == 1000000:
            #     break
        except Exception as e:
            print(e)

    # cursor.close()
    # conn.close()

Logo

GitCode 天启AI是一款由 GitCode 团队打造的智能助手,基于先进的LLM(大语言模型)与多智能体 Agent 技术构建,致力于为用户提供高效、智能、多模态的创作与开发支持。它不仅支持自然语言对话,还具备处理文件、生成 PPT、撰写分析报告、开发 Web 应用等多项能力,真正做到“一句话,让 Al帮你完成复杂任务”。

更多推荐