Add import-to-MySQL function

This commit is contained in:
liushuming
2026-02-27 15:49:25 +08:00
parent c847287203
commit 0f6cd90bdf
3 changed files with 140 additions and 0 deletions

View File

@@ -14,3 +14,18 @@ STEP = 1000000
# Set to None or an empty list [] to run all threads
ACTIVE_THREADS = [7,8,9,10,11,12,13,14,15,16, 17, 18, 19, 36, 37, 38, 39, 40]
#ACTIVE_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40,41,42,43,44]
# ===== Target database (scraped data is imported into MySQL) =====
# MySQL connection settings
# NOTE(review): host and credentials are hard-coded and committed with the
# source; consider loading them from the environment instead.
DB_HOST = '192.168.0.164'
DB_PORT = 3307
DB_USER = 'root'
DB_PASSWORD = 'myP#ssw0rd'
DB_NAME = 'xkw'
DB_TABLE = 'xkwsoftlist'
# Restrict which sqlite db files are processed (identified by thread index)
# e.g. [7,8,9] means softlist_7.db, softlist_8.db and softlist_9.db are processed
# Set to None or an empty list [] to process every existing softlist_*.db
# NOTE: this list is a separate literal and currently differs from the active
# ACTIVE_THREADS value above; keep them in sync by hand if that is intended.
PROCESS_DB_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40]  # may be changed to None or a custom list

124
import_to_mysql.py Normal file
View File

@@ -0,0 +1,124 @@
import os
import sqlite3
import pymysql
import sys
from typing import List, Optional
from config import DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, DB_TABLE, PROCESS_DB_THREADS
# Absolute path of the directory containing this script; the softlist_*.db
# files to import are looked up in this directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def get_thread_indexes() -> Optional[List[int]]:
    """Return the configured thread indexes, or None for "no restriction".

    Reads the module-level ``PROCESS_DB_THREADS`` setting.

    Returns:
        None when the setting is None or an empty list/tuple; otherwise a
        new list holding the configured values.
    """
    configured = PROCESS_DB_THREADS
    if configured is None:
        return None

    # Only an empty list/tuple is treated as "unset"; any other value is
    # copied into a fresh list.
    is_empty_sequence = isinstance(configured, (list, tuple)) and len(configured) == 0
    return None if is_empty_sequence else list(configured)
def create_table_if_not_exists(conn):
    """Ensure the MySQL table named by ``DB_TABLE`` exists.

    Issues a ``CREATE TABLE IF NOT EXISTS`` statement and commits it. The
    table has an auto-increment ``id`` primary key and a unique key on
    ``softid``.

    Args:
        conn: Open MySQL connection providing ``cursor()`` (usable as a
            context manager) and ``commit()``.
    """
    # The statement text is kept flush-left so the SQL sent to the server is
    # exactly the text below.
    ddl = f"""
CREATE TABLE IF NOT EXISTS `{DB_TABLE}` (
`id` INT AUTO_INCREMENT PRIMARY KEY,
`softid` INT NOT NULL,
`softname` TEXT,
`softdate` DATE,
`createtime` DATETIME,
UNIQUE KEY `uniq_softid` (`softid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
"""
    with conn.cursor() as cursor:
        cursor.execute(ddl)
    conn.commit()
def import_db(sqlite_path: str, mysql_conn, batch_size: int = 1000):
    """Copy every row of the ``softinfo`` table in one sqlite file into MySQL.

    Rows are upserted into the table named by ``DB_TABLE``: when a row with
    the same ``softid`` already exists, its ``softname``, ``softdate`` and
    ``createtime`` columns are overwritten. Each batch is committed on its
    own, and a progress line is printed after every batch.

    Args:
        sqlite_path: Path of the sqlite database file to read.
        mysql_conn: Open MySQL connection providing ``cursor()`` (usable as a
            context manager) and ``commit()``.
        batch_size: Maximum number of rows per ``executemany`` call and
            commit. Values below 1 are treated as 1.

    Returns:
        The number of rows sent to MySQL, or 0 when the file is missing,
        cannot be read, or contains no rows.

    Raises:
        Any exception raised by ``mysql_conn`` while inserting or committing
        propagates to the caller; batches committed before the failure stay
        committed.
    """
    if not os.path.exists(sqlite_path):
        print(f"Skipping missing sqlite file: {sqlite_path}")
        return 0

    # All rows are loaded up front (the progress output needs the total), so
    # the sqlite connection can be released before any MySQL work starts.
    # The finally clause guarantees it is closed on every path.
    sconn = sqlite3.connect(sqlite_path)
    try:
        try:
            rows = sconn.execute(
                'SELECT softid, softname, softdate, createtime FROM softinfo'
            ).fetchall()
        except sqlite3.Error as e:
            # Unreadable files (missing table, not a database, corruption)
            # are reported and skipped so the remaining files still import.
            print(f"Error reading {sqlite_path}: {e}")
            return 0
    finally:
        sconn.close()

    total = len(rows)
    if total == 0:
        print(f"No rows in {sqlite_path}")
        return 0

    insert_sql = (
        f"INSERT INTO `{DB_TABLE}` (softid, softname, softdate, createtime) "
        "VALUES (%s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE softname=VALUES(softname), "
        "softdate=VALUES(softdate), createtime=VALUES(createtime)"
    )
    source_name = os.path.basename(sqlite_path)
    # A non-positive batch size degrades to one row per batch instead of
    # producing an invalid range step.
    step = max(1, batch_size)

    inserted = 0
    for start in range(0, total, step):
        # sqlite may store None for softname etc.; values are passed through
        # unchanged as query parameters.
        batch = rows[start:start + step]
        with mysql_conn.cursor() as cur:
            cur.executemany(insert_sql, batch)
        mysql_conn.commit()
        inserted += len(batch)
        print(f"Inserted {inserted}/{total} from {source_name}")
    return inserted
def _discover_sqlite_files(indexes):
    """Return the sqlite file paths to import, in a deterministic order.

    With ``indexes`` set to None, every ``softlist_*.db`` file found in
    ``BASE_DIR`` is returned, sorted by path. Otherwise one path per index is
    returned in the given order, whether or not the file exists.
    """
    if indexes is None:
        # os.listdir order is arbitrary; sorting keeps the import order (and
        # therefore which file wins an upsert on a shared softid) stable.
        return sorted(
            os.path.join(BASE_DIR, fname)
            for fname in os.listdir(BASE_DIR)
            if fname.startswith('softlist_') and fname.endswith('.db')
        )
    return [os.path.join(BASE_DIR, f'softlist_{idx}.db') for idx in indexes]


def main():
    """Import the configured sqlite files into the MySQL target table.

    Connects to MySQL using the ``DB_*`` settings, creates the target table
    if needed, then imports each selected ``softlist_*.db`` file and prints
    the total number of rows sent. The MySQL connection is always closed
    before returning.

    Raises:
        SystemExit: With exit status 1 when the MySQL connection cannot be
            established.
    """
    indexes = get_thread_indexes()

    # Connect to MySQL
    try:
        mconn = pymysql.connect(
            host=DB_HOST,
            port=DB_PORT,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
            charset='utf8mb4',
            autocommit=False,
        )
    except Exception as e:
        # Top-level boundary: report any connection failure and stop.
        print(f"Failed to connect to MySQL: {e}")
        sys.exit(1)

    try:
        create_table_if_not_exists(mconn)

        # Discover sqlite files to process
        files = _discover_sqlite_files(indexes)
        if not files:
            print("No sqlite files to process.")
            return

        total_inserted = 0
        for f in files:
            print(f"Processing: {f}")
            total_inserted += import_db(f, mconn)
        print(f"Done. Total rows inserted/updated: {total_inserted}")
    finally:
        mconn.close()
# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

1
requirements.txt Normal file
View File

@@ -0,0 +1 @@
pymysql>=1.0.2