Add import-to-MySQL function
This commit is contained in:
15
config.py
15
config.py
@@ -14,3 +14,18 @@ STEP = 1000000
|
|||||||
import os

# Set to None or an empty list [] to run every thread.
ACTIVE_THREADS = [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 36, 37, 38, 39, 40]
#ACTIVE_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40,41,42,43,44]

# ===== Target database (scraped data is loaded into MySQL) =====
# MySQL connection settings. Each value may be overridden with the matching
# XKW_DB_* environment variable so that credentials do not have to be stored
# in this file; the literals below are only fallbacks.
DB_HOST = os.environ.get('XKW_DB_HOST', '192.168.0.164')
DB_PORT = int(os.environ.get('XKW_DB_PORT', '3307'))
DB_USER = os.environ.get('XKW_DB_USER', 'root')
# SECURITY: this fallback password is committed to version control. It is kept
# only for backward compatibility; set XKW_DB_PASSWORD and rotate/remove it.
DB_PASSWORD = os.environ.get('XKW_DB_PASSWORD', 'myP#ssw0rd')
DB_NAME = os.environ.get('XKW_DB_NAME', 'xkw')
DB_TABLE = 'xkwsoftlist'

# Which sqlite db files to process (each entry is a thread index).
# Example: [7, 8, 9] processes softlist_7.db, softlist_8.db and softlist_9.db.
# Set to None or an empty list [] to process every existing softlist_*.db.
# This list is maintained separately from ACTIVE_THREADS (the two currently
# differ); it may be None or any custom list.
PROCESS_DB_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40]
|
||||||
|
|
||||||
|
|||||||
124
import_to_mysql.py
Normal file
124
import_to_mysql.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import pymysql
|
||||||
|
import sys
|
||||||
|
from typing import List, Optional
|
||||||
|
from config import DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, DB_TABLE, PROCESS_DB_THREADS
|
||||||
|
|
||||||
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
def get_thread_indexes() -> Optional[List[int]]:
    """Return the configured sqlite thread indexes, or None for "all files".

    Reads the module-level ``PROCESS_DB_THREADS`` setting.

    Returns:
        A new list holding the configured indexes, or ``None`` when the
        setting is ``None`` or any empty iterable (list, tuple, set, ...).
    """
    if PROCESS_DB_THREADS is None:
        return None
    # Materialise first so that every kind of empty iterable, not only an
    # empty list/tuple, is treated as "no restriction".
    indexes = list(PROCESS_DB_THREADS)
    return indexes or None
|
||||||
|
|
||||||
|
|
||||||
|
def create_table_if_not_exists(conn):
    """Ensure the MySQL target table named by ``DB_TABLE`` exists.

    Issues a ``CREATE TABLE IF NOT EXISTS`` statement defining an
    auto-increment ``id`` primary key, the ``softid``, ``softname``,
    ``softdate`` and ``createtime`` columns, and a unique key on ``softid``,
    then commits on the given connection.

    Args:
        conn: open MySQL connection whose ``cursor()`` works as a context
            manager and which provides ``commit()``.
    """
    ddl = f"""
    CREATE TABLE IF NOT EXISTS `{DB_TABLE}` (
        `id` INT AUTO_INCREMENT PRIMARY KEY,
        `softid` INT NOT NULL,
        `softname` TEXT,
        `softdate` DATE,
        `createtime` DATETIME,
        UNIQUE KEY `uniq_softid` (`softid`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """
    with conn.cursor() as cursor:
        cursor.execute(ddl)
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def import_db(sqlite_path: str, mysql_conn, batch_size: int = 1000):
    """Upsert every row of a SQLite ``softinfo`` table into the MySQL table.

    All rows are read from SQLite first, the SQLite connection is closed, and
    the rows are then sent to MySQL in batches with one commit per batch.
    Rows whose ``softid`` already exists are updated in place
    (``ON DUPLICATE KEY UPDATE``).

    Args:
        sqlite_path: path of the SQLite file to read; a missing file is
            reported and skipped.
        mysql_conn: open MySQL connection whose ``cursor()`` works as a
            context manager and which provides ``commit()`` and ``rollback()``.
        batch_size: maximum rows per ``executemany`` call; values below 1 are
            treated as 1.

    Returns:
        The number of rows sent to MySQL; 0 when the file is missing, cannot
        be read, or contains no rows.

    Raises:
        Any exception raised by the MySQL driver while inserting or
        committing. The failing batch is rolled back before it propagates;
        batches committed earlier stay committed.
    """
    if not os.path.exists(sqlite_path):
        print(f"Skipping missing sqlite file: {sqlite_path}")
        return 0

    sconn = sqlite3.connect(sqlite_path)
    try:
        rows = sconn.execute(
            'SELECT softid, softname, softdate, createtime FROM softinfo'
        ).fetchall()
    except sqlite3.Error as e:
        print(f"Error reading {sqlite_path}: {e}")
        return 0
    finally:
        # Everything needed is in memory now; release the file handle on
        # every path, including when a later MySQL call fails.
        sconn.close()

    total = len(rows)
    if total == 0:
        print(f"No rows in {sqlite_path}")
        return 0

    insert_sql = f"INSERT INTO `{DB_TABLE}` (softid, softname, softdate, createtime) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE softname=VALUES(softname), softdate=VALUES(softdate), createtime=VALUES(createtime)"

    step = max(1, batch_size)
    source_name = os.path.basename(sqlite_path)
    inserted = 0
    for start in range(0, total, step):
        # sqlite may store None for softname etc.; rows are passed through as-is.
        batch = rows[start:start + step]
        try:
            with mysql_conn.cursor() as cur:
                cur.executemany(insert_sql, batch)
            mysql_conn.commit()
        except Exception:
            # Do not leave a half-applied batch pending on a shared connection.
            mysql_conn.rollback()
            raise
        inserted += len(batch)
        print(f"Inserted {inserted}/{total} from {source_name}")

    return inserted
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Import the selected ``softlist_*.db`` SQLite files into MySQL.

    Connects to MySQL using the ``DB_*`` settings (exiting with status 1 if
    the connection fails), makes sure the target table exists, builds the
    list of SQLite files from ``get_thread_indexes()``, imports each file
    with ``import_db`` and prints the grand total. The MySQL connection is
    always closed on the way out.
    """
    selected = get_thread_indexes()

    # Connect to MySQL
    try:
        mconn = pymysql.connect(
            host=DB_HOST,
            port=DB_PORT,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
            charset='utf8mb4',
            autocommit=False,
        )
    except Exception as e:
        print(f"Failed to connect to MySQL: {e}")
        sys.exit(1)

    try:
        create_table_if_not_exists(mconn)

        # Discover sqlite files to process
        if selected is None:
            # No restriction: every softlist_*.db file found in BASE_DIR
            files = [
                os.path.join(BASE_DIR, name)
                for name in os.listdir(BASE_DIR)
                if name.startswith('softlist_') and name.endswith('.db')
            ]
        else:
            files = [
                os.path.join(BASE_DIR, f'softlist_{idx}.db')
                for idx in selected
            ]

        if not files:
            print("No sqlite files to process.")
            return

        total_inserted = 0
        for path in files:
            print(f"Processing: {path}")
            total_inserted += import_db(path, mconn)

        print(f"Done. Total rows inserted/updated: {total_inserted}")

    finally:
        mconn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||||
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pymysql>=1.0.2
|
||||||
Reference in New Issue
Block a user