diff --git a/config.py b/config.py
index 25099c4..0ae538e 100755
--- a/config.py
+++ b/config.py
@@ -14,3 +14,21 @@ STEP = 1000000
 # 设为 None 或空列表 [] 表示运行全部线程
 ACTIVE_THREADS = [7,8,9,10,11,12,13,14,15,16, 17, 18, 19, 36, 37, 38, 39, 40]
 #ACTIVE_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40,41,42,43,44]
+
+# ===== Target database (scraped data is imported into MySQL) =====
+# MySQL connection settings
+# NOTE(review): a root password is committed in plain text here; consider
+# loading it from the environment or an untracked local file instead.
+DB_HOST = '192.168.0.164'
+DB_PORT = 3307
+DB_USER = 'root'
+DB_PASSWORD = 'myP#ssw0rd'
+DB_NAME = 'xkw'
+DB_TABLE = 'xkwsoftlist'
+
+# Restrict which sqlite db files are processed (by thread index).
+# Example: [7,8,9] processes softlist_7.db, softlist_8.db, softlist_9.db
+# Set to None or an empty list [] to process every existing softlist_*.db
+# NOTE: currently a subset of ACTIVE_THREADS above (7-15 are left out); any custom list works.
+PROCESS_DB_THREADS = [16, 17, 18, 19, 36, 37, 38, 39, 40]
+
diff --git a/import_to_mysql.py b/import_to_mysql.py
new file mode 100644
index 0000000..016000b
--- /dev/null
+++ b/import_to_mysql.py
@@ -0,0 +1,156 @@
+"""Import scraped rows from per-thread SQLite files into the MySQL target table."""
+import os
+import sqlite3
+import sys
+from contextlib import closing
+from typing import List, Optional
+
+import pymysql
+
+from config import DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME, DB_TABLE, PROCESS_DB_THREADS
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def get_thread_indexes() -> Optional[List[int]]:
+    """Return the configured thread indexes, or None to process every file.
+
+    PROCESS_DB_THREADS set to None, or to an empty list/tuple, means "no
+    filter"; any other value is returned as a new list.
+    """
+    if PROCESS_DB_THREADS is None:
+        return None
+    if isinstance(PROCESS_DB_THREADS, (list, tuple)) and not PROCESS_DB_THREADS:
+        return None
+    return list(PROCESS_DB_THREADS)
+
+
+def create_table_if_not_exists(conn):
+    """Create the DB_TABLE table on *conn* if it is missing, then commit."""
+    # The unique key on softid is what lets import_db upsert through
+    # ON DUPLICATE KEY UPDATE instead of inserting duplicate rows.
+    sql = f"""
+    CREATE TABLE IF NOT EXISTS `{DB_TABLE}` (
+        `id` INT AUTO_INCREMENT PRIMARY KEY,
+        `softid` INT NOT NULL,
+        `softname` TEXT,
+        `softdate` DATE,
+        `createtime` DATETIME,
+        UNIQUE KEY `uniq_softid` (`softid`)
+    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+    """
+    with conn.cursor() as cur:
+        cur.execute(sql)
+    conn.commit()
+
+
+def import_db(sqlite_path: str, mysql_conn, batch_size: int = 1000):
+    """Upsert every softinfo row of one SQLite file into the MySQL table.
+
+    Args:
+        sqlite_path: Path of the SQLite file to read.
+        mysql_conn: Open MySQL connection; each batch is committed on it.
+        batch_size: Rows sent per executemany call; values below 1 are
+            treated as 1.
+
+    Returns:
+        Number of rows sent to MySQL (inserted or updated); 0 when the
+        file is missing, cannot be read, or holds no rows.
+    """
+    if not os.path.exists(sqlite_path):
+        print(f"Skipping missing sqlite file: {sqlite_path}")
+        return 0
+
+    # closing() releases the SQLite handle on every path; the rows are
+    # fully read into memory, so the file is not needed during the upload.
+    with closing(sqlite3.connect(sqlite_path)) as sconn:
+        try:
+            rows = sconn.execute(
+                'SELECT softid, softname, softdate, createtime FROM softinfo'
+            ).fetchall()
+        except sqlite3.Error as e:
+            print(f"Error reading {sqlite_path}: {e}")
+            return 0
+
+    total = len(rows)
+    if total == 0:
+        print(f"No rows in {sqlite_path}")
+        return 0
+
+    insert_sql = (
+        f"INSERT INTO `{DB_TABLE}` (softid, softname, softdate, createtime) "
+        "VALUES (%s, %s, %s, %s) "
+        "ON DUPLICATE KEY UPDATE softname=VALUES(softname), "
+        "softdate=VALUES(softdate), createtime=VALUES(createtime)"
+    )
+
+    name = os.path.basename(sqlite_path)
+    step = max(1, batch_size)
+    inserted = 0
+    for start in range(0, total, step):
+        batch = rows[start:start + step]
+        try:
+            with mysql_conn.cursor() as cur:
+                cur.executemany(insert_sql, batch)
+            mysql_conn.commit()
+        except Exception:
+            # Drop the half-written batch so a caller that keeps using the
+            # connection cannot commit it by accident.
+            mysql_conn.rollback()
+            raise
+        inserted += len(batch)
+        print(f"Inserted {inserted}/{total} from {name}")
+    return inserted
+
+
+def main():
+    """Import the configured (or all discovered) SQLite files into MySQL.
+
+    Exits with status 1 when the MySQL connection cannot be opened.
+    """
+    indexes = get_thread_indexes()
+
+    try:
+        mconn = pymysql.connect(
+            host=DB_HOST,
+            port=DB_PORT,
+            user=DB_USER,
+            password=DB_PASSWORD,
+            database=DB_NAME,
+            charset='utf8mb4',
+            autocommit=False,
+        )
+    except Exception as e:
+        print(f"Failed to connect to MySQL: {e}")
+        sys.exit(1)
+
+    try:
+        create_table_if_not_exists(mconn)
+
+        if indexes is None:
+            # No filter configured: take every softlist_*.db in BASE_DIR,
+            # sorted because os.listdir returns names in arbitrary order.
+            files = sorted(
+                os.path.join(BASE_DIR, fname)
+                for fname in os.listdir(BASE_DIR)
+                if fname.startswith('softlist_') and fname.endswith('.db')
+            )
+        else:
+            files = [os.path.join(BASE_DIR, f'softlist_{idx}.db') for idx in indexes]
+
+        if not files:
+            print("No sqlite files to process.")
+            return
+
+        total_inserted = 0
+        for f in files:
+            print(f"Processing: {f}")
+            total_inserted += import_db(f, mconn)
+
+        print(f"Done. Total rows inserted/updated: {total_inserted}")
+    finally:
+        mconn.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..508fb59
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+pymysql>=1.0.2