diff --git a/deploy_remote.py b/deploy_remote.py deleted file mode 100644 index e4e64b7..0000000 --- a/deploy_remote.py +++ /dev/null @@ -1,54 +0,0 @@ -import paramiko, sys, io -sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') - -ssh = paramiko.SSHClient() -ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) -ssh.connect('211.115.91.140', port=12991, username='three', password='qlalfqjsgh11', timeout=15) - -def run(cmd, t=60): - stdin, stdout, stderr = ssh.exec_command(cmd, timeout=t) - o = stdout.read().decode('utf-8', errors='replace') - e = stderr.read().decode('utf-8', errors='replace') - if o.strip(): print(o.strip()[:5000]) - if e.strip(): print('[ERR]', e.strip()[:500]) - -# 1. 현재 사이트 이름 확인 + 한글 깨짐 원인 분석 -print('=== Check sites ===') -run('docker exec crawl-manager-db psql -U crawler -d crawler -c "SELECT id, name, slug FROM sites ORDER BY id;"') - -# 2. 크롤링된 HTML에서 한글 확인 -print('\n=== Check encoding in crawl results ===') - -script = r""" -const db = require('./src/db'); -(async () => { - await db.waitForDB(); - // 각 사이트 최신 렌더링 HTML의 title 태그 확인 - const sites = await db.query('SELECT id, name FROM sites ORDER BY id'); - for (const s of sites.rows) { - const r = await db.query('SELECT rendered_html FROM crawl_results WHERE site_id=$1 AND status=$2 ORDER BY crawled_at DESC LIMIT 1', [s.id, 'success']); - if (r.rows.length && r.rows[0].rendered_html) { - var html = r.rows[0].rendered_html; - var titleMatch = html.match(/