Files
crawlmanager/deploy_remote.py
T
chpark cfcb19bf23 fix: Puppeteer v22 waitForTimeout 제거 대응
page.waitForTimeout → new Promise(setTimeout) 으로 교체
2026-03-31 11:50:15 +09:00

65 lines
2.5 KiB
Python

import io
import os
import sys
import time

import paramiko
# Re-wrap stdout as UTF-8 so that non-ASCII remote output can be printed on
# consoles whose default codec cannot encode it; characters that still cannot
# be encoded are replaced instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

# Connection settings. Each value may be overridden through the environment;
# the defaults reproduce the values that used to be hard-coded here, so the
# script behaves exactly as before when no variable is set.
# NOTE(review): the default password is committed in plain text. Rotate it,
# supply DEPLOY_SSH_PASSWORD (or move to key-based auth), then drop the default.
SSH_HOST = os.environ.get('DEPLOY_SSH_HOST', '211.115.91.140')
SSH_PORT = int(os.environ.get('DEPLOY_SSH_PORT', '12991'))
SSH_USER = os.environ.get('DEPLOY_SSH_USER', 'three')
SSH_PASSWORD = os.environ.get('DEPLOY_SSH_PASSWORD', 'qlalfqjsgh11')

ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts any unknown host key without
# verification; consider loading known_hosts and using RejectPolicy once the
# server key has been recorded.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(SSH_HOST, port=SSH_PORT, username=SSH_USER, password=SSH_PASSWORD, timeout=15)
def run(cmd, t=300):
    """Run *cmd* on the remote host via the module-level ``ssh`` client.

    The command's stdout and stderr are read to the end, decoded as UTF-8
    (undecodable bytes are replaced) and echoed locally: at most 3000
    characters of stdout and 1000 characters of stderr.

    Args:
        cmd: Shell command line passed to ``ssh.exec_command``.
        t: Timeout in seconds passed to ``ssh.exec_command`` (default 300).

    Returns:
        The remote command's exit status as reported by the channel
        (0 on success). Earlier versions returned ``None``.
    """
    _stdin, stdout, stderr = ssh.exec_command(cmd, timeout=t)
    out_text = stdout.read().decode('utf-8', errors='replace').strip()
    err_text = stderr.read().decode('utf-8', errors='replace').strip()
    # Blocks until the remote process has reported its exit status.
    status = stdout.channel.recv_exit_status()

    if out_text:
        print(out_text[:3000])
    if err_text:
        print(err_text[:1000])
    if status != 0:
        # Make failures visible; previously a failing command was silent
        # unless it happened to write something to stderr.
        print(f'[exit {status}] {cmd}')
    return status
# JavaScript helper executed inside the crawl-manager container. It waits for
# the DB, overwrites parse_rules of the site with id=17 (enabling browser mode),
# re-crawls that site and prints the status and rendered_html length of its
# most recent crawl_results row. The text is uploaded verbatim.
script = r"""
const db = require('./src/db');
const { crawlSite } = require('./src/services/crawler');
(async () => {
await db.waitForDB();
// 금시세 사이트 parse_rules에 browser:true 추가
const rules = {
"browser": true,
"wait": 5000,
"content_selector": ".main_market_cont, .livespotprice",
"remove_selectors": "script, style, iframe",
"meta": {
"title": { "selector": "title", "type": "text" },
"description": { "selector": "meta[name=description]", "type": "attr", "attr": "content" }
}
};
await db.query('UPDATE sites SET parse_rules=$1 WHERE id=17', [JSON.stringify(rules)]);
console.log('Updated gold site rules with browser:true');
// 크롤링
try {
const r = await crawlSite(17);
console.log('Gold crawl OK:', JSON.stringify(r));
} catch(e) { console.log('Gold crawl ERR:', e.message); }
// 결과 확인
const cr = await db.query("SELECT id, status, LENGTH(rendered_html) as len FROM crawl_results WHERE site_id=17 ORDER BY id DESC LIMIT 1");
if (cr.rows.length) console.log('Result:', cr.rows[0].status, cr.rows[0].len + ' bytes');
process.exit(0);
})();
"""

# The try/finally guarantees the SSH connection is closed even when a step
# raises (e.g. a timeout), instead of leaking it as before.
try:
    # Deploy (the build takes longer because Chromium gets installed).
    run('cd /home/three/admin_st && git fetch origin && git reset --hard origin/main')
    run('cd /home/three/admin_st && docker compose down')
    print('=== Building (Chromium install ~1-2min) ===')
    run('cd /home/three/admin_st && docker compose up --build --force-recreate -d')
    # Pause before inspecting state; presumably this gives the containers
    # time to start.
    time.sleep(20)
    run('cd /home/three/admin_st && docker compose ps')
    run('cd /home/three/admin_st && docker compose logs --tail=10 crawl-manager')

    # Switch the gold-price site to browser mode and re-crawl it: upload the
    # helper to the host, copy it into the container, then run it with node.
    # The context manager closes the SFTP session even if the write fails.
    with ssh.open_sftp() as sftp:
        with sftp.file('/tmp/fix_gold.js', 'w') as f:
            f.write(script)
    run('docker cp /tmp/fix_gold.js crawl-manager:/app/fix_gold.js')
    print('\n=== Re-crawl gold with browser mode ===')
    run('docker exec crawl-manager node /app/fix_gold.js', t=120)
finally:
    ssh.close()
print('Done!')