Reconstructed from a whitespace-collapsed copy of this patch.

* deploy_remote.py: revised. Remote exit statuses are now checked, the SSH
  and SFTP sessions are always closed, and the SSH password can be supplied
  via DEPLOY_SSH_PASSWORD. The hunk length was updated accordingly; the
  index hash recorded for this file is stale.
* src/routes/api.js, src/services/crawler.js: hunk content is unchanged, but
  indentation and blank context lines were lost in the collapsed copy and
  are reconstructed here (one blank context line in the api.js hunk is a
  guess). Verify against the tree; `git apply --ignore-whitespace` may be
  required.

diff --git a/deploy_remote.py b/deploy_remote.py
new file mode 100644
index 0000000..f336be9
--- /dev/null
+++ b/deploy_remote.py
@@ -0,0 +1,118 @@
+"""Redeploy admin_st on the remote host, then re-crawl site 17 in browser mode.
+
+Everything runs over one SSH session: the checkout is hard-reset to
+origin/main, the compose stack is rebuilt, and a one-off Node script is
+uploaded and executed inside the crawl-manager container.
+"""
+import io
+import os
+import sys
+import time
+
+import paramiko
+
+# Force UTF-8 output; unencodable characters are replaced instead of raising.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
+
+APP_DIR = '/home/three/admin_st'
+
+ssh = paramiko.SSHClient()
+# NOTE(review): AutoAddPolicy accepts unknown host keys without verification;
+# prefer load_system_host_keys() with a pinned key.
+ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+
+
+def run(cmd, t=300, check=False):
+    """Run *cmd* on the remote host and echo its truncated output.
+
+    Args:
+        cmd: Shell command line passed to ``exec_command``.
+        t: Channel timeout in seconds.
+        check: When true, raise ``RuntimeError`` on a non-zero exit status.
+
+    Returns:
+        The exit status reported by the remote command.
+    """
+    _stdin, stdout, stderr = ssh.exec_command(cmd, timeout=t)
+    out = stdout.read().decode('utf-8', errors='replace').strip()
+    err = stderr.read().decode('utf-8', errors='replace').strip()
+    if out:
+        print(out[:3000])
+    if err:
+        print(err[:1000])
+    # Previously the exit status was ignored, so a failed step went unnoticed.
+    status = stdout.channel.recv_exit_status()
+    if status != 0:
+        print(f'[exit {status}] {cmd}')
+        if check:
+            raise RuntimeError(f'remote command failed ({status}): {cmd}')
+    return status
+
+
+# Node script run inside the container: sets browser-mode parse_rules on
+# site 17, re-crawls it and prints the newest crawl_results row.
+script = r"""
+const db = require('./src/db');
+const { crawlSite } = require('./src/services/crawler');
+(async () => {
+  await db.waitForDB();
+
+  // 금시세 사이트 parse_rules에 browser:true 추가
+  const rules = {
+    "browser": true,
+    "wait": 5000,
+    "content_selector": ".main_market_cont, .livespotprice",
+    "remove_selectors": "script, style, iframe",
+    "meta": {
+      "title": { "selector": "title", "type": "text" },
+      "description": { "selector": "meta[name=description]", "type": "attr", "attr": "content" }
+    }
+  };
+  await db.query('UPDATE sites SET parse_rules=$1 WHERE id=17', [JSON.stringify(rules)]);
+  console.log('Updated gold site rules with browser:true');
+
+  // 크롤링
+  try {
+    const r = await crawlSite(17);
+    console.log('Gold crawl OK:', JSON.stringify(r));
+  } catch(e) { console.log('Gold crawl ERR:', e.message); }
+
+  // 결과 확인
+  const cr = await db.query("SELECT id, status, LENGTH(rendered_html) as len FROM crawl_results WHERE site_id=17 ORDER BY id DESC LIMIT 1");
+  if (cr.rows.length) console.log('Result:', cr.rows[0].status, cr.rows[0].len + ' bytes');
+
+  process.exit(0);
+})();
+"""
+
+try:
+    # SECURITY: the fallback password is committed to source control; rotate it
+    # and supply DEPLOY_SSH_PASSWORD (or switch to key-based auth).
+    ssh.connect(
+        '211.115.91.140',
+        port=12991,
+        username='three',
+        password=os.environ.get('DEPLOY_SSH_PASSWORD', 'qlalfqjsgh11'),
+        timeout=15,
+    )
+
+    # Deploy (the Chromium install makes the image build slow).
+    run(f'cd {APP_DIR} && git fetch origin && git reset --hard origin/main', check=True)
+    run(f'cd {APP_DIR} && docker compose down')
+    print('=== Building (Chromium install ~1-2min) ===')
+    run(f'cd {APP_DIR} && docker compose up --build --force-recreate -d', check=True)
+    time.sleep(20)  # presumably lets the containers start before they are inspected
+    run(f'cd {APP_DIR} && docker compose ps')
+    run(f'cd {APP_DIR} && docker compose logs --tail=10 crawl-manager')
+
+    # Switch the gold-price site to browser mode and re-crawl it.
+    with ssh.open_sftp() as sftp:
+        with sftp.file('/tmp/fix_gold.js', 'w') as f:
+            f.write(script)
+    run('docker cp /tmp/fix_gold.js crawl-manager:/app/fix_gold.js', check=True)
+    print('\n=== Re-crawl gold with browser mode ===')
+    run('docker exec crawl-manager node /app/fix_gold.js', t=120)
+finally:
+    # Close the session even when a step raises.
+    ssh.close()
+print('Done!')
diff --git a/src/routes/api.js b/src/routes/api.js
index c3370a7..17fea68 100644
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -100,14 +100,14 @@ router.post('/fetch-page', async (req, res) => {
         for (const step of login.steps) {
           if (step.action === 'type') { await page.waitForSelector(step.selector, {timeout:10000}); await page.type(step.selector, step.value, {delay:50}); }
           else if (step.action === 'click') { await page.waitForSelector(step.selector, {timeout:10000}); await page.click(step.selector); }
-          else if (step.wait) { await page.waitForTimeout(step.wait); }
+          else if (step.wait) { await new Promise(r => setTimeout(r, step.wait)); }
         }
         if (login.url && login.url !== url) await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
       } else {
         await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
       }
 
-      await page.waitForTimeout(wait || 3000);
+      await new Promise(r => setTimeout(r, wait || 3000));
       const html = await page.content();
       await b.close();
       return res.json({ html, finalUrl: url });
diff --git a/src/services/crawler.js b/src/services/crawler.js
index 0c51284..0fbd5e5 100644
--- a/src/services/crawler.js
+++ b/src/services/crawler.js
@@ -25,7 +25,7 @@ async function fetchWithBrowser(url, parseRules) {
     if (parseRules.login) {
       const login = parseRules.login;
       await page.goto(login.url || url, { waitUntil: 'networkidle2', timeout: 30000 });
-      if (login.wait_before) await page.waitForTimeout(login.wait_before);
+      if (login.wait_before) await new Promise(r => setTimeout(r, login.wait_before));
 
       for (const step of (login.steps || [])) {
         if (step.action === 'type' && step.selector && step.value) {
@@ -35,7 +35,7 @@ async function fetchWithBrowser(url, parseRules) {
           await page.waitForSelector(step.selector, { timeout: 10000 });
           await page.click(step.selector);
         } else if (step.wait) {
-          await page.waitForTimeout(step.wait);
+          await new Promise(r => setTimeout(r, step.wait));
         }
       }
 
@@ -49,7 +49,7 @@ async function fetchWithBrowser(url, parseRules) {
 
     // JS 렌더링 대기
     const waitMs = parseRules.wait || 3000;
-    await page.waitForTimeout(waitMs);
+    await new Promise(r => setTimeout(r, waitMs));
 
     // 특정 셀렉터가 나타날 때까지 대기
     if (parseRules.wait_for) {