fix: Puppeteer v22 waitForTimeout 제거 대응

page.waitForTimeout → new Promise(setTimeout) 으로 교체
This commit is contained in:
chpark
2026-03-31 11:50:15 +09:00
parent 6cd3bc218f
commit cfcb19bf23
3 changed files with 69 additions and 5 deletions
+64
View File
@@ -0,0 +1,64 @@
import io
import os
import sys
import time

import paramiko
# Re-wrap stdout as UTF-8 with replacement of unencodable characters, so that
# printing remote output cannot raise an encoding error on the local console.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')

# Connection settings may be overridden through the environment; the defaults
# are the values this script has always used, so behaviour is unchanged when
# no variable is set.
# SECURITY: the fallback password is committed in plain text. Rotate it and
# drop the default once DEPLOY_SSH_PASSWORD is provisioned.
SSH_HOST = os.environ.get('DEPLOY_SSH_HOST', '211.115.91.140')
SSH_PORT = int(os.environ.get('DEPLOY_SSH_PORT', '12991'))
SSH_USER = os.environ.get('DEPLOY_SSH_USER', 'three')
SSH_PASSWORD = os.environ.get('DEPLOY_SSH_PASSWORD', 'qlalfqjsgh11')

ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts host keys that are not yet known without
# verifying them; consider pinning the server key instead.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(SSH_HOST, port=SSH_PORT, username=SSH_USER, password=SSH_PASSWORD, timeout=15)
def run(cmd, t=300):
    """Run ``cmd`` through the module-level ``ssh`` client and echo its output.

    Args:
        cmd: Shell command line passed to ``ssh.exec_command``.
        t: Timeout in seconds handed to ``exec_command`` (default 300).

    Returns:
        The remote command's exit status. Earlier versions returned nothing,
        so callers that ignore the result are unaffected.
    """
    _stdin, stdout, stderr = ssh.exec_command(cmd, timeout=t)
    out = stdout.read().decode('utf-8', errors='replace').strip()
    err = stderr.read().decode('utf-8', errors='replace').strip()
    # Truncate what is echoed so a long build log does not flood the console.
    if out:
        print(out[:3000])
    if err:
        print(err[:1000])
    # Both streams have been read to EOF above, so waiting for the exit status
    # here is not expected to block on undelivered output.
    status = stdout.channel.recv_exit_status()
    if status != 0:
        # Surface failures that were previously ignored silently.
        print(f'[exit {status}] {cmd}')
    return status
# Deploy: sync the checkout to origin/main, then rebuild and restart the stack.
# (The build takes longer because of the Chromium install.)
in_repo = 'cd /home/three/admin_st && '
for step in ('git fetch origin && git reset --hard origin/main', 'docker compose down'):
    run(in_repo + step)
print('=== Building (Chromium install ~1-2min) ===')
run(in_repo + 'docker compose up --build --force-recreate -d')
# Pause before inspecting container state and recent logs.
time.sleep(20)
for step in ('docker compose ps', 'docker compose logs --tail=10 crawl-manager'):
    run(in_repo + step)
# Switch the gold-price site to browser mode and re-crawl it.
# `script` holds a Node.js program as a raw string; it is only defined here and
# is uploaded/executed later in this file. Reading the embedded JavaScript:
# it waits for the DB, overwrites parse_rules for site id 17 with rules that
# include "browser": true, calls crawlSite(17) (logging, not re-raising, any
# error), prints the status and rendered_html length of the newest
# crawl_results row for that site, and then exits with code 0.
# The string content is runtime text and must not be altered.
script = r"""
const db = require('./src/db');
const { crawlSite } = require('./src/services/crawler');
(async () => {
await db.waitForDB();
// 금시세 사이트 parse_rules에 browser:true 추가
const rules = {
"browser": true,
"wait": 5000,
"content_selector": ".main_market_cont, .livespotprice",
"remove_selectors": "script, style, iframe",
"meta": {
"title": { "selector": "title", "type": "text" },
"description": { "selector": "meta[name=description]", "type": "attr", "attr": "content" }
}
};
await db.query('UPDATE sites SET parse_rules=$1 WHERE id=17', [JSON.stringify(rules)]);
console.log('Updated gold site rules with browser:true');
// 크롤링
try {
const r = await crawlSite(17);
console.log('Gold crawl OK:', JSON.stringify(r));
} catch(e) { console.log('Gold crawl ERR:', e.message); }
// 결과 확인
const cr = await db.query("SELECT id, status, LENGTH(rendered_html) as len FROM crawl_results WHERE site_id=17 ORDER BY id DESC LIMIT 1");
if (cr.rows.length) console.log('Result:', cr.rows[0].status, cr.rows[0].len + ' bytes');
process.exit(0);
})();
"""
# Upload the helper script to the remote host, copy it into the crawl-manager
# container and execute it there with node.
try:
    # The SFTP client is used as a context manager so the session is closed
    # even if writing the file raises.
    with ssh.open_sftp() as sftp:
        with sftp.file('/tmp/fix_gold.js', 'w') as f:
            f.write(script)
    run('docker cp /tmp/fix_gold.js crawl-manager:/app/fix_gold.js')
    print('\n=== Re-crawl gold with browser mode ===')
    run('docker exec crawl-manager node /app/fix_gold.js', t=120)
finally:
    # Always release the SSH connection, including when a step above raises.
    ssh.close()
print('Done!')
+2 -2
View File
@@ -100,14 +100,14 @@ router.post('/fetch-page', async (req, res) => {
for (const step of login.steps) {
if (step.action === 'type') { await page.waitForSelector(step.selector, {timeout:10000}); await page.type(step.selector, step.value, {delay:50}); }
else if (step.action === 'click') { await page.waitForSelector(step.selector, {timeout:10000}); await page.click(step.selector); }
else if (step.wait) { await page.waitForTimeout(step.wait); }
else if (step.wait) { await new Promise(r => setTimeout(r, step.wait)); }
}
if (login.url && login.url !== url) await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
} else {
await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
}
await page.waitForTimeout(wait || 3000);
await new Promise(r => setTimeout(r, wait || 3000));
const html = await page.content();
await b.close();
return res.json({ html, finalUrl: url });
+3 -3
View File
@@ -25,7 +25,7 @@ async function fetchWithBrowser(url, parseRules) {
if (parseRules.login) {
const login = parseRules.login;
await page.goto(login.url || url, { waitUntil: 'networkidle2', timeout: 30000 });
if (login.wait_before) await page.waitForTimeout(login.wait_before);
if (login.wait_before) await new Promise(r => setTimeout(r, login.wait_before));
for (const step of (login.steps || [])) {
if (step.action === 'type' && step.selector && step.value) {
@@ -35,7 +35,7 @@ async function fetchWithBrowser(url, parseRules) {
await page.waitForSelector(step.selector, { timeout: 10000 });
await page.click(step.selector);
} else if (step.wait) {
await page.waitForTimeout(step.wait);
await new Promise(r => setTimeout(r, step.wait));
}
}
@@ -49,7 +49,7 @@ async function fetchWithBrowser(url, parseRules) {
// JS 렌더링 대기
const waitMs = parseRules.wait || 3000;
await page.waitForTimeout(waitMs);
await new Promise(r => setTimeout(r, waitMs));
// 특정 셀렉터가 나타날 때까지 대기
if (parseRules.wait_for) {