feat: Puppeteer 헤드리스 브라우저 크롤링 지원

- JS 렌더링 대기 (wait ms 설정)
- 로그인 자동화 (아이디/비번 입력 → 버튼 클릭)
- 비주얼 매퍼에 JS 렌더링 체크박스 + 로그인 설정 UI 추가
- Dockerfile에 Chromium 설치
- parse_rules.browser=true 시 Puppeteer 사용
This commit is contained in:
chpark
2026-03-31 11:43:27 +09:00
parent da7ebe03c6
commit 6cd3bc218f
5 changed files with 187 additions and 7 deletions
+76 -2
View File
@@ -26,6 +26,9 @@
<div class="preview-panel">
<div class="toolbar">
<input id="m-url" placeholder="크롤링할 URL을 입력하세요" value="">
<label style="display:inline-flex;align-items:center;gap:.3rem;font-size:.75rem;white-space:nowrap;cursor:pointer">
<input type="checkbox" id="m-browser" style="width:auto"> JS렌더링
</label>
<button class="btn btn-primary btn-sm" onclick="fetchPage()" id="btn-fetch">페이지 가져오기</button>
</div>
<iframe id="preview-frame" sandbox="allow-same-origin allow-scripts allow-popups"></iframe>
@@ -36,6 +39,28 @@
<!-- 오른쪽: 설정 패널 -->
<div class="config-panel">
<!-- 크롤링 옵션 -->
<div class="step-card" id="step-options">
<h3><span class="step-num">0</span> 크롤링 옵션</h3>
<div class="field-row">
<span class="field-name">JS대기</span>
<input id="m-wait" type="number" value="3000" min="0" step="500" style="width:80px;padding:.2rem .4rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"> ms
</div>
<div style="margin-top:.5rem">
<label style="font-size:.78rem;color:var(--muted);display:flex;align-items:center;gap:.3rem;cursor:pointer">
<input type="checkbox" id="m-login-enable" style="width:auto" onchange="toggleLogin()"> 로그인 필요
</label>
</div>
<div id="login-fields" style="display:none;margin-top:.6rem">
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">로그인 URL</label><input id="m-login-url" placeholder="https://..." style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">아이디 셀렉터</label><input id="m-login-user-sel" placeholder="#username, input[name=id]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">아이디 값</label><input id="m-login-user-val" placeholder="myuser" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">비밀번호 셀렉터</label><input id="m-login-pass-sel" placeholder="#password, input[name=pw]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">비밀번호 값</label><input id="m-login-pass-val" type="password" placeholder="****" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:0"><label style="font-size:.72rem">로그인 버튼 셀렉터</label><input id="m-login-btn-sel" placeholder="button[type=submit]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
</div>
</div>
<!-- Step 1: 데이터 타입 -->
<div class="step-card">
<h3><span class="step-num">1</span> 데이터 타입</h3>
@@ -195,9 +220,29 @@ async function fetchPage() {
document.getElementById('status-bar').textContent = '페이지 로딩 중...';
try {
var useBrowser = document.getElementById('m-browser').checked;
var waitMs = parseInt(document.getElementById('m-wait').value) || 3000;
var fetchBody = { url: url };
if (useBrowser) {
fetchBody.browser = true;
fetchBody.wait = waitMs;
// 로그인 설정
if (document.getElementById('m-login-enable').checked) {
fetchBody.login = {
url: document.getElementById('m-login-url').value.trim() || url,
steps: [
{ action: 'type', selector: document.getElementById('m-login-user-sel').value, value: document.getElementById('m-login-user-val').value },
{ action: 'type', selector: document.getElementById('m-login-pass-sel').value, value: document.getElementById('m-login-pass-val').value },
{ action: 'click', selector: document.getElementById('m-login-btn-sel').value },
{ wait: 2000 }
]
};
}
document.getElementById('status-bar').textContent = '브라우저 모드 로딩 중... (JS 렌더링 대기 ' + waitMs + 'ms)';
}
var resp = await fetch('/api/fetch-page', {
method: 'POST', headers: {'Content-Type':'application/json'}, credentials: 'same-origin',
body: JSON.stringify({ url: url })
body: JSON.stringify(fetchBody)
});
if (!resp.ok) { var err = await resp.json().catch(function(){return {error:'HTTP '+resp.status}}); throw new Error(err.error || 'HTTP '+resp.status); }
var res = await resp.json();
@@ -303,9 +348,39 @@ window.addEventListener('message', function(e) {
updateJson();
});
// === 로그인 필드 토글 ===
/**
 * Show or hide the login settings panel so that it mirrors the state of the
 * "login required" checkbox (#m-login-enable). Wired to that checkbox's
 * onchange handler in the markup.
 */
function toggleLogin() {
  // Resolve the panel first, then read the checkbox, matching the original lookup order.
  const panelStyle = document.getElementById('login-fields').style;
  const loginRequired = document.getElementById('m-login-enable').checked;
  if (loginRequired) {
    panelStyle.display = 'block';
  } else {
    panelStyle.display = 'none';
  }
}
// === JSON 미리보기 업데이트 ===
function updateJson() {
var rules = {};
// 브라우저 모드
if (document.getElementById('m-browser').checked) {
rules.browser = true;
rules.wait = parseInt(document.getElementById('m-wait').value) || 3000;
}
// 로그인
if (document.getElementById('m-login-enable').checked) {
var userSel = document.getElementById('m-login-user-sel').value;
var passSel = document.getElementById('m-login-pass-sel').value;
var btnSel = document.getElementById('m-login-btn-sel').value;
if (userSel && passSel) {
rules.login = {
url: document.getElementById('m-login-url').value.trim(),
steps: [
{ action: 'type', selector: userSel, value: document.getElementById('m-login-user-val').value },
{ action: 'type', selector: passSel, value: document.getElementById('m-login-pass-val').value },
{ action: 'click', selector: btnSel || 'button[type=submit]' },
{ wait: 2000 }
]
};
}
}
if (dataType === 'landing') {
rules.content_selector = mappings.content_selector || 'body';
rules.remove_selectors = 'script, style, iframe, nav, header, footer, .ad, .ads, .sidebar';
@@ -318,7 +393,6 @@ function updateJson() {
rules.fields = {};
['name','url','url_text','rank','features'].forEach(function(f) {
if (mappings[f]) {
// 컨테이너 기준 상대 셀렉터로 변환
var sel = mappings[f].selector;
if (containerSelector && sel.indexOf(containerSelector) === 0) {
sel = sel.substring(containerSelector.length).replace(/^\s*>\s*/, '');