Files
crawlmanager/views/admin/mapper.ejs
T
chpark 6cd3bc218f feat: Puppeteer 헤드리스 브라우저 크롤링 지원
- JS 렌더링 대기 (wait ms 설정)
- 로그인 자동화 (아이디/비번 입력 → 버튼 클릭)
- 비주얼 매퍼에 JS렌더링 체크박스 + 로그인 설정 UI
- Dockerfile에 Chromium 설치
- parse_rules.browser=true 시 Puppeteer 사용
2026-03-31 11:43:27 +09:00

448 lines
23 KiB
Plaintext

<%- include('layout', { page: 'mapper', pageTitle: '비주얼 매퍼', body: `
<style>
.mapper-wrap{display:grid;grid-template-columns:1fr 340px;gap:1rem;height:calc(100vh - 120px)}
.preview-panel{background:var(--glass-bg);backdrop-filter:var(--glass-blur);-webkit-backdrop-filter:var(--glass-blur);border:1px solid var(--glass-border);border-radius:var(--radius);overflow:hidden;display:flex;flex-direction:column;box-shadow:var(--glass-shadow)}
.preview-panel .toolbar{padding:.6rem .8rem;border-bottom:1px solid var(--glass-border);display:flex;gap:.5rem;align-items:center;flex-shrink:0;background:rgba(255,255,255,.02)}
.preview-panel .toolbar input{flex:1}
.preview-panel iframe{flex:1;width:100%;border:none;background:#fff}
.config-panel{display:flex;flex-direction:column;gap:.8rem;overflow-y:auto}
.step-card{background:var(--glass-bg);backdrop-filter:var(--glass-blur);-webkit-backdrop-filter:var(--glass-blur);border:1px solid var(--glass-border);border-radius:var(--radius);padding:1rem;box-shadow:var(--glass-shadow);transition:all .2s}
.step-card:hover{background:var(--glass-bg-hover);border-color:var(--glass-border-hover)}
.step-card h3{font-size:.9rem;margin-bottom:.6rem;color:var(--primary)}
.step-card .step-num{display:inline-block;background:linear-gradient(135deg,var(--primary),#7c3aed);color:#fff;width:22px;height:22px;border-radius:50%;text-align:center;line-height:22px;font-size:.75rem;font-weight:700;margin-right:.4rem;box-shadow:0 2px 8px rgba(99,102,241,.3)}
.field-row{display:flex;align-items:center;gap:.5rem;margin-bottom:.5rem;padding:.4rem .6rem;background:rgba(0,0,0,.2);border:1px solid rgba(255,255,255,.05);border-radius:8px;font-size:.82rem}
.field-row .field-name{width:70px;font-weight:600;flex-shrink:0}
.field-row .field-selector{flex:1;color:var(--muted);font-family:monospace;font-size:.75rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.field-row .field-selector.mapped{color:var(--success)}
.field-row button{flex-shrink:0}
.mapping-active{outline:3px solid var(--primary)!important;animation:pulse .8s infinite}
@keyframes pulse{0%,100%{outline-color:var(--primary)}50%{outline-color:var(--success)}}
.json-preview{background:rgba(0,0,0,.3);border:1px solid var(--glass-border);padding:.8rem;border-radius:var(--radius);font-size:.72rem;font-family:monospace;white-space:pre-wrap;max-height:200px;overflow:auto;color:var(--success)}
</style>
<div class="mapper-wrap">
<!-- 왼쪽: 페이지 미리보기 -->
<div class="preview-panel">
<div class="toolbar">
<input id="m-url" placeholder="크롤링할 URL을 입력하세요" value="">
<label style="display:inline-flex;align-items:center;gap:.3rem;font-size:.75rem;white-space:nowrap;cursor:pointer">
<input type="checkbox" id="m-browser" style="width:auto"> JS렌더링
</label>
<button class="btn btn-primary btn-sm" onclick="fetchPage()" id="btn-fetch">페이지 가져오기</button>
</div>
<!-- The preview renders untrusted third-party HTML. allow-scripts must not be combined with allow-same-origin: together they would let the fetched page run script in this admin origin. The injected mapper script only talks to this page through postMessage, which works from an opaque origin. -->
<iframe id="preview-frame" sandbox="allow-scripts allow-popups"></iframe>
<div id="status-bar" style="padding:.4rem .8rem;font-size:.75rem;color:var(--muted);border-top:1px solid var(--border);flex-shrink:0">
URL을 입력하고 "페이지 가져오기"를 클릭하세요
</div>
</div>
<!-- 오른쪽: 설정 패널 -->
<div class="config-panel">
<!-- 크롤링 옵션 -->
<div class="step-card" id="step-options">
<h3><span class="step-num">0</span> 크롤링 옵션</h3>
<div class="field-row">
<span class="field-name">JS대기</span>
<input id="m-wait" type="number" value="3000" min="0" step="500" style="width:80px;padding:.2rem .4rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"> ms
</div>
<div style="margin-top:.5rem">
<label style="font-size:.78rem;color:var(--muted);display:flex;align-items:center;gap:.3rem;cursor:pointer">
<input type="checkbox" id="m-login-enable" style="width:auto" onchange="toggleLogin()"> 로그인 필요
</label>
</div>
<div id="login-fields" style="display:none;margin-top:.6rem">
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">로그인 URL</label><input id="m-login-url" placeholder="https://..." style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">아이디 셀렉터</label><input id="m-login-user-sel" placeholder="#username, input[name=id]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">아이디 값</label><input id="m-login-user-val" placeholder="myuser" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">비밀번호 셀렉터</label><input id="m-login-pass-sel" placeholder="#password, input[name=pw]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:.4rem"><label style="font-size:.72rem">비밀번호 값</label><input id="m-login-pass-val" type="password" placeholder="****" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
<div class="form-group" style="margin-bottom:0"><label style="font-size:.72rem">로그인 버튼 셀렉터</label><input id="m-login-btn-sel" placeholder="button[type=submit]" style="padding:.3rem .5rem;font-size:.78rem;background:rgba(0,0,0,.3);border:1px solid rgba(255,255,255,.1);border-radius:4px;color:var(--text)"></div>
</div>
</div>
<!-- Step 1: 데이터 타입 -->
<div class="step-card">
<h3><span class="step-num">1</span> 데이터 타입</h3>
<div class="form-row" style="gap:.5rem">
<button class="btn btn-sm" id="btn-type-list" onclick="setDataType('list')" style="flex:1">목록 (순위/리스트)</button>
<button class="btn btn-sm" id="btn-type-landing" onclick="setDataType('landing')" style="flex:1">랜딩 (본문)</button>
</div>
</div>
<!-- Step 2: 매핑 (목록) -->
<div class="step-card" id="step-list" style="display:none">
<h3><span class="step-num">2</span> 목록 매핑</h3>
<p style="font-size:.78rem;color:var(--muted);margin-bottom:.6rem">미리보기에서 <strong>반복되는 행(row)</strong> 하나를 클릭하세요</p>
<div class="field-row">
<span class="field-name">컨테이너</span>
<span class="field-selector" id="sel-container">클릭으로 선택</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('container')">선택</button>
</div>
<hr style="border-color:var(--border);margin:.6rem 0">
<p style="font-size:.78rem;color:var(--muted);margin-bottom:.6rem">컨테이너 내 각 필드를 클릭하세요</p>
<div class="field-row">
<span class="field-name">제목</span>
<span class="field-selector" id="sel-name">-</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('name')">선택</button>
</div>
<div class="field-row">
<span class="field-name">URL</span>
<span class="field-selector" id="sel-url">-</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('url')">선택</button>
</div>
<div class="field-row">
<span class="field-name">URL텍스트</span>
<span class="field-selector" id="sel-url_text">-</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('url_text')">선택</button>
</div>
<div class="field-row">
<span class="field-name">순위</span>
<span class="field-selector" id="sel-rank">-</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('rank')">선택</button>
</div>
<div class="field-row">
<span class="field-name">특징</span>
<span class="field-selector" id="sel-features">-</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('features')">선택</button>
</div>
</div>
<!-- Step 2: 매핑 (랜딩) -->
<div class="step-card" id="step-landing" style="display:none">
<h3><span class="step-num">2</span> 본문 영역 선택</h3>
<p style="font-size:.78rem;color:var(--muted);margin-bottom:.6rem">미리보기에서 <strong>본문 콘텐츠 영역</strong>을 클릭하세요</p>
<div class="field-row">
<span class="field-name">본문</span>
<span class="field-selector" id="sel-content">클릭으로 선택</span>
<button class="btn btn-outline btn-sm" onclick="startMapping('content')">선택</button>
</div>
</div>
<!-- Step 3: 사이트 정보 -->
<div class="step-card" id="step-save" style="display:none">
<h3><span class="step-num">3</span> 사이트 저장</h3>
<div class="form-group"><label>사이트명 *</label><input id="m-name" placeholder="예: 토렌트 순위"></div>
<div class="form-group"><label>슬러그</label><input id="m-slug" placeholder="예: torrent-rank"></div>
<div class="form-group"><label>설명</label><input id="m-desc" placeholder="사이트 설명"></div>
<div class="form-group"><label>AdSense</label><select id="m-adsense"><option value="">없음</option></select></div>
<div class="form-group">
<label>생성된 파싱 규칙</label>
<div class="json-preview" id="json-preview">{}</div>
</div>
<button class="btn btn-primary" onclick="saveMappedSite()" style="width:100%">사이트 저장 &amp; 크롤링</button>
</div>
</div>
</div>
<script id="mapper-inject-script" type="text/template">
(function(){
// Element currently under the pointer; the click handler reports this one.
var hovered = null;

// Translucent box drawn on top of the hovered element.
var box = document.createElement("div");
box.id = "__mapper_overlay__";
box.style.cssText = "position:fixed;pointer-events:none;border:3px solid #6366f1;background:rgba(99,102,241,.12);z-index:999999;transition:all .1s;display:none;border-radius:4px";
document.body.appendChild(box);

// Small badge showing the selector computed for the hovered element.
var badge = document.createElement("div");
badge.style.cssText = "position:fixed;z-index:999999;background:#6366f1;color:#fff;font-size:11px;padding:2px 8px;border-radius:4px;pointer-events:none;display:none;font-family:monospace";
document.body.appendChild(badge);

// Remember the hovered element and move the box and badge onto it.
function trackPointer(evt) {
  var target = evt.target;
  if (target.id === "__mapper_overlay__" || target === badge) return;
  hovered = target;
  var rect = target.getBoundingClientRect();
  var boxStyle = box.style;
  boxStyle.display = "block";
  boxStyle.left = rect.left + "px";
  boxStyle.top = rect.top + "px";
  boxStyle.width = rect.width + "px";
  boxStyle.height = rect.height + "px";
  var badgeStyle = badge.style;
  badgeStyle.display = "block";
  badgeStyle.left = rect.left + "px";
  // Keep the badge inside the viewport when the element touches the top edge.
  badgeStyle.top = Math.max(0, rect.top - 22) + "px";
  badge.textContent = getSelector(target);
}

// Swallow the click (so links do not navigate) and report the hovered
// element to the parent window.
function reportPick(evt) {
  evt.preventDefault();
  evt.stopPropagation();
  if (!hovered) return;
  window.parent.postMessage({
    type: "element-selected",
    selector: getSelector(hovered),
    text: (hovered.textContent || "").trim().substring(0, 80),
    tag: hovered.tagName.toLowerCase(),
    href: hovered.getAttribute("href") || ""
  }, "*");
}

document.addEventListener("mousemove", trackPointer);
// Capture phase, so the page's own click handlers do not run first.
document.addEventListener("click", reportPick, true);
/**
 * Build a short CSS selector path for an element.
 *
 * Walks up from the element, emitting "tag.class1.class2" per level (at most
 * two classes), adding ":nth-child(k)" when the element has same-tag siblings,
 * and stopping at the first ancestor with a usable id or after five segments.
 * Ids and classes starting with "__" are skipped because the mapper uses that
 * prefix for its own nodes.
 *
 * @param {Element} el - Element to describe.
 * @returns {string} Segments joined with " > "; empty string when el is missing.
 */
function getSelector(el) {
  // Escape identifiers when CSS.escape exists so names containing characters
  // such as ":" or starting with a digit still form a valid selector.
  function esc(ident) {
    var canEscape = typeof CSS !== "undefined" && CSS && typeof CSS.escape === "function";
    return canEscape ? CSS.escape(ident) : ident;
  }
  // el.id may be a non-string (a form control named "id" shadows it), so check the type.
  function hasUsableId(node) {
    return typeof node.id === "string" && node.id !== "" && node.id.indexOf("__") !== 0;
  }
  function keepClass(c) {
    return c && c.indexOf("__") !== 0 && c.length < 40;
  }

  if (!el) return "";
  if (hasUsableId(el)) return "#" + esc(el.id);

  var path = [];
  while (el && el.nodeType === 1) {
    if (hasUsableId(el)) { path.unshift("#" + esc(el.id)); break; }
    var s = el.tagName.toLowerCase();
    // classList tokenizes on any whitespace (tabs and newlines included).
    var cls = (el.classList ? Array.from(el.classList) : []).filter(keepClass).slice(0, 2);
    if (cls.length) s += "." + cls.map(esc).join(".");
    var kids = el.parentNode && el.parentNode.children ? Array.from(el.parentNode.children) : [];
    var sameTag = kids.filter(function (c) { return c.tagName === el.tagName; });
    // nth-child counts every sibling, so the index is taken over all children.
    if (sameTag.length > 1) s += ":nth-child(" + (kids.indexOf(el) + 1) + ")";
    path.unshift(s);
    el = el.parentNode;
    if (path.length > 4) break;
  }
  return path.join(" > ");
}
})();
</script>
<script>
// Page state shared by the handlers below.
// NOTE(review): __INIT__ is not defined in this view; presumably the layout provides it - confirm.
var adsenseList = (__INIT__ || {}).adsense || [];
var dataType = null;          // 'list', 'landing', or null before a type is chosen
var mappingField = null;      // field currently waiting for a click in the preview
var mappings = {};            // field name -> mapping picked in the preview
var containerSelector = null; // selector of the clicked list row
var pageLoaded = false;       // true once a page has been loaded into the preview
// Populate the AdSense dropdown.
(function(){
  var sel = document.getElementById('m-adsense');
  // Build options as DOM nodes so names/ids containing markup or quotes
  // cannot inject HTML into the admin page.
  sel.innerHTML = '';
  var none = document.createElement('option');
  none.value = '';
  none.textContent = '없음';
  sel.appendChild(none);
  adsenseList.forEach(function(a){
    var opt = document.createElement('option');
    opt.value = String(a.id);
    opt.textContent = String(a.name);
    sel.appendChild(opt);
  });
})();
// === Fetch page ===
/**
 * Prepare fetched HTML for display in the preview iframe.
 *
 * Adds a base tag pointing at the page URL, upgrades http:// src/href values
 * to https://, and appends the mapper script before the closing body tag.
 *
 * @param {string} html - Raw HTML returned by the server.
 * @param {string} pageUrl - Absolute URL the HTML was fetched from.
 * @param {string} mapperScript - Script markup to inject into the document.
 * @returns {string} HTML ready to assign to iframe.srcdoc.
 */
function _buildPreviewHtml(html, pageUrl, mapperScript) {
  // Use the full page URL (not only the origin) so page-relative paths resolve correctly.
  var baseTag = '<base href="' + new URL(pageUrl).href + '">';
  // The lookahead keeps "<header" and similar tags from matching.
  var headOpen = /<head(?![a-z0-9-])[^>]*>/i;
  var htmlOpen = /<html(?![a-z0-9-])[^>]*>/i;
  // Function replacers avoid dollar-sign pattern expansion in the inserted text.
  var afterTag = function (tag) { return tag + baseTag; };
  if (headOpen.test(html)) {
    html = html.replace(headOpen, afterTag);
  } else if (htmlOpen.test(html)) {
    html = html.replace(htmlOpen, afterTag);
  } else {
    html = baseTag + html;
  }
  // Avoid mixed content: upgrade http to https in src/href attributes.
  ['src="', "src='", 'href="', "href='"].forEach(function (prefix) {
    html = html.split(prefix + 'http://').join(prefix + 'https://');
  });
  // Inject the mapper script; fall back to appending when there is no closing body tag.
  var bodyClose = new RegExp('</' + 'body>', 'i');
  if (bodyClose.test(html)) {
    html = html.replace(bodyClose, function (tag) { return mapperScript + tag; });
  } else {
    html += mapperScript;
  }
  return html;
}

/**
 * Fetch the URL typed into #m-url through /api/fetch-page and show the result
 * in the preview iframe with the mapper script injected.
 *
 * Sends browser/wait/login options only when JS rendering is checked. Errors
 * are reported through toast and the status bar; the fetch button is always
 * re-enabled.
 *
 * @returns {Promise<void>}
 */
async function fetchPage() {
  var url = document.getElementById('m-url').value.trim();
  if (!url) { toast('URL을 입력하세요','error'); return; }
  var btn = document.getElementById('btn-fetch');
  var statusBar = document.getElementById('status-bar');
  btn.disabled = true; btn.textContent = '로딩...';
  statusBar.textContent = '페이지 로딩 중...';
  try {
    var fetchBody = { url: url };
    if (document.getElementById('m-browser').checked) {
      // Keep an explicit 0; only fall back to 3000 for missing or invalid input.
      var waitMs = parseInt(document.getElementById('m-wait').value, 10);
      if (isNaN(waitMs) || waitMs < 0) waitMs = 3000;
      fetchBody.browser = true;
      fetchBody.wait = waitMs;
      // Login settings
      if (document.getElementById('m-login-enable').checked) {
        fetchBody.login = {
          url: document.getElementById('m-login-url').value.trim() || url,
          steps: [
            { action: 'type', selector: document.getElementById('m-login-user-sel').value, value: document.getElementById('m-login-user-val').value },
            { action: 'type', selector: document.getElementById('m-login-pass-sel').value, value: document.getElementById('m-login-pass-val').value },
            // Same default as the saved parse rules use for the submit button.
            { action: 'click', selector: document.getElementById('m-login-btn-sel').value || 'button[type=submit]' },
            { wait: 2000 }
          ]
        };
      }
      statusBar.textContent = '브라우저 모드 로딩 중... (JS 렌더링 대기 ' + waitMs + 'ms)';
    }
    var resp = await fetch('/api/fetch-page', {
      method: 'POST', headers: {'Content-Type':'application/json'}, credentials: 'same-origin',
      body: JSON.stringify(fetchBody)
    });
    if (!resp.ok) {
      var err = await resp.json().catch(function(){ return {error:'HTTP '+resp.status}; });
      throw new Error(err.error || 'HTTP '+resp.status);
    }
    var res = await resp.json();
    if (res.error) throw new Error(res.error);
    if (typeof res.html !== 'string') throw new Error('응답에 HTML이 없습니다');
    document.getElementById('preview-frame').srcdoc = _buildPreviewHtml(res.html, url, getMapperScript());
    pageLoaded = true;
    statusBar.textContent = '페이지 로드 완료. 데이터 타입을 선택하세요.';
    toast('페이지 로드 완료');
  } catch (e) {
    toast('로드 실패: ' + e.message, 'error');
    statusBar.textContent = '로드 실패: ' + e.message;
  } finally {
    btn.disabled = false; btn.textContent = '페이지 가져오기';
  }
}
// === Script injected into the iframe (read from the template tag) ===
/**
 * Wrap the text of #mapper-inject-script in a script element.
 * The tag names are assembled from pieces so this page never contains a
 * literal script tag inside its own script block.
 *
 * @returns {string} Script markup ready to be inserted into the preview HTML.
 */
function getMapperScript() {
  var source = document.getElementById('mapper-inject-script').textContent;
  var openTag = '<scr' + 'ipt>';
  var closeTag = '</scr' + 'ipt>';
  return [openTag, source, closeTag].join('');
}
// === Data type selection ===
/**
 * Switch the mapper to list or landing mode: clear existing mappings,
 * highlight the chosen type button, show the matching step cards, reset the
 * selector displays and refresh the JSON preview.
 *
 * @param {string} type - 'list' or 'landing'.
 */
function setDataType(type) {
  var isList = type === 'list';
  var isLanding = type === 'landing';
  var byId = function (id) { return document.getElementById(id); };

  // Start from a clean slate for the newly chosen type.
  dataType = type;
  mappings = {};
  containerSelector = null;

  byId('btn-type-list').className = 'btn btn-sm ' + (isList ? 'btn-primary' : 'btn-outline');
  byId('btn-type-landing').className = 'btn btn-sm ' + (isLanding ? 'btn-primary' : 'btn-outline');
  byId('step-list').style.display = isList ? 'block' : 'none';
  byId('step-landing').style.display = isLanding ? 'block' : 'none';
  byId('step-save').style.display = 'block';

  // Reset every selector display that exists on the page.
  var fieldNames = ['container','name','url','url_text','rank','features','content'];
  for (var i = 0; i < fieldNames.length; i++) {
    var slot = byId('sel-' + fieldNames[i]);
    if (!slot) continue;
    slot.textContent = '-';
    slot.className = 'field-selector';
  }

  updateJson();
  byId('status-bar').textContent = '미리보기에서 요소를 클릭하여 매핑하세요';
}
// === Start mapping ===
/**
 * Arm the mapper for one field: the next element clicked in the preview is
 * assigned to it. Requires a loaded page.
 *
 * @param {string} field - Field key such as 'container', 'name', 'url' or 'content'.
 */
function startMapping(field) {
  if (!pageLoaded) { toast('먼저 페이지를 가져오세요', 'error'); return; }
  mappingField = field;
  // Reset every field button.
  document.querySelectorAll('.field-row button').forEach(function(b) { b.className = 'btn btn-outline btn-sm'; });
  // Find the button through the field's own row instead of the deprecated
  // implicit global event, which is not guaranteed to be set.
  var slot = document.getElementById('sel-' + field);
  var row = slot ? slot.parentNode : null;
  var btn = row && row.querySelector ? row.querySelector('button') : null;
  if (!btn && typeof event !== 'undefined' && event && event.target) btn = event.target;
  if (btn) btn.className = 'btn btn-primary btn-sm mapping-active';
  document.getElementById('status-bar').textContent = '미리보기에서 "' + field + '" 에 해당하는 요소를 클릭하세요';
}
// === Receive the element picked inside the iframe ===
// Handles "element-selected" messages posted by the injected mapper script and
// stores the selector for the field armed by startMapping.
window.addEventListener('message', function(e) {
  // Only accept messages from the preview frame; other windows could otherwise spoof a selection.
  var frame = document.getElementById('preview-frame');
  if (!frame || e.source !== frame.contentWindow) return;
  var msg = e.data;
  if (!msg || msg.type !== 'element-selected' || !mappingField) return;
  var selector = typeof msg.selector === 'string' ? msg.selector : '';
  if (!selector) return;
  var text = typeof msg.text === 'string' ? msg.text : '';
  if (mappingField === 'container') {
    // Container: remember the selector of the clicked row.
    containerSelector = selector;
    document.getElementById('sel-container').textContent = selector;
    document.getElementById('sel-container').className = 'field-selector mapped';
  } else if (mappingField === 'content') {
    mappings.content_selector = selector;
    document.getElementById('sel-content').textContent = selector;
    document.getElementById('sel-content').className = 'field-selector mapped';
  } else if (mappingField === 'url') {
    // URL: read href from anchors; any other element is read as text and
    // must not carry an attr key.
    var isLink = msg.tag === 'a';
    mappings.url = isLink
      ? { selector: selector, type: 'attr', attr: 'href' }
      : { selector: selector, type: 'text' };
    document.getElementById('sel-url').textContent = selector + (isLink ? ' [href]' : '');
    document.getElementById('sel-url').className = 'field-selector mapped';
  } else {
    mappings[mappingField] = { selector: selector, type: 'text' };
    var el = document.getElementById('sel-' + mappingField);
    if (el) {
      el.textContent = selector + ' → "' + text.substring(0, 30) + '"';
      el.className = 'field-selector mapped';
    }
  }
  // Mapping done: disarm and reset the field buttons.
  mappingField = null;
  document.querySelectorAll('.field-row button').forEach(function(b) { b.className = 'btn btn-outline btn-sm'; });
  document.getElementById('status-bar').textContent = '매핑 완료! 다른 필드를 선택하거나 저장하세요.';
  updateJson();
});
// === Toggle login fields ===
/**
 * Show the login settings panel while the "login required" checkbox is
 * checked and hide it otherwise.
 */
function toggleLogin() {
  var panel = document.getElementById('login-fields');
  var enabled = document.getElementById('m-login-enable').checked;
  if (enabled) {
    panel.style.display = 'block';
  } else {
    panel.style.display = 'none';
  }
}
// === Update JSON preview ===
/**
 * Rebuild the parse rules from the form options and current mappings and
 * write them, pretty-printed, into #json-preview.
 *
 * This script is emitted from inside a template literal (the body argument of
 * the enclosing include call), which consumes backslash escapes before the
 * browser sees them, so the patterns below use bracket classes only.
 */
function updateJson() {
  // Drop the clicked row's own index so the container matches every sibling row.
  // NOTE(review): assumes the crawler iterates over all matches of container - confirm.
  function rowSelector(sel) {
    return sel.replace(/:nth-child[(][0-9]+[)]$/, '');
  }
  // Make a field selector relative to the row when it really is a descendant path.
  function relativeToRow(rowSel, sel) {
    if (sel.indexOf(rowSel) !== 0) return sel;
    // Accept any row index so fields picked in a different row still resolve.
    var rest = sel.substring(rowSel.length).replace(/^:nth-child[(][0-9]+[)]/, '');
    // Require a child combinator next; otherwise the prefix only matched part
    // of a longer name (for example div.item versus div.item-title).
    var sep = /^[ ]*>[ ]*/.exec(rest);
    if (!sep) return sel;
    return rest.substring(sep[0].length) || sel;
  }

  var rules = {};
  // Browser mode
  if (document.getElementById('m-browser').checked) {
    rules.browser = true;
    // Keep an explicit 0; only fall back to 3000 for missing or invalid input.
    var waitMs = parseInt(document.getElementById('m-wait').value, 10);
    rules.wait = (isNaN(waitMs) || waitMs < 0) ? 3000 : waitMs;
  }
  // Login
  if (document.getElementById('m-login-enable').checked) {
    var userSel = document.getElementById('m-login-user-sel').value;
    var passSel = document.getElementById('m-login-pass-sel').value;
    var btnSel = document.getElementById('m-login-btn-sel').value;
    if (userSel && passSel) {
      rules.login = {
        url: document.getElementById('m-login-url').value.trim(),
        steps: [
          { action: 'type', selector: userSel, value: document.getElementById('m-login-user-val').value },
          { action: 'type', selector: passSel, value: document.getElementById('m-login-pass-val').value },
          { action: 'click', selector: btnSel || 'button[type=submit]' },
          { wait: 2000 }
        ]
      };
    }
  }
  if (dataType === 'landing') {
    rules.content_selector = mappings.content_selector || 'body';
    rules.remove_selectors = 'script, style, iframe, nav, header, footer, .ad, .ads, .sidebar';
    rules.meta = {
      title: { selector: 'title', type: 'text' },
      description: { selector: 'meta[name=description]', type: 'attr', attr: 'content' }
    };
  } else if (dataType === 'list') {
    var rowSel = containerSelector ? rowSelector(containerSelector) : null;
    if (rowSel) rules.container = rowSel;
    rules.fields = {};
    ['name','url','url_text','rank','features'].forEach(function(f) {
      var mapping = mappings[f];
      if (!mapping) return;
      var sel = rowSel ? relativeToRow(rowSel, mapping.selector) : mapping.selector;
      rules.fields[f] = { selector: sel, type: mapping.type || 'text' };
      if (mapping.attr) rules.fields[f].attr = mapping.attr;
    });
    rules.meta = { title: { selector: 'title', type: 'text' } };
  }
  document.getElementById('json-preview').textContent = JSON.stringify(rules, null, 2);
}
// === Save site ===
/**
 * Create a site from the form fields and the current parse rules, start a
 * crawl for it, and redirect to the site list on success. Failures are
 * reported through toast.
 *
 * @returns {Promise<void>}
 */
async function saveMappedSite() {
  var name = document.getElementById('m-name').value.trim();
  var url = document.getElementById('m-url').value.trim();
  if (!name) { toast('사이트명을 입력하세요', 'error'); return; }
  if (!url) { toast('URL을 입력하세요', 'error'); return; }
  // The preview is only refreshed on type selection and after a mapping, so
  // rebuild it here; otherwise options changed afterwards (JS rendering,
  // wait, login) would be missing from the saved rules.
  updateJson();
  var rules;
  try { rules = JSON.parse(document.getElementById('json-preview').textContent); } catch(e) { toast('JSON 오류', 'error'); return; }
  var data = {
    name: name,
    url: url,
    slug: document.getElementById('m-slug').value.trim() || null,
    description: document.getElementById('m-desc').value.trim(),
    parse_rules: rules,
    template: dataType === 'landing' ? 'landing' : 'default',
    adsense_config_id: document.getElementById('m-adsense').value || null
  };
  try {
    // Create the site
    var site = await api('POST', '/api/sites', data);
    if (site.error) throw new Error(site.error);
    toast('사이트 생성 완료! 크롤링 시작...');
    // Crawl immediately
    var crawlResult = await api('POST', '/api/sites/' + site.id + '/crawl');
    if (crawlResult.error) throw new Error(crawlResult.error);
    toast('크롤링 완료! ' + (crawlResult.itemCount || 0) + '개 항목 추출');
    // Go to the site management page
    setTimeout(function() { location.href = '/admin/sites'; }, 1500);
  } catch (e) {
    toast('저장 실패: ' + e.message, 'error');
  }
}
</script>
` }) %>