OpenWebUI: python knowledge PDF CLI API upload
Jump to navigation
Jump to search
owui_upload_kb.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Upload PDFs to an Open WebUI Knowledge collection through the API (no GUI).

Features:
- Create a knowledge collection (a new one is created on every run)
- Upload many PDFs (a folder, or a single file path)
- Add every uploaded file to the collection
- Simple duplicate handling (skip when the server answers "Duplicate content")
- Optionally run a RAG test query against the collection

Endpoints used:
- Upload file:       POST /api/v1/files/
- Add to knowledge:  POST /api/v1/knowledge/{id}/file/add
- Create knowledge:  POST /api/v1/knowledge/create
- RAG test query:    POST /api/chat/completions
"""

import os
import sys
import time
import json
import glob
import argparse
import requests
from typing import Dict, List, Optional


def api_headers(api_key: str, accept_json: bool = True) -> Dict[str, str]:
    """Build the HTTP headers shared by every request.

    Args:
        api_key: Open WebUI API key, sent as a Bearer token.
        accept_json: When true, also send ``Accept: application/json``.

    Returns:
        A fresh header dictionary (safe for the caller to extend).
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    if accept_json:
        headers["Accept"] = "application/json"
    return headers


def create_knowledge(base_url: str, api_key: str, name: str, description: str = "") -> str:
    """Create a knowledge collection and return its id.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Open WebUI API key.
        name: Name of the collection to create.
        description: Optional description of the collection.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response carries no ``id``.
    """
    url = f"{base_url}/api/v1/knowledge/create"
    payload = {"name": name, "description": description}
    r = requests.post(
        url,
        headers={**api_headers(api_key), "Content-Type": "application/json"},
        json=payload,
        timeout=120,
    )
    r.raise_for_status()
    data = r.json()
    kb_id = data.get("id")
    if not kb_id:
        raise RuntimeError(f"Gagal membuat knowledge: {data}")
    return kb_id


def upload_file(base_url: str, api_key: str, file_path: str) -> Optional[str]:
    """Upload one file and return the file id assigned by the server.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Open WebUI API key.
        file_path: Local path of the PDF to upload.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the server
        answers with a status >= 400 (a warning is printed in that case).
    """
    url = f"{base_url}/api/v1/files/"
    with open(file_path, "rb") as f:
        # No explicit Content-Type header here: requests has to generate the
        # multipart boundary itself.
        files = {"file": (os.path.basename(file_path), f, "application/pdf")}
        r = requests.post(url, headers=api_headers(api_key), files=files, timeout=600)
    if r.status_code >= 400:
        # Failures are usually caused by unusual or corrupted PDFs; warn and
        # let the caller move on to the next file instead of aborting.
        print(f"[WARN] Upload gagal: {file_path} -> {r.status_code} {r.text}")
        return None
    data = r.json()
    return data.get("id")  # file_id


def add_file_to_knowledge(base_url: str, api_key: str, knowledge_id: str, file_id: str) -> bool:
    """Attach an already uploaded file to a knowledge collection.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Open WebUI API key.
        knowledge_id: Id of the target collection.
        file_id: Id returned by ``upload_file``.

    Returns:
        ``True`` when the file was added, ``False`` when the server rejected
        it with a 400 response containing "Duplicate content".

    Raises:
        requests.HTTPError: For any other error status.
    """
    url = f"{base_url}/api/v1/knowledge/{knowledge_id}/file/add"
    payload = {"file_id": file_id}
    r = requests.post(
        url,
        headers={**api_headers(api_key), "Content-Type": "application/json"},
        json=payload,
        timeout=120,
    )
    if r.status_code == 400 and "Duplicate content" in r.text:
        # Open WebUI may answer 400 "Duplicate content" when identical content
        # is already present; treat that as a skip, not as an error.
        print(f"[INFO] Duplikat terdeteksi, skip file_id={file_id}")
        return False
    r.raise_for_status()
    return True


def find_or_create_kb(base_url: str, api_key: str, kb_name: str, kb_description: str = "") -> str:
    """Return the id of the knowledge collection to upload into.

    NOTE: despite the name, no lookup is performed; a new collection called
    ``kb_name`` is created on every call. If you already know the UUID of an
    existing collection, use it directly instead of calling this function.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response carries no ``id``.
    """
    print(f"[STEP] Membuat knowledge '{kb_name}'")
    return create_knowledge(base_url, api_key, kb_name, kb_description)


def collect_pdf_paths(input_path: str) -> List[str]:
    """Resolve ``input_path`` to the list of files to upload.

    Args:
        input_path: A folder containing PDFs, or the path of a single file.

    Returns:
        For a folder: the sorted paths of the regular files directly inside
        it (non-recursive) whose name ends in ``.pdf``, compared
        case-insensitively. For a file: a one-element list with that path.

    Raises:
        FileNotFoundError: If ``input_path`` is neither a folder nor a file.
    """
    if os.path.isdir(input_path):
        # Escape the folder part so characters such as "[" in its name are not
        # read as glob wildcards. The extension is matched in Python because
        # glob patterns are case-sensitive on POSIX and would miss "X.PDF".
        pattern = os.path.join(glob.escape(input_path), "*")
        return sorted(
            path
            for path in glob.glob(pattern)
            if path.lower().endswith(".pdf") and os.path.isfile(path)
        )
    elif os.path.isfile(input_path):
        return [input_path]
    else:
        raise FileNotFoundError(f"Path tidak ditemukan: {input_path}")


def rag_test_query(base_url: str, api_key: str, model: str, kb_id: str, user_query: str) -> str:
    """Ask one question against the collection to check that RAG works.

    Sends a chat completion request with
    ``files: [{"type": "collection", "id": kb_id}]``.

    Returns:
        The content of the first choice's message, or the whole response
        pretty-printed as JSON when it does not have that shape.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"{base_url}/api/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": user_query}],
        "files": [{"type": "collection", "id": kb_id}],
    }
    r = requests.post(
        url,
        headers={**api_headers(api_key), "Content-Type": "application/json"},
        json=payload,
        timeout=600,
    )
    r.raise_for_status()
    data = r.json()
    # The response is expected to follow the OpenAI-compatible schema; fall
    # back to dumping it only when a lookup along that path fails.
    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return json.dumps(data, ensure_ascii=False, indent=2)


def main():
    """Command-line entry point: collect PDFs, create the collection, upload."""
    p = argparse.ArgumentParser(description="Upload PDF ke Open WebUI Knowledge (tanpa GUI).")
    p.add_argument("--base-url", required=True, help="Contoh: http://localhost:3000")
    p.add_argument("--api-key", required=True, help="API Key dari Settings > Account")
    p.add_argument("--kb-name", required=True, help="Nama knowledge (collection) yang akan dibuat")
    p.add_argument("--kb-desc", default="", help="Deskripsi knowledge")
    p.add_argument("--input", required=True, help="Path ke file PDF atau folder berisi PDF")
    p.add_argument("--model", default="llama3.1", help="Nama model untuk uji RAG (opsional)")
    p.add_argument("--test-query", default="", help="Jika diisi, lakukan uji query RAG ke collection")
    p.add_argument("--sleep-after-upload", type=int, default=3,
                   help="Delay (detik) antar upload untuk memberi waktu proses embedding")
    args = p.parse_args()

    base_url = args.base_url.rstrip("/")
    api_key = args.api_key

    # 1) Collect the PDFs before touching the server, so that a wrong path or
    #    an empty folder does not leave an empty collection behind.
    pdfs = collect_pdf_paths(args.input)
    if not pdfs:
        print("[WARN] Tidak ada PDF ditemukan.")
        sys.exit(0)
    print(f"[STEP] Menemukan {len(pdfs)} file PDF")

    # 2) Create the knowledge collection
    kb_id = find_or_create_kb(base_url, api_key, args.kb_name, args.kb_desc)
    print(f"[OK] Knowledge dibuat: {kb_id}")

    # 3) Upload each file and add it to the collection
    uploaded = 0
    for path in pdfs:
        print(f"[STEP] Upload: {path}")
        file_id = upload_file(base_url, api_key, path)
        if not file_id:
            continue
        print(f"[OK] File terupload, file_id={file_id} -> tambah ke knowledge")
        try:
            # Count only files that were really added; a duplicate skip
            # returns False and must not inflate the success total.
            if add_file_to_knowledge(base_url, api_key, kb_id, file_id):
                uploaded += 1
        except requests.HTTPError as e:
            print(f"[ERR] Gagal tambah ke knowledge: {e.response.status_code} {e.response.text}")
        # Short pause so embedding jobs do not pile up (useful on CPU-only hosts).
        time.sleep(args.sleep_after_upload)

    print(f"[DONE] Selesai. Total file berhasil diproses: {uploaded}/{len(pdfs)}")

    # 4) Optional RAG test
    if args.test_query:
        print(f"[TEST] Jalankan uji RAG model={args.model}")
        answer = rag_test_query(base_url, api_key, args.model, kb_id, args.test_query)
        print("\n=== JAWABAN RAG ===\n")
        print(answer)
        print("\n===================\n")


if __name__ == "__main__":
    main()
owui-pakai
# 0) Set variables (optional)
export OWUI_URL="http://localhost:3000"
export OWUI_KEY="sk-xxxxxx"

# 1) Run: create the KB + upload every PDF in the ./dokumen folder
python3 owui_upload_kb.py \
  --base-url "$OWUI_URL" \
  --api-key "$OWUI_KEY" \
  --kb-name "Dokumen Kampus" \
  --kb-desc "Koleksi PDF Peraturan & Panduan" \
  --input "./dokumen" \
  --model "llama3.1" \
  --test-query "Ringkas poin penting dari semua dokumen tentang akreditasi BAN-PT."

# Or for a single file only:
python3 owui_upload_kb.py \
  --base-url "$OWUI_URL" \
  --api-key "$OWUI_KEY" \
  --kb-name "RIP ITTS 2025-2050" \
  --input "./RIP-ITTS-2025-2050.pdf" \
  --test-query "Apa visi utama dokumen ini?"