#!/usr/bin/env python3
"""
Content Parser for Locust E2E Test Generation.

Scans the content/ directory, parses lesson markdown files, extracts
test-relevant data, and writes test_data.json. Also injects a LOCUST_TEST
token into tokens_siswa.csv if not present.

Usage (from elemes/load-test/):
    python content_parser.py
    python content_parser.py --content-dir ../../content --tokens-file ../../tokens_siswa.csv --num-tokens 50
"""

import argparse
import csv
import json
import os
import re
import uuid


# ---------------------------------------------------------------------------
# Marker extraction (mirrors lesson_service.py logic)
# ---------------------------------------------------------------------------

def extract_section(content: str, start_marker: str, end_marker: str) -> tuple[str, str]:
    """Extract text between markers. Returns (extracted, remaining).

    NOTE: the end marker is searched from the beginning of the string (not
    after the start marker); if it occurs before the start marker the section
    is treated as absent. This intentionally mirrors lesson_service.py —
    keep both in sync if that logic ever changes.
    """
    if start_marker not in content or end_marker not in content:
        return "", content
    start_idx = content.find(start_marker)
    end_idx = content.find(end_marker)
    if start_idx == -1 or end_idx == -1 or end_idx <= start_idx:
        return "", content
    extracted = content[start_idx + len(start_marker):end_idx].strip()
    # Remaining content = everything outside the marker pair (markers removed).
    remaining = content[:start_idx] + content[end_idx + len(end_marker):]
    return extracted, remaining


def detect_lesson_type(content: str) -> str:
    """Detect lesson type from markers present in content.

    Precedence (first match wins): arduino > quiz > hybrid > circuit >
    python > c. 'c' is the fallback when no marker is present.
    """
    has_arduino = '---INITIAL_CODE_ARDUINO---' in content
    has_c = '---INITIAL_CODE---' in content
    has_python = '---INITIAL_PYTHON---' in content
    has_circuit = '---INITIAL_CIRCUIT---' in content
    has_quiz = '---INITIAL_QUIZ---' in content
    has_velxio = '---VELXIO_CIRCUIT---' in content
    # A Velxio circuit without C/Python code is also treated as Arduino.
    if has_arduino or (has_velxio and not has_c and not has_python):
        return 'arduino'
    if has_quiz:
        return 'quiz'
    if has_c and has_circuit:
        return 'hybrid'
    if has_circuit and not has_c and not has_python:
        return 'circuit'
    if has_python and not has_c:
        return 'python'
    return 'c'


def parse_lesson(filepath: str) -> dict:
    """Parse a single lesson markdown file and extract test data.

    Returns a dict with the slug, detected type, capability flags, and any
    marker sections that were present (initial/solution code, expected
    outputs, wiring, key text).
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # removesuffix (not replace) so a '.md' appearing mid-name survives.
    slug = os.path.basename(filepath).removesuffix('.md')
    lesson_type = detect_lesson_type(content)

    # Extract all relevant sections
    initial_code_c, _ = extract_section(content, '---INITIAL_CODE---', '---END_INITIAL_CODE---')
    initial_python, _ = extract_section(content, '---INITIAL_PYTHON---', '---END_INITIAL_PYTHON---')
    initial_code_arduino, _ = extract_section(content, '---INITIAL_CODE_ARDUINO---', '---END_INITIAL_CODE_ARDUINO---')
    velxio_circuit, _ = extract_section(content, '---VELXIO_CIRCUIT---', '---END_VELXIO_CIRCUIT---')
    expected_output, _ = extract_section(content, '---EXPECTED_OUTPUT---', '---END_EXPECTED_OUTPUT---')
    expected_output_python, _ = extract_section(content, '---EXPECTED_OUTPUT_PYTHON---', '---END_EXPECTED_OUTPUT_PYTHON---')
    expected_serial, _ = extract_section(content, '---EXPECTED_SERIAL_OUTPUT---', '---END_EXPECTED_SERIAL_OUTPUT---')
    expected_wiring, _ = extract_section(content, '---EXPECTED_WIRING---', '---END_EXPECTED_WIRING---')
    key_text, _ = extract_section(content, '---KEY_TEXT---', '---END_KEY_TEXT---')
    solution_code, _ = extract_section(content, '---SOLUTION_CODE---', '---END_SOLUTION_CODE---')
    solution_python, _ = extract_section(content, '---SOLUTION_PYTHON---', '---END_SOLUTION_PYTHON---')

    # A lesson is compilable if it has solution code or Arduino initial code
    is_compilable = bool(solution_code or solution_python or initial_code_arduino)

    data = {
        'slug': slug,
        'type': lesson_type,
        'has_c': bool(initial_code_c),
        'has_python': bool(initial_python),
        'has_circuit': '---INITIAL_CIRCUIT---' in content,
        'has_arduino': bool(initial_code_arduino),
        'has_velxio': bool(velxio_circuit),
        'compilable': is_compilable,
        'key_text': key_text,
    }

    # Add type-specific fields (only when non-empty, to keep the JSON lean)
    if initial_code_c:
        data['initial_code_c'] = initial_code_c
    if solution_code:
        data['solution_code'] = solution_code
    if initial_python:
        data['initial_python'] = initial_python
    if solution_python:
        data['solution_python'] = solution_python
    if expected_output:
        data['expected_output'] = expected_output
    if expected_output_python:
        data['expected_output_python'] = expected_output_python
    if initial_code_arduino:
        data['initial_code_arduino'] = initial_code_arduino
    if velxio_circuit:
        data['velxio_circuit'] = velxio_circuit
    if expected_serial:
        data['expected_serial'] = expected_serial
    if expected_wiring:
        data['expected_wiring'] = expected_wiring
    return data


def get_ordered_slugs(content_dir: str) -> list[str]:
    """Get lesson slugs in order from home.md's Available_Lessons section.

    Returns [] when home.md is missing or has no recognizable separator.
    """
    home_path = os.path.join(content_dir, 'home.md')
    if not os.path.exists(home_path):
        return []
    with open(home_path, 'r', encoding='utf-8') as f:
        home_content = f.read()
    parts = home_content.split('----Available_Lessons----')
    if len(parts) <= 1:
        # Try alternate separator
        parts = home_content.split('---Available_Lessons---')
        if len(parts) <= 1:
            return []
    # Markdown links: [Title](lesson/file.md) or [Title](file.md)
    links = re.findall(r'\[([^\]]+)\]\((?:lesson/)?([^\)]+)\)', parts[1])
    return [fn.removesuffix('.md') for _, fn in links]


# ---------------------------------------------------------------------------
# Token injection
# ---------------------------------------------------------------------------

def ensure_test_tokens(tokens_file: str, lesson_slugs: list[str], num_tokens: int) -> list[str]:
    """
    Ensure a specified number of test tokens exist in tokens_siswa.csv.
    Returns a list of token strings.

    Existing LOCUST_TEST_* tokens are reused; only the shortfall is generated
    and appended (the CSV is rewritten in that case). The CSV uses ';' as the
    delimiter; each lesson slug is a progress column initialised to
    'not_started' for new rows.
    """
    existing_tokens = []
    rows = []
    fieldnames = []
    if os.path.exists(tokens_file):
        with open(tokens_file, 'r', newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f, delimiter=';')
            fieldnames = reader.fieldnames or []
            rows = list(reader)
        for row in rows:
            if row.get('token', '').startswith('LOCUST_TEST_'):
                existing_tokens.append(row['token'])
    if not fieldnames:
        # Fresh file: build the header from scratch.
        fieldnames = ['token', 'nama_siswa'] + lesson_slugs

    tokens_to_return = existing_tokens[:num_tokens]
    if len(tokens_to_return) < num_tokens:
        needed = num_tokens - len(tokens_to_return)
        new_tokens = []
        for i in range(needed):
            token = f"LOCUST_TEST_{uuid.uuid4().hex[:8]}"
            new_tokens.append(token)
            new_row = {'token': token, 'nama_siswa': f"Locust Bot {len(tokens_to_return) + i + 1}"}
            for slug in lesson_slugs:
                # Grow the header for any lesson column the file didn't have;
                # pre-existing rows get '' for new columns (DictWriter restval).
                if slug not in fieldnames:
                    fieldnames.append(slug)
                new_row[slug] = 'not_started'
            rows.append(new_row)
        with open(tokens_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=';')
            writer.writeheader()
            writer.writerows(rows)
        print(f" ✚ Injected {len(new_tokens)} new test tokens.")
        tokens_to_return.extend(new_tokens)
    else:
        print(f" ♻ Reusing {num_tokens} existing test tokens.")
    return tokens_to_return


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def _get_teacher_token(tokens_file: str) -> str:
    """Get the teacher token (first data row). Returns "" if unavailable."""
    if not os.path.exists(tokens_file):
        return ""
    with open(tokens_file, 'r', newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter=';')
        for row in reader:
            return row.get('token', '')
    return ""


def main():
    """CLI entry point: parse lessons, inject tokens, write test_data.json."""
    parser = argparse.ArgumentParser(description='Parse content/ for Locust test generation')
    parser.add_argument('--content-dir', default='../../content', help='Path to content directory (default: ../../content)')
    parser.add_argument('--tokens-file', default='../../tokens_siswa.csv', help='Path to tokens CSV (default: ../../tokens_siswa.csv)')
    parser.add_argument('--num-tokens', type=int, default=50, help='Number of test tokens to generate (default: 50)')
    parser.add_argument('--output', default='test_data.json', help='Output JSON file (default: test_data.json)')
    args = parser.parse_args()

    content_dir = os.path.abspath(args.content_dir)
    tokens_file = os.path.abspath(args.tokens_file)

    print(f"\n{'='*60}")
    print(f" Elemes Content Parser for Locust E2E Testing")
    print(f"{'='*60}")
    print(f" Content dir : {content_dir}")
    print(f" Tokens file : {tokens_file}")
    print(f" Num tokens : {args.num_tokens}")
    print(f" Output : {args.output}")
    print()

    # 1. Get ordered lesson slugs
    ordered_slugs = get_ordered_slugs(content_dir)
    if not ordered_slugs:
        # Fallback: scan directory (alphabetical, home.md excluded)
        ordered_slugs = [
            f.removesuffix('.md') for f in sorted(os.listdir(content_dir))
            if f.endswith('.md') and f != 'home.md'
        ]
    print(f" 📚 Found {len(ordered_slugs)} lessons:")

    # 2. Parse each lesson
    lessons = []
    for slug in ordered_slugs:
        filepath = os.path.join(content_dir, f'{slug}.md')
        if not os.path.exists(filepath):
            print(f" ⚠ {slug}.md not found, skipping")
            continue
        lesson = parse_lesson(filepath)
        lessons.append(lesson)
        # Summary icon per type
        icons = {
            'c': '🔧', 'python': '🐍', 'hybrid': '🔀',
            'circuit': '⚡', 'arduino': '🤖', 'quiz': '❓'
        }
        icon = icons.get(lesson['type'], '📄')
        compilable = '✓ compile' if lesson.get('compilable') else '✗ compile'
        print(f" {icon} {slug} [{lesson['type']}] {compilable}")

    # 3. Inject test tokens
    print()
    tokens = ensure_test_tokens(tokens_file, ordered_slugs, args.num_tokens)

    # 4. Build output
    test_data = {
        'generated_by': 'content_parser.py',
        'tokens': tokens,
        'teacher_token': _get_teacher_token(tokens_file),
        'lessons': lessons,
        'stats': {
            'total': len(lessons),
            'c': sum(1 for les in lessons if les['type'] == 'c'),
            'python': sum(1 for les in lessons if les['type'] == 'python'),
            'hybrid': sum(1 for les in lessons if les['type'] == 'hybrid'),
            'circuit': sum(1 for les in lessons if les['type'] == 'circuit'),
            'arduino': sum(1 for les in lessons if les['type'] == 'arduino'),
            'quiz': sum(1 for les in lessons if les['type'] == 'quiz'),
            'compilable': sum(1 for les in lessons if les.get('compilable')),
        }
    }

    # 5. Write output
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(test_data, f, indent=2, ensure_ascii=False)

    print(f"\n ✅ Wrote {args.output}")
    print(f" {test_data['stats']['total']} lessons "
          f"({test_data['stats']['compilable']} compilable)")
    print(f"\n Next: locust -f locustfile.py")
    print(f"{'='*60}\n")


if __name__ == '__main__':
    main()