#!/usr/bin/env python3
"""
Content Parser for Locust E2E Test Generation.

Scans the content/ directory, parses lesson markdown files,
extracts test-relevant data, and writes test_data.json.

Also injects a LOCUST_TEST token into tokens_siswa.csv if not present.

Usage (from elemes/load-test/):
    python content_parser.py
    python content_parser.py --content-dir ../../content --tokens-file ../../tokens_siswa.csv --num-tokens 50
"""
import argparse
import csv
import json
import os
import re
import sys
import uuid
# ---------------------------------------------------------------------------
# Marker extraction (mirrors lesson_service.py logic)
# ---------------------------------------------------------------------------
def extract_section(content: str, start_marker: str, end_marker: str) -> tuple[str, str]:
    """Extract the text between a start/end marker pair.

    Args:
        content: Full markdown text to search.
        start_marker: Literal opening marker, e.g. '---INITIAL_CODE---'.
        end_marker: Literal closing marker, e.g. '---END_INITIAL_CODE---'.

    Returns:
        (extracted, remaining): the stripped text between the markers, and
        the content with the whole marker section removed.  If the start
        marker is missing, or no end marker follows it, returns
        ("", content) unchanged.
    """
    start_idx = content.find(start_marker)
    if start_idx == -1:
        return "", content

    # Search for the end marker *after* the start marker, so a stray end
    # marker earlier in the document cannot shadow the real section
    # (previously a leading end marker made the whole section unextractable).
    end_idx = content.find(end_marker, start_idx + len(start_marker))
    if end_idx == -1:
        return "", content

    extracted = content[start_idx + len(start_marker):end_idx].strip()
    remaining = content[:start_idx] + content[end_idx + len(end_marker):]
    return extracted, remaining
def detect_lesson_type(content: str) -> str:
    """Classify a lesson by which editor markers appear in its markdown.

    Priority: arduino > quiz > hybrid > circuit > python > 'c' (default).
    """
    marker_map = {
        'arduino': '---INITIAL_CODE_ARDUINO---',
        'c': '---INITIAL_CODE---',
        'python': '---INITIAL_PYTHON---',
        'circuit': '---INITIAL_CIRCUIT---',
        'quiz': '---INITIAL_QUIZ---',
        'velxio': '---VELXIO_CIRCUIT---',
    }
    found = {key: marker in content for key, marker in marker_map.items()}

    # A Velxio circuit without any C/Python code is treated as Arduino.
    if found['arduino'] or (found['velxio'] and not found['c'] and not found['python']):
        return 'arduino'
    if found['quiz']:
        return 'quiz'
    if found['c'] and found['circuit']:
        return 'hybrid'
    if found['circuit'] and not found['c'] and not found['python']:
        return 'circuit'
    if found['python'] and not found['c']:
        return 'python'
    return 'c'
def parse_lesson(filepath: str) -> dict:
    """Parse one lesson markdown file into a dict of test-relevant fields.

    Always emits slug/type/feature flags; marker-derived text fields are
    only included when the corresponding section is non-empty.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    slug = os.path.basename(filepath).replace('.md', '')
    lesson_type = detect_lesson_type(content)

    def section(name: str) -> str:
        # Pull the body of a ---NAME--- / ---END_NAME--- marker pair.
        text, _ = extract_section(content, f'---{name}---', f'---END_{name}---')
        return text

    # Output-field name -> extracted section body, in the order the fields
    # should appear in the resulting dict (keeps JSON output stable).
    sections = {
        'initial_code_c': section('INITIAL_CODE'),
        'solution_code': section('SOLUTION_CODE'),
        'initial_python': section('INITIAL_PYTHON'),
        'solution_python': section('SOLUTION_PYTHON'),
        'expected_output': section('EXPECTED_OUTPUT'),
        'expected_output_python': section('EXPECTED_OUTPUT_PYTHON'),
        'initial_code_arduino': section('INITIAL_CODE_ARDUINO'),
        'velxio_circuit': section('VELXIO_CIRCUIT'),
        'expected_serial': section('EXPECTED_SERIAL_OUTPUT'),
        'expected_wiring': section('EXPECTED_WIRING'),
    }

    # A lesson is compilable if it has solution code or Arduino initial code.
    is_compilable = bool(
        sections['solution_code']
        or sections['solution_python']
        or sections['initial_code_arduino']
    )

    data = {
        'slug': slug,
        'type': lesson_type,
        'has_c': bool(sections['initial_code_c']),
        'has_python': bool(sections['initial_python']),
        'has_circuit': '---INITIAL_CIRCUIT---' in content,
        'has_arduino': bool(sections['initial_code_arduino']),
        'has_velxio': bool(sections['velxio_circuit']),
        'compilable': is_compilable,
        'key_text': section('KEY_TEXT'),
    }

    # Attach only the sections that are actually present in this lesson.
    for field, value in sections.items():
        if value:
            data[field] = value

    return data
def get_ordered_slugs(content_dir: str) -> list[str]:
    """Return lesson slugs in the order listed in home.md.

    Reads the 'Available_Lessons' section of home.md and extracts the target
    filename of every markdown link, tolerating both the 4-dash and 3-dash
    separator spellings and an optional 'lesson/' URL prefix.

    Returns an empty list when home.md or the section is missing.
    """
    home_path = os.path.join(content_dir, 'home.md')
    if not os.path.exists(home_path):
        return []

    with open(home_path, 'r', encoding='utf-8') as f:
        home_content = f.read()

    parts = home_content.split('----Available_Lessons----')
    if len(parts) <= 1:
        # Try alternate separator spelling.
        parts = home_content.split('---Available_Lessons---')
        if len(parts) <= 1:
            return []

    links = re.findall(r'\[([^\]]+)\]\((?:lesson/)?([^\)]+)\)', parts[1])
    # removesuffix (not replace) so '.md' is only stripped from the end of
    # the filename, never from the middle of a slug.
    return [fn.removesuffix('.md') for _, fn in links]
# ---------------------------------------------------------------------------
# Token injection
# ---------------------------------------------------------------------------
def ensure_test_tokens(tokens_file: str, lesson_slugs: list[str], num_tokens: int) -> list[str]:
    """
    Ensure a specified number of test tokens exist in tokens_siswa.csv.
    Returns a list of token strings.
    """
    rows: list[dict] = []
    fieldnames: list[str] = []

    if os.path.exists(tokens_file):
        with open(tokens_file, 'r', newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f, delimiter=';')
            fieldnames = list(reader.fieldnames or [])
            rows = list(reader)

    # Tokens previously injected by this script are recognised by prefix.
    existing = [row['token'] for row in rows
                if row.get('token', '').startswith('LOCUST_TEST_')]

    if not fieldnames:
        fieldnames = ['token', 'nama_siswa'] + lesson_slugs

    reused = existing[:num_tokens]
    if len(reused) >= num_tokens:
        print(f" ♻ Reusing {num_tokens} existing test tokens.")
        return reused

    # Not enough reusable tokens: mint the shortfall, then rewrite the CSV
    # with the old rows plus the freshly minted bot rows.
    minted: list[str] = []
    while len(reused) + len(minted) < num_tokens:
        token = f"LOCUST_TEST_{uuid.uuid4().hex[:8]}"
        bot_row = {'token': token,
                   'nama_siswa': f"Locust Bot {len(reused) + len(minted) + 1}"}
        for slug in lesson_slugs:
            if slug not in fieldnames:
                fieldnames.append(slug)  # new lesson column for everyone
            bot_row[slug] = 'not_started'
        rows.append(bot_row)
        minted.append(token)

    with open(tokens_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=';')
        writer.writeheader()
        writer.writerows(rows)

    print(f" ✚ Injected {len(minted)} new test tokens.")
    return reused + minted
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def _get_teacher_token(tokens_file: str) -> str:
|
|
"""Get the teacher token (first data row)."""
|
|
if not os.path.exists(tokens_file):
|
|
return ""
|
|
with open(tokens_file, 'r', newline='', encoding='utf-8') as f:
|
|
reader = csv.DictReader(f, delimiter=';')
|
|
for row in reader:
|
|
return row.get('token', '')
|
|
return ""
|
|
|
|
|
|
def main():
    """CLI entry point: parse lessons, ensure test tokens, write JSON."""
    parser = argparse.ArgumentParser(description='Parse content/ for Locust test generation')
    parser.add_argument('--content-dir', default='../../content',
                        help='Path to content directory (default: ../../content)')
    parser.add_argument('--tokens-file', default='../../tokens_siswa.csv',
                        help='Path to tokens CSV (default: ../../tokens_siswa.csv)')
    parser.add_argument('--num-tokens', type=int, default=50,
                        help='Number of test tokens to generate (default: 50)')
    parser.add_argument('--output', default='test_data.json',
                        help='Output JSON file (default: test_data.json)')
    args = parser.parse_args()

    content_dir = os.path.abspath(args.content_dir)
    tokens_file = os.path.abspath(args.tokens_file)

    banner = '=' * 60
    print(f"\n{banner}")
    print(" Elemes Content Parser for Locust E2E Testing")
    print(banner)
    print(f" Content dir : {content_dir}")
    print(f" Tokens file : {tokens_file}")
    print(f" Num tokens : {args.num_tokens}")
    print(f" Output : {args.output}")
    print()

    # 1. Ordered lesson slugs from home.md; fall back to a directory scan.
    ordered_slugs = get_ordered_slugs(content_dir) or [
        name.replace('.md', '')
        for name in sorted(os.listdir(content_dir))
        if name.endswith('.md') and name != 'home.md'
    ]

    print(f" 📚 Found {len(ordered_slugs)} lessons:")

    # 2. Parse every lesson file that actually exists.
    type_icons = {
        'c': '🔧', 'python': '🐍', 'hybrid': '🔀',
        'circuit': '⚡', 'arduino': '🤖', 'quiz': '❓'
    }
    lessons = []
    for slug in ordered_slugs:
        lesson_path = os.path.join(content_dir, f'{slug}.md')
        if not os.path.exists(lesson_path):
            print(f" ⚠ {slug}.md not found, skipping")
            continue

        parsed = parse_lesson(lesson_path)
        lessons.append(parsed)

        marker = type_icons.get(parsed['type'], '📄')
        compile_note = '✓ compile' if parsed.get('compilable') else '✗ compile'
        print(f" {marker} {slug} [{parsed['type']}] {compile_note}")

    # 3. Inject (or reuse) the load-test student tokens.
    print()
    tokens = ensure_test_tokens(tokens_file, ordered_slugs, args.num_tokens)

    # 4. Assemble the JSON payload, including per-type lesson counts.
    type_counts = {
        kind: sum(1 for entry in lessons if entry['type'] == kind)
        for kind in ('c', 'python', 'hybrid', 'circuit', 'arduino', 'quiz')
    }
    test_data = {
        'generated_by': 'content_parser.py',
        'tokens': tokens,
        'teacher_token': _get_teacher_token(tokens_file),
        'lessons': lessons,
        'stats': {
            'total': len(lessons),
            **type_counts,
            'compilable': sum(1 for entry in lessons if entry.get('compilable')),
        },
    }

    # 5. Write output and print a short summary.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(test_data, f, indent=2, ensure_ascii=False)

    print(f"\n ✅ Wrote {args.output}")
    print(f" {test_data['stats']['total']} lessons "
          f"({test_data['stats']['compilable']} compilable)")
    print(f"\n Next: locust -f locustfile.py")
    print(f"{banner}\n")


if __name__ == '__main__':
    main()