
Building an AI-powered study buddy for certifications
Your AI-powered CLI companion for any certification exam preparation! Ask questions about any topic in your study materials and get intelligent, personalized quizzes to test your knowledge.
The project consists of main.py, plus additional modules for the data_store, quiz_engine, and rag_handler packages, along with config_helper.py and requirements.txt. It uses Amazon Bedrock with a Knowledge Base, and stores data in Amazon DynamoDB. Sample-Certs-CLI-Quiz-App
with the following folder structure:
2
3
4
5
6
7
8
9
10
11
.
├── config_helper.py
├── main.py
├── requirements.txt
└── src
├── data_store
│ └── dynamo_client.py
├── quiz_engine
│ └── generator.py
└── rag_handler
└── knowledge_base.py
1
2
3
4
5
6
7
8
9
10
11
# Create the required folders
# (fixed: the original created "quiq_engine" but the touch commands below
# reference "quiz_engine", so the placeholder files could not be created)
mkdir -p Sample-CLI-Quiz-App/src/data_store \
         Sample-CLI-Quiz-App/src/quiz_engine \
         Sample-CLI-Quiz-App/src/rag_handler

# Create empty placeholder files
# (main.py and config_helper.py live at the project root, matching the
# folder structure shown above and main.py's sys.path setup)
touch Sample-CLI-Quiz-App/main.py \
      Sample-CLI-Quiz-App/config_helper.py \
      Sample-CLI-Quiz-App/src/data_store/dynamo_client.py \
      Sample-CLI-Quiz-App/src/quiz_engine/generator.py \
      Sample-CLI-Quiz-App/src/rag_handler/knowledge_base.py
main.py
:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
#!/usr/bin/env python3
"""
CLI Study Buddy: An AI-powered quiz application for certification exam preparation.
Uses Bedrock Knowledge Base for intelligent question generation and DynamoDB for progress tracking.
Key Components:
- BedrockKnowledgeBase: Handles interaction with your study materials
- QuizGenerator: Creates personalized quiz questions
- DynamoDBClient: Stores and retrieves your progress
"""
import os
import sys
import argparse
import logging
import uuid
import json
from pathlib import Path
from datetime import datetime
# Add src directory to path for module imports
sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
from rag_handler.knowledge_base import BedrockKnowledgeBase
from quiz_engine.generator import QuizGenerator
from data_store.dynamo_client import DynamoDBClient
from config_helper import get_kb_id
# Set up logging to track application behavior
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)

# Default configuration settings
DEFAULT_REGION = "us-east-1"
# Foundation-model ARN template; initialize() substitutes the region segment.
DEFAULT_MODEL_ID = "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0" # Default model ARN format
DEFAULT_TABLE_NAME = "certification_quiz_results" # DynamoDB table for storing results
# Defaults applied when the CLI does not override them.
DEFAULT_NUM_QUESTIONS = 5
DEFAULT_DIFFICULTY = "medium"
class CertQuizApp:
    """
    Main application class that coordinates between Knowledge Base, Quiz Generator, and DynamoDB.
    Handles user interaction and manages quiz sessions.
    """

    # ANSI escape codes for terminal colors, hoisted to class level so the
    # display methods share one definition instead of re-declaring them.
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    END = "\033[0m"

    def __init__(self, region=DEFAULT_REGION):
        """
        Initialize the application with AWS region and set up components.

        Args:
            region (str): AWS region for all services
        """
        self.region = region
        self.kb = None               # Knowledge Base client (set in initialize())
        self.quiz_generator = None   # Quiz generation engine (set in initialize())
        self.db_client = None        # DynamoDB client (set in initialize())
        self.user_id = self._get_or_create_user_id()  # Persistent user identifier

    def _get_or_create_user_id(self):
        """
        Retrieve the stored user ID, or create and persist a new one.

        The ID lives in ~/.cert_quiz/config.json so the same identity is
        reused across runs.

        Returns:
            str: Unique user identifier
        """
        config_dir = os.path.expanduser("~/.cert_quiz")
        config_file = os.path.join(config_dir, "config.json")
        # Ensure config directory exists
        os.makedirs(config_dir, exist_ok=True)

        config = {}
        if os.path.exists(config_file):
            try:
                with open(config_file, "r") as f:
                    config = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Corrupt or unreadable config: fall through and rewrite it.
                logger.warning("Could not read %s; recreating it", config_file)
                config = {}
            if config.get("user_id"):
                return config["user_id"]

        # No usable stored ID: create one and write it back. (Previously a
        # config file without a "user_id" key produced a fresh throwaway ID
        # on every run, so quiz history was never tied to a single user.)
        user_id = str(uuid.uuid4())
        config["user_id"] = user_id
        with open(config_file, "w") as f:
            json.dump(config, f)
        return user_id

    def initialize(self, kb_id):
        """
        Set up all components needed for the quiz application.

        Args:
            kb_id (str): Identifier for your Knowledge Base
        """
        logger.info("Initializing app components")
        # Build a model ARN that points at the configured region.
        model_arn = DEFAULT_MODEL_ID.replace("us-east-1", self.region)
        # Connection to the study materials.
        self.kb = BedrockKnowledgeBase(kb_id=kb_id, region=self.region, model_id=model_arn)
        # Quiz generation engine backed by the Knowledge Base.
        self.quiz_generator = QuizGenerator(
            knowledge_base=self.kb
        )
        # Progress-tracking store.
        self.db_client = DynamoDBClient(
            table_name=DEFAULT_TABLE_NAME,
            region=self.region
        )

    def run_quiz(self, topic, difficulty=DEFAULT_DIFFICULTY, num_questions=DEFAULT_NUM_QUESTIONS):
        """
        Execute a complete quiz session including question generation,
        user interaction, and result storage.

        Args:
            topic (str): The subject to be tested
            difficulty (str): Quiz difficulty level (easy/medium/hard)
            num_questions (int): Number of questions to generate

        Returns:
            float: Quiz score as a percentage

        Raises:
            ValueError: If initialize() has not been called first.
        """
        if not self.quiz_generator:
            raise ValueError("Quiz generator not initialized")
        logger.info(f"Starting quiz on {topic} ({difficulty}, {num_questions} questions)")

        # Get questions from the Knowledge Base
        questions = self.quiz_generator.create_quiz(
            topic=topic,
            difficulty=difficulty,
            num_questions=num_questions
        )
        # Guard against an empty quiz (e.g. the model output could not be
        # parsed) — previously this crashed with ZeroDivisionError below.
        if not questions:
            print(f"{self.RED}No questions could be generated for this topic.{self.END}")
            return 0.0

        # Track correct answers for scoring
        correct_answers = 0

        # Display quiz header
        print(f"\n{self.BOLD}{self.UNDERLINE}===== QUIZ: {topic.upper()} ====={self.END}")
        print(f"{self.BLUE}Difficulty: {difficulty.capitalize()}{self.END}")
        print(f"{self.BLUE}Number of questions: {num_questions}{self.END}")
        print(f"{self.YELLOW}Answer each question by entering A, B, C, or D.{self.END}\n")

        # Process each question
        for i, q in enumerate(questions):
            print(f"\n{self.BOLD}Question {i+1}: {q['question']}{self.END}")
            # Display answer options
            for letter, option_text in q['options'].items():
                print(f" {letter}. {option_text}")
            # Prompt until a valid letter is entered
            while True:
                user_answer = input(f"\n{self.YELLOW}Your answer: {self.END}").strip().upper()
                if user_answer in ('A', 'B', 'C', 'D'):
                    break
                print(f"{self.RED}Invalid input. Please enter A, B, C, or D.{self.END}")
            # Check answer and provide feedback
            if user_answer == q['answer']:
                correct_answers += 1
                print(f"\n{self.GREEN}✓ Correct!{self.END}")
            else:
                print(f"\n{self.RED}✗ Incorrect. The correct answer is {q['answer']}.{self.END}")
            # Show explanation for learning
            print(f"{self.BLUE}Explanation: {q['explanation']}{self.END}")

        # Calculate and display final score, colored by performance band:
        # green >= 80, yellow >= 60, red otherwise.
        score = (correct_answers / len(questions)) * 100
        print(f"\n{self.BOLD}{self.UNDERLINE}===== QUIZ COMPLETE ====={self.END}")
        if score >= 80:
            band = self.GREEN
        elif score >= 60:
            band = self.YELLOW
        else:
            band = self.RED
        print(f"{band}{self.BOLD}Score: {score:.1f}% ({correct_answers}/{len(questions)} correct){self.END}")

        # Store quiz results for progress tracking
        if self.db_client:
            self.db_client.save_quiz_result(
                user_id=self.user_id,
                topic=topic,
                score=score,
                num_questions=num_questions,
                difficulty=difficulty
            )
        return score

    def show_history(self, limit=10):
        """
        Display previous quiz results, newest first.

        Args:
            limit (int): Maximum number of historical results to show

        Raises:
            ValueError: If initialize() has not been called first.
        """
        if not self.db_client:
            raise ValueError("DynamoDB client not initialized")

        # Retrieve quiz history from DynamoDB
        results = self.db_client.get_user_results(self.user_id, limit=limit)
        if not results:
            print(f"\n{self.YELLOW}No quiz history found.{self.END}")
            return

        # Display history header
        print(f"\n{self.BOLD}{self.UNDERLINE}===== QUIZ HISTORY (Last {min(limit, len(results))}) ====={self.END}")
        print(f"{self.BOLD}{'Date':<20} {'Topic':<30} {'Score':<10} {'Difficulty':<10}{self.END}")
        print(f"{self.BLUE}{'-' * 70}{self.END}")

        # Display each historical result
        for result in results:
            # Timestamps are stored in ISO-8601 format by DynamoDBClient.
            timestamp = datetime.fromisoformat(result['timestamp'])
            date_str = timestamp.strftime("%Y-%m-%d %H:%M")
            # Color-code score based on performance
            score = float(result['score'])
            if score >= 80:
                band = self.GREEN
            elif score >= 60:
                band = self.YELLOW
            else:
                band = self.RED
            score_str = f"{band}{score:<10.1f}{self.END}"
            print(f"{date_str:<20} {result['topic']:<30} {score_str} {result['difficulty']:<10}")

    def show_topic_stats(self, topic):
        """
        Display performance statistics for a specific topic.

        Args:
            topic (str): Topic to analyze

        Raises:
            ValueError: If initialize() has not been called first.
        """
        if not self.db_client:
            raise ValueError("DynamoDB client not initialized")

        # Get topic statistics from DynamoDB
        stats = self.db_client.get_topic_statistics(self.user_id, topic)
        print(f"\n{self.BOLD}{self.UNDERLINE}===== TOPIC STATISTICS: {topic.upper()} ====={self.END}")
        if stats['attempts'] == 0:
            print(f"{self.YELLOW}No quiz attempts found for this topic.{self.END}")
            return

        # Display comprehensive statistics
        print(f"{self.BLUE}Attempts: {stats['attempts']}{self.END}")
        # Color-code average score based on performance
        avg_score = float(stats['average_score'])
        if avg_score >= 80:
            print(f"{self.GREEN}Average Score: {avg_score:.1f}%{self.END}")
        elif avg_score >= 60:
            print(f"{self.YELLOW}Average Score: {avg_score:.1f}%{self.END}")
        else:
            print(f"{self.RED}Average Score: {avg_score:.1f}%{self.END}")
        print(f"{self.GREEN}Highest Score: {float(stats['highest_score']):.1f}%{self.END}")
        print(f"{self.RED}Lowest Score: {float(stats['lowest_score']):.1f}%{self.END}")
        print(f"{self.BLUE}Total Questions Answered: {stats['total_questions']}{self.END}")

    def show_kb_info(self):
        """
        Display information about the Knowledge Base and its data sources.

        Raises:
            ValueError: If initialize() has not been called first.
        """
        if not self.kb:
            raise ValueError("Knowledge Base not initialized")

        # Get Knowledge Base details
        kb_info = self.kb.get_knowledge_base_info()
        print(f"\n{self.BOLD}{self.UNDERLINE}===== KNOWLEDGE BASE INFORMATION ====={self.END}")
        print(f"{self.BLUE}ID: {kb_info.get('knowledgeBaseId')}{self.END}")
        print(f"{self.BLUE}Name: {kb_info.get('name')}{self.END}")
        print(f"{self.BLUE}Description: {kb_info.get('description')}{self.END}")
        print(f"{self.GREEN}Status: {kb_info.get('status')}{self.END}")
        print(f"{self.BLUE}Created: {kb_info.get('createdAt')}{self.END}")
        print(f"{self.BLUE}Last Modified: {kb_info.get('updatedAt')}{self.END}")

        # List connected data sources
        data_sources = self.kb.list_data_sources()
        print(f"\n{self.BOLD}Data Sources ({len(data_sources)}):{self.END}")
        for ds in data_sources:
            print(f" {self.GREEN}- {ds.get('name')}: {ds.get('dataSourceId')} ({ds.get('status')}){self.END}")
def main():
    """
    CLI entry point: parse command-line arguments and route to the
    requested command (info / quiz / history / stats).
    """
    parser = argparse.ArgumentParser(
        description="CLI Study Buddy: Your AI-powered certification exam preparation assistant"
    )
    # Set up command subparsers
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # "info": show Knowledge Base details
    info_parser = subparsers.add_parser("info", help="Show Knowledge Base information")
    info_parser.add_argument("--kb-id", help="Knowledge Base ID (optional if set in .env)")

    # "quiz": run an interactive quiz session
    quiz_parser = subparsers.add_parser("quiz", help="Start a quiz session")
    quiz_parser.add_argument("--kb-id", help="Knowledge Base ID (optional if set in .env)")
    quiz_parser.add_argument("--topic", help="Topic to be tested (optional, will prompt if not provided)")
    quiz_parser.add_argument("--general", action="store_true", help="Generate a general quiz across all topics")
    quiz_parser.add_argument(
        "--difficulty",
        choices=["easy", "medium", "hard"],
        default=DEFAULT_DIFFICULTY,
        help="Quiz difficulty level"
    )
    quiz_parser.add_argument(
        "--questions",
        type=int,
        default=DEFAULT_NUM_QUESTIONS,
        help="Number of questions to generate"
    )
    quiz_parser.add_argument(
        "--model",
        help="Bedrock model ID to use (defaults to Claude 3 Sonnet)"
    )

    # "history": list past quiz results
    history_parser = subparsers.add_parser("history", help="View your quiz history")
    history_parser.add_argument("--kb-id", help="Knowledge Base ID (optional if set in .env)")
    history_parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of results to show"
    )

    # "stats": per-topic statistics
    stats_parser = subparsers.add_parser("stats", help="View topic statistics")
    stats_parser.add_argument("--kb-id", help="Knowledge Base ID (optional if set in .env)")
    stats_parser.add_argument("--topic", required=True, help="Topic to analyze")

    # Global options
    parser.add_argument("--region", default=DEFAULT_REGION, help="AWS region")

    args = parser.parse_args()

    # Resolve the Knowledge Base ID from CLI args or the environment; it is
    # required whenever a subcommand was given. ("is not None" instead of
    # "!= None" — identity comparison is the correct idiom for None.)
    kb_id = get_kb_id(args)
    if not kb_id and args.command is not None:
        print("Error: Knowledge Base ID is required. Please provide it with --kb-id or set it in .env file.")
        sys.exit(1)

    # Initialize application
    app = CertQuizApp(region=args.region)

    # Route to appropriate command handler
    if args.command == "info":
        app.initialize(kb_id=kb_id)
        app.show_kb_info()
    elif args.command == "quiz":
        app.initialize(kb_id=kb_id)
        # Override the default model if one was specified on the CLI.
        if getattr(args, 'model', None):
            app.kb.model_id = args.model
        # --general runs a cross-topic quiz; otherwise use --topic or prompt.
        if getattr(args, 'general', False):
            topic = "General Knowledge"
        else:
            topic = args.topic or input("Enter a topic for your quiz: ")
        app.run_quiz(topic, args.difficulty, args.questions)
    elif args.command == "history":
        app.initialize(kb_id=kb_id)
        app.show_history(args.limit)
    elif args.command == "stats":
        app.initialize(kb_id=kb_id)
        app.show_topic_stats(args.topic)
    else:
        # No subcommand: show usage instead of failing.
        parser.print_help()


if __name__ == "__main__":
    main()
config_helper.py
:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""
Configuration Helper Module: Handles loading and managing configuration settings.
Provides functions for retrieving environment variables and command-line arguments.
"""
import os
import logging
from pathlib import Path
from dotenv import load_dotenv
logger = logging.getLogger(__name__)
# Load environment variables from .env file
load_dotenv()
def get_kb_id(args):
    """
    Resolve the Knowledge Base ID for the current invocation.

    Precedence: the --kb-id command-line argument first, then the KB_ID
    and BEDROCK_KB_ID environment variables (in that order).

    Args:
        args: Parsed command-line arguments (argparse namespace)

    Returns:
        str: Knowledge Base ID, or None if not configured anywhere
    """
    # Command-line argument wins when present and non-empty.
    cli_value = getattr(args, 'kb_id', None)
    if cli_value:
        return cli_value

    # Fall back to the environment, first match wins.
    for env_var in ('KB_ID', 'BEDROCK_KB_ID'):
        env_value = os.getenv(env_var)
        if env_value:
            return env_value

    # Not configured anywhere.
    return None
src/data_store/dynamo_client.py
:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
"""
DynamoDB Client Module: Handles storage and retrieval of quiz results.
Provides methods for tracking progress and analyzing performance.
"""
import logging
from datetime import datetime, timezone
from decimal import Decimal

import boto3
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError
logger = logging.getLogger(__name__)
class DynamoDBClient:
    """
    Handles interaction with DynamoDB for storing and retrieving quiz results.
    """

    def __init__(self, table_name, region="us-east-1"):
        """
        Initialize the DynamoDB client.

        Args:
            table_name (str): DynamoDB table name
            region (str): AWS region
        """
        self.table_name = table_name
        self.region = region
        self.dynamodb = boto3.resource('dynamodb', region_name=region)
        self.table = self.dynamodb.Table(table_name)
        self._ensure_table_exists()

    def _ensure_table_exists(self):
        """
        Ensure the DynamoDB table exists, creating it if it doesn't.

        Raises:
            ClientError: For any DynamoDB error other than a missing table.
        """
        try:
            # Reading table_status triggers a DescribeTable call, which
            # raises ResourceNotFoundException when the table is missing.
            self.table.table_status
            logger.info(f"DynamoDB table {self.table_name} exists")
        except ClientError as e:
            if e.response['Error']['Code'] == 'ResourceNotFoundException':
                logger.info(f"Creating DynamoDB table {self.table_name}")
                self._create_table()
            else:
                logger.error(f"Error checking DynamoDB table: {e}")
                raise

    def _create_table(self):
        """
        Create the DynamoDB table for storing quiz results.

        Schema: (user_id, timestamp) primary key plus a TopicIndex GSI on
        (user_id, topic) for per-topic statistics queries.

        Raises:
            ClientError: If table creation fails.
        """
        try:
            table = self.dynamodb.create_table(
                TableName=self.table_name,
                KeySchema=[
                    {
                        'AttributeName': 'user_id',
                        'KeyType': 'HASH'  # Partition key
                    },
                    {
                        'AttributeName': 'timestamp',
                        'KeyType': 'RANGE'  # Sort key
                    }
                ],
                AttributeDefinitions=[
                    {
                        'AttributeName': 'user_id',
                        'AttributeType': 'S'
                    },
                    {
                        'AttributeName': 'timestamp',
                        'AttributeType': 'S'
                    },
                    {
                        'AttributeName': 'topic',
                        'AttributeType': 'S'
                    }
                ],
                GlobalSecondaryIndexes=[
                    {
                        'IndexName': 'TopicIndex',
                        'KeySchema': [
                            {
                                'AttributeName': 'user_id',
                                'KeyType': 'HASH'
                            },
                            {
                                'AttributeName': 'topic',
                                'KeyType': 'RANGE'
                            }
                        ],
                        'Projection': {
                            'ProjectionType': 'ALL'
                        },
                        'ProvisionedThroughput': {
                            'ReadCapacityUnits': 5,
                            'WriteCapacityUnits': 5
                        }
                    }
                ],
                ProvisionedThroughput={
                    'ReadCapacityUnits': 5,
                    'WriteCapacityUnits': 5
                }
            )
            # Block until the table is usable before returning.
            table.meta.client.get_waiter('table_exists').wait(TableName=self.table_name)
            logger.info(f"DynamoDB table {self.table_name} created successfully")
        except ClientError as e:
            logger.error(f"Error creating DynamoDB table: {e}")
            raise

    def save_quiz_result(self, user_id, topic, score, num_questions, difficulty):
        """
        Save a quiz result to DynamoDB.

        Args:
            user_id (str): Unique user identifier
            topic (str): Quiz topic
            score (float): Quiz score as percentage
            num_questions (int): Number of questions in the quiz
            difficulty (str): Quiz difficulty level

        Raises:
            ClientError: If the write fails.
        """
        logger.info(f"Saving quiz result for user {user_id}, topic {topic}")
        # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated);
        # stored in ISO-8601 so it sorts lexicographically and round-trips
        # through datetime.fromisoformat().
        timestamp = datetime.now(timezone.utc).isoformat()
        try:
            self.table.put_item(
                Item={
                    'user_id': user_id,
                    'timestamp': timestamp,
                    'topic': topic,
                    'score': Decimal(str(score)),  # DynamoDB requires Decimal, not float
                    'num_questions': num_questions,
                    'difficulty': difficulty
                }
            )
            logger.info("Quiz result saved successfully")
        except ClientError as e:
            logger.error(f"Error saving quiz result: {e}")
            raise

    def get_user_results(self, user_id, limit=10):
        """
        Get quiz results for a specific user, newest first.

        Args:
            user_id (str): Unique user identifier
            limit (int): Maximum number of results to return

        Returns:
            list: Quiz result items

        Raises:
            ClientError: If the query fails.
        """
        logger.info(f"Getting quiz results for user {user_id}")
        try:
            response = self.table.query(
                KeyConditionExpression=Key('user_id').eq(user_id),
                ScanIndexForward=False,  # Sort in descending order (newest first)
                Limit=limit
            )
            return response.get('Items', [])
        except ClientError as e:
            logger.error(f"Error getting user results: {e}")
            raise

    def get_topic_statistics(self, user_id, topic):
        """
        Get aggregate statistics for a specific topic.

        Args:
            user_id (str): Unique user identifier
            topic (str): Topic to analyze

        Returns:
            dict: attempts, average_score, highest_score, lowest_score,
                  total_questions (scores as Decimal when attempts > 0)

        Raises:
            ClientError: If the query fails.
        """
        logger.info(f"Getting statistics for user {user_id}, topic {topic}")
        try:
            # Query the GSI keyed on (user_id, topic).
            response = self.table.query(
                IndexName='TopicIndex',
                KeyConditionExpression=Key('user_id').eq(user_id) & Key('topic').eq(topic)
            )
            results = response.get('Items', [])
            if not results:
                return {
                    'attempts': 0,
                    'average_score': 0,
                    'highest_score': 0,
                    'lowest_score': 0,
                    'total_questions': 0
                }
            # Aggregate in float, convert back to Decimal for consistency
            # with the stored item types.
            scores = [float(result['score']) for result in results]
            total_questions = sum(int(result['num_questions']) for result in results)
            return {
                'attempts': len(results),
                'average_score': Decimal(str(sum(scores) / len(scores))),
                'highest_score': Decimal(str(max(scores))),
                'lowest_score': Decimal(str(min(scores))),
                'total_questions': total_questions
            }
        except ClientError as e:
            logger.error(f"Error getting topic statistics: {e}")
            raise
src/quiz_engine/generator.py
:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""
Quiz generator module for creating quizzes based on Knowledge Base content
"""
import logging
import json
import re
from botocore.exceptions import ClientError
logger = logging.getLogger(__name__)
class QuizGenerator:
    """
    Class for generating quizzes based on Knowledge Base content
    """

    def __init__(self, knowledge_base):
        """
        Initialize the quiz generator.

        Args:
            knowledge_base (BedrockKnowledgeBase): Knowledge Base handler
        """
        self.knowledge_base = knowledge_base

    def create_quiz(self, topic, difficulty, num_questions):
        """
        Create a quiz on a specific topic.

        Args:
            topic (str): Topic for the quiz ("general knowledge" triggers a
                cross-topic quiz)
            difficulty (str): Difficulty level (easy, medium, hard)
            num_questions (int): Number of questions to generate

        Returns:
            list: List of quiz questions
        """
        logger.info(f"Generating {num_questions} {difficulty} questions about {topic}")
        if topic.lower() == "general knowledge":
            return self._create_general_quiz(difficulty, num_questions)
        else:
            return self._create_topic_quiz(topic, difficulty, num_questions)

    def _create_topic_quiz(self, topic, difficulty, num_questions):
        """
        Create a quiz on a specific topic.

        Args:
            topic (str): Topic for the quiz
            difficulty (str): Difficulty level (easy, medium, hard)
            num_questions (int): Number of questions to generate

        Returns:
            list: List of quiz questions

        Raises:
            ClientError: If the Bedrock call fails.
        """
        # Prompt template for the retrieve-and-generate call.
        prompt_template = f"""
You are an expert quiz creator for certification exam preparation.
Based on the retrieved information about "{topic}", create {num_questions} multiple-choice questions at {difficulty} difficulty level.
For each question:
1. Make sure it's at {difficulty} difficulty level
2. Include 4 possible answers (A, B, C, D)
3. Mark the correct answer
4. Provide a brief explanation for why the answer is correct
Format each question like this:
Q1: [Question text]
A. [Specific option text for A]
B. [Specific option text for B]
C. [Specific option text for C]
D. [Specific option text for D]
Answer: [Correct letter]
Explanation: [Brief explanation]
Make sure the questions are challenging but fair, and directly related to "{topic}".
"""
        try:
            generated_text = self.knowledge_base.retrieve_and_generate(
                query=f"Tell me about {topic} for a certification exam",
                prompt_template=prompt_template,
                num_results=5
            )
            # Parse the generated text into structured questions.
            return self._parse_questions(generated_text)
        except ClientError as e:
            logger.error(f"Error generating topic quiz: {e}")
            raise

    def _create_general_quiz(self, difficulty, num_questions):
        """
        Create a general quiz across multiple topics.

        Args:
            difficulty (str): Difficulty level (easy, medium, hard)
            num_questions (int): Number of questions to generate

        Returns:
            list: List of quiz questions

        Raises:
            ClientError: If the Bedrock call fails.
        """
        logger.info(f"Generating general knowledge quiz with {num_questions} {difficulty} questions")
        # Prompt template for the retrieve-and-generate call.
        prompt_template = f"""
You are an expert quiz creator for certification exam preparation.
Based on the retrieved information from various topics, create {num_questions} multiple-choice questions at {difficulty} difficulty level.
Important: Include questions from different topics and subject areas to create a diverse general knowledge quiz.
For each question:
1. Make sure it's at {difficulty} difficulty level
2. Include 4 possible answers (A, B, C, D)
3. Mark the correct answer
4. Provide a brief explanation for why the answer is correct
Format each question like this:
Q1: [Question text]
A. [Specific option text for A - not just "A"]
B. [Specific option text for B - not just "B"]
C. [Specific option text for C - not just "C"]
D. [Specific option text for D - not just "D"]
Answer: [Correct letter]
Explanation: [Brief explanation]
Make sure the questions are challenging but fair, and cover a variety of topics from the study materials.
"""
        try:
            generated_text = self.knowledge_base.retrieve_and_generate(
                query="Give me diverse information from different topics for a general knowledge quiz",
                prompt_template=prompt_template,
                num_results=5
            )
            # Parse the generated text into structured questions.
            return self._parse_questions(generated_text)
        except ClientError as e:
            logger.error(f"Error generating general quiz: {e}")
            raise

    def _parse_questions(self, text):
        """
        Parse the generated text into structured questions.

        Args:
            text (str): Generated text containing questions

        Returns:
            list: List of structured questions (incomplete ones are dropped)
        """
        # Split the text on question headers ("Q1:" / "Question 1:").
        question_pattern = r'Q\d+:|Question \d+:'
        raw_questions = [q.strip() for q in re.split(question_pattern, text) if q.strip()]

        structured_questions = []
        for i, raw_q in enumerate(raw_questions):
            try:
                parsed = self._parse_single_question(raw_q)
                if parsed is not None:
                    structured_questions.append(parsed)
            except Exception as e:
                # One malformed question must not abort the whole quiz.
                logger.error(f"Error parsing question {i+1}: {e}")
                continue
        return structured_questions

    def _parse_single_question(self, raw_q):
        """
        Parse one question's raw text into a structured dict.

        Args:
            raw_q (str): Question text with the "Qn:" header stripped

        Returns:
            dict: {'question', 'options', 'answer', 'explanation'} when all
                  four components (and exactly 4 options) were found,
                  otherwise None.
        """
        lines = raw_q.split('\n')
        # The first line is the question itself; the header was split off.
        question_text = lines[0].strip()
        options = {}
        answer = None
        explanation = None

        for line in lines[1:]:
            line = line.strip()
            if not line:
                continue
            # Option lines: "A. text", "B: text", etc.
            # (renamed capture var — the original shadowed the `text` parameter)
            option_match = re.match(r'^([A-D])[.:]?\s+(.+)$', line)
            if option_match:
                letter, option_text = option_match.groups()
                options[letter] = option_text
            # Answer line: "Answer: X"
            answer_match = re.match(r'^Answer:?\s+([A-D]).*$', line)
            if answer_match:
                answer = answer_match.group(1)
            # Explanation line, plus continuation lines that follow it.
            if line.startswith('Explanation:'):
                explanation = line[len('Explanation:'):].strip()
            elif explanation and not option_match and not answer_match and not line.startswith('Q'):
                explanation += ' ' + line

        # Only keep fully-formed questions (the original also computed an
        # unused `correct_index` here; that dead code has been removed).
        if question_text and options and answer and explanation and len(options) == 4:
            return {
                'question': question_text,
                'options': options,
                'answer': answer,
                'explanation': explanation
            }
        return None
src/rag_handler/knowledge_base.py
:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Knowledge Base handler for interacting with Amazon Bedrock Knowledge Bases
"""
import logging
import json
import boto3
from botocore.exceptions import ClientError
logger = logging.getLogger(__name__)
class BedrockKnowledgeBase:
    """
    Class for interacting with Amazon Bedrock Knowledge Bases
    """

    def __init__(self, kb_id, region="us-east-1", model_id=None):
        """
        Initialize the Knowledge Base handler.

        Args:
            kb_id (str): Knowledge Base ID
            region (str): AWS region
            model_id (str): Bedrock model ARN/ID for generation
        """
        self.kb_id = kb_id
        self.region = region
        self.model_id = model_id
        self.bedrock_agent_runtime = boto3.client('bedrock-agent-runtime', region_name=region)
        self.bedrock_runtime = boto3.client('bedrock-runtime', region_name=region)

    def query_knowledge_base(self, query, num_results=3):
        """
        Query the Knowledge Base using the Retrieve API.

        Args:
            query (str): Query text
            num_results (int): Number of results to return

        Returns:
            list: Retrieved passage texts (empty passages are dropped)

        Raises:
            ClientError: If the retrieve call fails.
        """
        logger.info(f"Querying Knowledge Base: {query}")
        try:
            response = self.bedrock_agent_runtime.retrieve(
                knowledgeBaseId=self.kb_id,
                retrievalQuery={
                    'text': query
                },
                retrievalConfiguration={
                    'vectorSearchConfiguration': {
                        'numberOfResults': num_results
                    }
                }
            )
            # Keep only the non-empty passage texts.
            passages = [
                result.get('content', {}).get('text', '')
                for result in response.get('retrievalResults', [])
            ]
            return [p for p in passages if p]
        except ClientError as e:
            logger.error(f"Error querying Knowledge Base: {e}")
            raise

    def retrieve_and_generate(self, query, prompt_template, num_results=3):
        """
        Query the Knowledge Base and generate content via RetrieveAndGenerate.

        Args:
            query (str): Query text
            prompt_template (str): Template for the prompt to send to the model
            num_results (int): Number of passages to retrieve (previously
                accepted but silently ignored; now forwarded to the API)

        Returns:
            str: Generated content

        Raises:
            ClientError: If the RetrieveAndGenerate call fails.
        """
        logger.info(f"Using RetrieveAndGenerate API for query: {query}")
        try:
            # Create the full prompt with template
            full_prompt = f"""
You are an expert quiz creator for certification exam preparation.
Based on the retrieved information, please:
{prompt_template}
"""
            # Unified RetrieveAndGenerate call; num_results is now passed
            # through via the knowledge base retrieval configuration.
            response = self.bedrock_agent_runtime.retrieve_and_generate(
                input={
                    'text': query + "\n\n" + full_prompt
                },
                retrieveAndGenerateConfiguration={
                    'type': 'KNOWLEDGE_BASE',
                    'knowledgeBaseConfiguration': {
                        'knowledgeBaseId': self.kb_id,
                        'modelArn': self.model_id,
                        'retrievalConfiguration': {
                            'vectorSearchConfiguration': {
                                'numberOfResults': num_results
                            }
                        }
                    }
                }
            )
            return response.get('output', {}).get('text', '')
        except ClientError as e:
            logger.error(f"Error with retrieve and generate: {e}")
            raise

    def get_knowledge_base_info(self):
        """
        Get information about the Knowledge Base.

        Returns:
            dict: Knowledge Base details (knowledgeBaseId, name, description,
                  status, createdAt, updatedAt, ...)

        Raises:
            ClientError: If the GetKnowledgeBase call fails.
        """
        try:
            bedrock_agent = boto3.client('bedrock-agent', region_name=self.region)
            response = bedrock_agent.get_knowledge_base(
                knowledgeBaseId=self.kb_id
            )
            # The API nests the details under 'knowledgeBase'; returning the
            # raw response made callers' kb_info.get('name') etc. always None.
            return response.get('knowledgeBase', {})
        except ClientError as e:
            logger.error(f"Error getting Knowledge Base info: {e}")
            raise

    def list_data_sources(self):
        """
        List data sources for the Knowledge Base.

        Returns:
            list: Data source summaries

        Raises:
            ClientError: If the ListDataSources call fails.
        """
        try:
            bedrock_agent = boto3.client('bedrock-agent', region_name=self.region)
            response = bedrock_agent.list_data_sources(
                knowledgeBaseId=self.kb_id
            )
            return response.get('dataSourceSummaries', [])
        except ClientError as e:
            logger.error(f"Error listing data sources: {e}")
            raise
requirements.txt
:1
2
3
4
boto3>=1.28.0
botocore>=1.31.0
python-dateutil>=2.8.2
python-dotenv>=1.0.0
Any opinions in this post are those of the individual author and may not reflect the opinions of AWS.