Build Your First AI Chatbot with Python — Complete Guide 2026
Reviewed: June 4, 2026
Last updated: May 2026
In this hands-on tutorial, you’ll build a fully functional AI chatbot using Python, OpenAI’s API, and the LangChain framework. By the end, you’ll have a conversational agent with memory, streaming responses, and a clean CLI interface — all in under 100 lines of code.
What We’re Building
A terminal-based chatbot that:
- Maintains conversation memory across turns
- Streams responses token-by-token for a natural feel
- Supports multiple conversation sessions
- Handles errors gracefully with retry logic
Prerequisites
- Python 3.10+
- An OpenAI API key (or Ollama for local models)
- Basic Python knowledge
Step 1: Set Up the Project
mkdir ai-chatbot && cd ai-chatbot
python -m venv .venv
source .venv/bin/activate
pip install langchain-openai langchain-core python-dotenv
Create a .env file:
OPENAI_API_KEY=sk-your-key-here
MODEL=gpt-4o-mini
Step 2: Basic Chatbot with Memory
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.chat_history import InMemoryChatMessageHistory
load_dotenv()
class ChatBot:
    """Terminal chatbot that keeps the whole conversation in memory.

    The history starts with a system message; every completed turn appends
    one human message and one AI message to it.
    """

    def __init__(self, system_prompt="You are a helpful AI assistant."):
        """Create the streaming chat model and seed the history.

        Args:
            system_prompt: Content of the system message stored as the first
                history entry.
        """
        model_name = os.getenv("MODEL", "gpt-4o-mini")
        self.llm = ChatOpenAI(model=model_name, temperature=0.7, streaming=True)

        seeded = InMemoryChatMessageHistory()
        seeded.add_message(SystemMessage(content=system_prompt))
        self.history = seeded

    def chat(self, user_input: str) -> str:
        """Send one user turn, echo the reply as it streams, and record it.

        Args:
            user_input: The text typed by the user.

        Returns:
            The complete reply assembled from the streamed chunks.
        """
        self.history.add_message(HumanMessage(content=user_input))

        print("🤖: ", end="", flush=True)
        reply = ""
        for piece in self.llm.stream(self.history.messages):
            # A chunk may carry no text; treat that as an empty string.
            text = piece.content if piece.content else ""
            print(text, end="", flush=True)
            reply += text
        print()  # newline after streaming

        self.history.add_message(AIMessage(content=reply))
        return reply

    def reset(self):
        """Drop every message except the first one (the system message)."""
        first_message = self.history.messages[0]
        fresh = InMemoryChatMessageHistory()
        fresh.add_message(first_message)
        self.history = fresh
if __name__ == "__main__":
    # Interactive loop: read a line, handle the built-in commands, otherwise
    # forward the text to the bot.
    bot = ChatBot()
    print("AI Chatbot ready! Type 'quit' to exit, 'reset' to clear memory.\n")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
            print()
            break
        command = user_input.lower()
        if command == "quit":
            break
        if command == "reset":
            bot.reset()
            print("Memory cleared.\n")
            continue
        if not user_input:
            continue
        bot.chat(user_input)
        print()
Step 3: Add Retry Logic and Better Error Handling
import time
from langchain_core.outputs import LLMResult
class ResilientChatBot(ChatBot):
    """ChatBot whose ``chat`` retries failed requests with exponential backoff."""

    def __init__(self, max_retries=3, **kwargs):
        """Initialise the base bot and remember the retry budget.

        Args:
            max_retries: Maximum number of attempts per message. Values below
                1 are treated as 1 when chatting.
            **kwargs: Forwarded unchanged to ``ChatBot.__init__``.
        """
        super().__init__(**kwargs)
        self.max_retries = max_retries

    def chat(self, user_input: str) -> str:
        """Send one user turn, retrying the streamed request on failure.

        The wait between attempts doubles each time (1s, 2s, 4s, ...).

        Args:
            user_input: The text typed by the user.

        Returns:
            The complete reply assembled from the streamed chunks.

        Raises:
            Exception: Whatever the last attempt raised. The user message is
                removed from the history first, so a failed turn leaves no
                trace in the conversation.
        """
        self.history.add_message(HumanMessage(content=user_input))
        # Always make at least one attempt; with a budget of 0 the loop would
        # not run at all and the method would return None.
        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                full_response = ""
                print("🤖: ", end="", flush=True)
                for chunk in self.llm.stream(self.history.messages):
                    content = chunk.content or ""
                    print(content, end="", flush=True)
                    full_response += content
                print()
            except Exception as e:
                print(f"\n⚠️ Error (attempt {attempt+1}/{attempts}): {e}")
                if attempt == attempts - 1:
                    self.history.messages.pop()  # remove the human message
                    raise
                wait = 2 ** attempt
                print(f"Retrying in {wait}s...")
                time.sleep(wait)
            else:
                # Only the streaming request is retried; recording the reply
                # happens once, after a successful attempt.
                self.history.add_message(AIMessage(content=full_response))
                return full_response
Step 4: Use Local Models with Ollama
Don’t want to use OpenAI? Swap in a local model with Ollama:
from langchain_ollama import ChatOllama
# Replace ChatOpenAI with ChatOllama: this assignment takes the place of the
# `self.llm = ChatOpenAI(...)` statement inside `ChatBot.__init__`.
self.llm = ChatOllama(
    model="llama3.2:latest", # or mistral, gemma2, etc.
    temperature=0.7,
    # URL of the Ollama server; here an instance running on this machine.
    base_url="http://localhost:11434"
)
Installation:
pip install langchain-ollama
# Make sure Ollama is running: ollama serve
# Pull a model: ollama pull llama3.2
Step 5: Add Session Persistence
Save conversations to disk so they survive restarts:
import json
from pathlib import Path
class PersistentChatBot(ResilientChatBot):
    """ResilientChatBot that stores the conversation in a JSON file.

    The file holds ``{"messages": [{"type": ..., "content": ...}, ...]}``
    where ``type`` is ``"system"``, ``"human"`` or ``"ai"``.
    """

    def __init__(self, session_file="session.json", **kwargs):
        """Initialise the bot and restore a previously saved session, if any.

        Args:
            session_file: Path of the JSON file used for persistence.
            **kwargs: Forwarded unchanged to ``ResilientChatBot.__init__``.
        """
        super().__init__(**kwargs)
        self.session_file = Path(session_file)
        self._load_session()

    def _load_session(self):
        """Replace the fresh history with the saved one when it is usable.

        A missing, unreadable, malformed or empty session file leaves the
        freshly created history (including its system message) untouched.
        """
        if not self.session_file.exists():
            return
        try:
            data = json.loads(self.session_file.read_text(encoding="utf-8"))
            records = data.get("messages", [])
        except (OSError, ValueError, AttributeError) as exc:
            print(f"⚠️ Ignoring unreadable session file {self.session_file}: {exc}")
            return
        if not records:
            # An empty session must not wipe the system message.
            return

        message_classes = {
            "system": SystemMessage,
            "human": HumanMessage,
            "ai": AIMessage,
        }
        try:
            restored = [
                message_classes[record["type"]](content=record["content"])
                for record in records
            ]
        except (KeyError, TypeError) as exc:
            print(f"⚠️ Ignoring malformed session file {self.session_file}: {exc}")
            return

        # Swap the history in only after every record was rebuilt.
        history = InMemoryChatMessageHistory()
        for message in restored:
            history.add_message(message)
        self.history = history

    def _save_session(self):
        """Write the current history to the session file as JSON."""
        messages = [
            # ``type`` is the message's own type tag ("system"/"human"/"ai").
            {"type": msg.type, "content": msg.content}
            for msg in self.history.messages
        ]
        self.session_file.write_text(
            json.dumps({"messages": messages}, indent=2), encoding="utf-8"
        )

    def chat(self, user_input: str) -> str:
        """Run one chat turn and persist the updated conversation.

        Args:
            user_input: The text typed by the user.

        Returns:
            The reply returned by ``ResilientChatBot.chat``.
        """
        result = super().chat(user_input)
        self._save_session()
        return result

    def reset(self):
        """Clear the conversation and persist the cleared state.

        Without the save, the old conversation would come back on the next
        start because the session file would still contain it.
        """
        super().reset()
        self._save_session()
Complete Code (All-in-One)
#!/usr/bin/env python3
"""
AI Chatbot — Complete implementation with memory, streaming,
retry logic, and session persistence.
"""
import os, json, time
from pathlib import Path
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.chat_history import InMemoryChatMessageHistory
load_dotenv()
class ChatBot:
    """Streaming terminal chatbot with retries and on-disk session storage.

    The conversation is kept in ``self.history`` and mirrored to a JSON file
    of the form ``{"messages": [{"type": ..., "content": ...}, ...]}`` where
    ``type`` is ``"system"``, ``"human"`` or ``"ai"``.
    """

    def __init__(self, system_prompt="You are a helpful AI assistant.",
                 session_file="chat_session.json"):
        """Create the model, seed the history and restore a saved session.

        Args:
            system_prompt: Content of the system message that starts a fresh
                conversation.
            session_file: Path of the JSON file used for persistence.
        """
        self.llm = ChatOpenAI(
            model=os.getenv("MODEL", "gpt-4o-mini"),
            temperature=0.7, streaming=True,
        )
        self.session_file = Path(session_file)
        self.system_prompt = system_prompt
        self.history = InMemoryChatMessageHistory()
        self.history.add_message(SystemMessage(content=system_prompt))
        self._load_session()

    def _load_session(self):
        """Replace the fresh history with the saved one when it is usable.

        A missing, unreadable, malformed or empty session file leaves the
        current history (including its system message) untouched.
        """
        if not self.session_file.exists():
            return
        try:
            data = json.loads(self.session_file.read_text(encoding="utf-8"))
            records = data.get("messages", [])
        except (OSError, ValueError, AttributeError) as exc:
            print(f"⚠️ Ignoring unreadable session file {self.session_file}: {exc}")
            return
        if not records:
            # An empty session must not wipe the system message.
            return

        message_classes = {
            "system": SystemMessage,
            "human": HumanMessage,
            "ai": AIMessage,
        }
        try:
            restored = [
                message_classes[record["type"]](content=record["content"])
                for record in records
            ]
        except (KeyError, TypeError) as exc:
            print(f"⚠️ Ignoring malformed session file {self.session_file}: {exc}")
            return

        # Swap the history in only after every record was rebuilt.
        history = InMemoryChatMessageHistory()
        for message in restored:
            history.add_message(message)
        self.history = history

    def _save_session(self):
        """Write the current history to the session file as JSON."""
        msgs = [
            # ``type`` is the message's own type tag ("system"/"human"/"ai").
            {"type": msg.type, "content": msg.content}
            for msg in self.history.messages
        ]
        self.session_file.write_text(
            json.dumps({"messages": msgs}, indent=2), encoding="utf-8"
        )

    def reset(self):
        """Start a new conversation with the original system prompt and save it."""
        self.history = InMemoryChatMessageHistory()
        self.history.add_message(SystemMessage(content=self.system_prompt))
        self._save_session()

    def chat(self, text: str, retries=3) -> str:
        """Send one user turn, retrying the streamed request on failure.

        The wait between attempts doubles each time (1s, 2s, 4s, ...).

        Args:
            text: The text typed by the user.
            retries: Maximum number of attempts. Values below 1 are treated
                as 1.

        Returns:
            The complete reply assembled from the streamed chunks.

        Raises:
            Exception: Whatever the last attempt raised; the user message is
                removed from the history first. Errors raised while saving
                the session propagate without triggering a retry.
        """
        self.history.add_message(HumanMessage(content=text))
        # Always make at least one attempt; with a budget of 0 the loop would
        # not run at all and the method would return None.
        attempts = max(1, retries)
        for attempt in range(attempts):
            try:
                response = ""
                print("🤖: ", end="", flush=True)
                for chunk in self.llm.stream(self.history.messages):
                    c = chunk.content or ""
                    print(c, end="", flush=True)
                    response += c
                print()
            except Exception as e:
                print(f"\n⚠️ Attempt {attempt+1}/{attempts} failed: {e}")
                if attempt == attempts - 1:
                    self.history.messages.pop()  # remove the human message
                    raise
                time.sleep(2 ** attempt)
            else:
                # Record and persist outside the retried region: a disk error
                # must not re-query the model or duplicate the AI message.
                self.history.add_message(AIMessage(content=response))
                self._save_session()
                return response
if __name__ == "__main__":
    # Interactive loop: read a line, handle the built-in commands, otherwise
    # forward the text to the bot.
    bot = ChatBot()
    print("🚀 AI Chatbot ready! Commands: quit | reset | history\n")
    while True:
        try:
            inp = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
            print()
            break
        if not inp:
            continue
        command = inp.lower()
        if command == "quit":
            break
        if command == "reset":
            # ChatBot() reloads the session file on construction, so the file
            # has to go first or the old conversation would come straight back.
            bot.session_file.unlink(missing_ok=True)
            bot = ChatBot()
            print("🔄 Reset.\n")
            continue
        if command == "history":
            for m in bot.history.messages:
                # Show an ellipsis only when the preview was actually cut.
                suffix = "..." if len(m.content) > 80 else ""
                print(f" [{type(m).__name__[:1]}]: {m.content[:80]}{suffix}")
            print()
            continue
        try:
            bot.chat(inp)
        except Exception:
            # chat() has already printed each failed attempt; keep the loop
            # alive so the user can try again.
            print("❌ Giving up on this message. Please try again.\n")
            continue
        print()
Next Steps
Now that you have a working chatbot, here are ways to extend it:
- Web UI: Add a Gradio or Streamlit interface with `pip install gradio`
- RAG: Connect a vector database (ChromaDB, Pinecone) for document-aware responses
- Tools: Give the agent the ability to run code, search the web, or call APIs using LangChain’s tool system
- Multi-agent: Use CrewAI or LangGraph to coordinate multiple specialized agents
Key Takeaways
- LangChain’s `InMemoryChatMessageHistory` gives you conversation memory for free
- Streaming responses via `.stream()` dramatically improves perceived latency
- Exponential backoff retry logic makes your bot resilient to transient API failures
- Session persistence lets conversations survive restarts with minimal code
- Swapping between OpenAI and local Ollama models requires changing only the import and the model constructor
