How to Make Your Own Jarvis with the Help of Python

Jarvis is more than just a project for me — it’s an emotion. I started coding because of it. As a huge fan of Iron Man, I always dreamed of having a similar virtual assistant of my own.

So, I began searching on YouTube, and I found several tutorials about creating your own Jarvis using programming. At that time, I thought I just needed an app that could act like Jarvis. But soon, I realized that most of those apps weren’t designed for personal use — they were made for general public demonstrations.

I started following one of those tutorials step by step. But eventually, I messed things up — nothing worked as expected. Then it hit me — the tutorial I was following was actually the last video of a playlist called “Project Video.” That meant I had to go back and learn everything from the beginning if I truly wanted to build my own assistant.

So, I did. And after about one year, I finally made it work. But even after that, I kept improving it — and by 2019, I was still refining my Jarvis with my newly learned skills.

Today, I’m going to show you how you can create your own Jarvis, capable of voice commands, audio and text output, file handling, terminal-based interaction, and intelligent memory management.

Start by Installing the Required Modules

Run these commands in your terminal (as Administrator on Windows or with sudo on macOS/Linux):

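pip install openai
pip install requests
pip install beautifulsoup4
pip install edge-tts
pip install pygame
pip install SpeechRecognition
pip install PyAudio
pip install PyPDF2
pip install python-docx

Note: html, re and asyncio are part of Python's standard library, so they do not need to be installed with pip. PyAudio is required for microphone input; PyPDF2 and python-docx are only needed if you want Jarvis to read PDF and Word files.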

We’ll create a function that generates AI responses. For this, you’ll need an API key — you can use any, but Perplexity is recommended. Check it here: Perplexity Documentation

def askAI(userInput=None, file=None, url=None):
    """Ask the Perplexity chat-completions API a question.

    The prompt text comes from exactly one source, checked in this order:
    ``file`` (its extracted text replaces ``userInput``), then ``url`` (the
    page's visible text replaces ``userInput``), then ``userInput`` itself.

    Expects ``os``, ``requests`` and ``BeautifulSoup`` to be imported at
    module level (see the askAI.py listing).

    Args:
        userInput: The question to ask, as plain text.
        file: Path to a .txt, .pdf, .docx or .doc file to read the prompt from.
        url: Address of a web page to read the prompt from.

    Returns:
        A ``(citations, content, search_results)`` tuple. When the input
        cannot be read or is empty, ``citations`` and ``search_results`` are
        empty lists and ``content`` holds a human-readable error message.

    Raises:
        requests.RequestException: If the API request fails, times out, or
            returns an HTTP error status.
    """
    # Prefer an environment variable so the key does not have to live in the
    # source file; the placeholder is kept as a fallback for the tutorial.
    API_KEY = os.environ.get("PERPLEXITY_API_KEY", "Your API Key")
    API_URL = "https://api.perplexity.ai/chat/completions"
    HEADERS = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    MODEL_NAME = "sonar"

    # Handle file upload
    if file:
        ext = os.path.splitext(file)[1].lower()
        try:
            if ext == ".txt":
                with open(file, "r", encoding="utf-8") as f:
                    userInput = f.read()
            elif ext == ".pdf":
                from PyPDF2 import PdfReader
                reader = PdfReader(file)
                # extract_text() may yield None for pages without a text
                # layer; substitute "" so the join cannot fail.
                userInput = "\n".join(
                    page.extract_text() or "" for page in reader.pages
                )
            elif ext in (".docx", ".doc"):
                import docx
                doc = docx.Document(file)
                userInput = "\n".join(p.text for p in doc.paragraphs)
            else:
                raise ValueError("Unsupported file format. Use txt, pdf, or docx.")
        except Exception as e:
            return [], f"Error reading file: {str(e)}", []

    # Handle URL analysis
    elif url:
        try:
            page = requests.get(url, timeout=10)
            # Do not feed an HTTP error page (404, 500, ...) to the model.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, "html.parser")
            for tag in soup(["script", "style"]):
                tag.extract()
            raw_text = soup.get_text(separator="\n")
            userInput = "\n".join(
                line.strip() for line in raw_text.splitlines() if line.strip()
            )
        except Exception as e:
            return [], f"Error fetching URL: {str(e)}", []

    if not userInput:
        return [], "No input provided.", []

    # chat_history.txt (in the working directory) holds past interactions that
    # are passed to the model as context. A missing file simply means there is
    # no history yet, so it must not crash the assistant.
    try:
        with open("chat_history.txt", "r") as f:
            context = f.read()
    except FileNotFoundError:
        context = ""

    # Join the instructions with spaces: adjacent string literals would
    # otherwise run together ("...Jarvisyou are designed...").
    system_prompt = " ".join([
        "Your name is Jarvis.",
        "You are designed and made by {Your Name}.",
        "Brand that made you is {Your Brand Name If have one}.",
        "You are an super intelligent assistant.",
        "Give Answers in one line or maximum 2 lines.",
        "Keep the answer very precise and filled with useful information without using so much jargons.",
        "Give response in Jarvis accent but without jargons.",
        "Give answer in plain text without using markdown syntax bold, italic, underline, bullets, citations and reference markers etc.",
    ])
    if context:
        system_prompt = f"{system_prompt}\n{context}"

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": userInput},
        ],
        "temperature": 0.7,
        "max_tokens": 3000,
        "stream": False,
        "search_mode": "academic",
    }

    # Without a timeout a stalled connection would block the assistant forever.
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
    response.raise_for_status()
    data = response.json()

    content = data["choices"][0]["message"]["content"]
    citations = data.get("citations", [])
    search_results = data.get("search_results", [])

    return citations, content, search_results

Now let’s try it first to ensure everything is working perfectly.

# Quick manual check: runs only when this file is executed directly,
# not when it is imported by main.py.
if __name__ == "__main__":
    question = input("Enter your question: ")
    # askAI returns (citations, content, search_results); print the answer text.
    print(askAI(question)[1])

If it gives an answer, you’re all set!
Save this file as askAI.py — we’ll import it later into our main script.

Create a function to convert AI responses into speech.

# Pygame Initialization
pygame.mixer.init()


async def text_to_speech(text: str, filename: str = "output.mp3"):
    """Synthesize ``text`` with edge-tts and play it through pygame.

    The audio is written to ``filename`` (resolved to an absolute path),
    replacing any file left over from a previous call, and the coroutine
    returns once playback has finished.

    Args:
        text: The text to speak.
        filename: Where to store the generated MP3.

    Returns:
        None. Any error is caught and printed rather than raised.
    """
    try:
        filepath = os.path.abspath(filename)

        # Shut the mixer down first so it releases its handle on the old file.
        if pygame.mixer.get_init():
            pygame.mixer.music.stop()
            pygame.mixer.quit()
            await asyncio.sleep(0.1)

        # Remove the previous output, retrying while the file is still locked.
        if os.path.exists(filepath):
            for _ in range(10):
                try:
                    os.remove(filepath)
                    break
                except PermissionError:
                    await asyncio.sleep(0.1)
            else:
                # Every attempt failed (the loop never hit ``break``).
                print(f"Warning: Could not delete locked file {filepath}")

        tts = edge_tts.Communicate(text, "en-US-EmmaMultilingualNeural")
        await tts.save(filepath)

        # Wait until the freshly written file can actually be opened.
        for _ in range(10):
            try:
                with open(filepath, "rb") as f:
                    f.read(1)
                break
            except PermissionError:
                await asyncio.sleep(0.1)

        pygame.mixer.init()
        pygame.mixer.music.load(filepath)
        pygame.mixer.music.play()

        # Poll without blocking the event loop until playback ends.
        while pygame.mixer.music.get_busy():
            await asyncio.sleep(0.1)

        pygame.mixer.music.stop()
        pygame.mixer.quit()

    except Exception as e:
        print(f"Error during TTS: {e}")

Now write a voice-recognition function that takes input from the microphone, which makes the assistant more convenient and easier to use.

r = sr.Recognizer()


def takeCommand():
    """Record one phrase from the microphone and return it as text.

    The audio is transcribed with ``recognize_google`` using the ``en-in``
    language setting.

    Returns:
        The recognized text, or the string ``"None"`` (not the ``None``
        object) when recognition fails for any reason.
    """
    with sr.Microphone() as source:
        print("Listening...")
        # Seconds of silence after which the phrase is considered finished.
        r.pause_threshold = 1
        audio = r.listen(source)

    try:
        print("Recognizing...")
        query = r.recognize_google(audio, language='en-in')
        print(f"User said: {query}\n")

    except Exception:
        # Best effort: unintelligible speech or a network failure just asks
        # the user to repeat instead of crashing the assistant.
        print("Say that again please...")
        return "None"
    return query

Now combine all these pieces. Your final files will look like this.

main.py

import edge_tts
import asyncio
import time
import os
from askAI import askAI
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "1"
import pygame
import playsound
import speech_recognition as sr
import asyncio
import threading

# Pygame Initialization
pygame.mixer.init()


async def text_to_speech(text: str, filename: str = "output.mp3"):
    """Synthesize ``text`` with edge-tts and play it through pygame.

    The audio is written to ``filename`` (resolved to an absolute path),
    replacing any file left over from a previous call, and the coroutine
    returns once playback has finished.

    Args:
        text: The text to speak.
        filename: Where to store the generated MP3.

    Returns:
        None. Any error is caught and printed rather than raised.
    """
    try:
        filepath = os.path.abspath(filename)

        # Shut the mixer down first so it releases its handle on the old file.
        if pygame.mixer.get_init():
            pygame.mixer.music.stop()
            pygame.mixer.quit()
            await asyncio.sleep(0.1)

        # Remove the previous output, retrying while the file is still locked.
        if os.path.exists(filepath):
            for _ in range(10):
                try:
                    os.remove(filepath)
                    break
                except PermissionError:
                    await asyncio.sleep(0.1)
            else:
                # Every attempt failed (the loop never hit ``break``).
                print(f"Warning: Could not delete locked file {filepath}")

        tts = edge_tts.Communicate(text, "en-US-EmmaMultilingualNeural")
        await tts.save(filepath)

        # Wait until the freshly written file can actually be opened.
        for _ in range(10):
            try:
                with open(filepath, "rb") as f:
                    f.read(1)
                break
            except PermissionError:
                await asyncio.sleep(0.1)

        pygame.mixer.init()
        pygame.mixer.music.load(filepath)
        pygame.mixer.music.play()

        # Poll without blocking the event loop until playback ends.
        while pygame.mixer.music.get_busy():
            await asyncio.sleep(0.1)

        pygame.mixer.music.stop()
        pygame.mixer.quit()

    except Exception as e:
        print(f"Error during TTS: {e}")

r = sr.Recognizer()


def takeCommand():
    """Record one phrase from the microphone and return it as text.

    The audio is transcribed with ``recognize_google`` using the ``en-in``
    language setting.

    Returns:
        The recognized text, or the string ``"None"`` (not the ``None``
        object) when recognition fails for any reason.
    """
    with sr.Microphone() as source:
        print("Listening...")
        # Seconds of silence after which the phrase is considered finished.
        r.pause_threshold = 1
        audio = r.listen(source)

    try:
        print("Recognizing...")
        query = r.recognize_google(audio, language='en-in')
        print(f"User said: {query}\n")

    except Exception:
        # Best effort: unintelligible speech or a network failure just asks
        # the user to repeat instead of crashing the assistant.
        print("Say that again please...")
        return "None"
    return query

# Main assistant loop: listen, ask the AI, speak the answer, then listen
# once more for the word "stop".
if __name__ == "__main__":
    while True:
        # For keyboard input, uncomment the next line and comment out the
        # takeCommand() line below it.
        # text = input("\n--> ")
        text = takeCommand()

        # takeCommand() returns the string "None" when nothing was recognized;
        # listen again instead of sending "None" to the AI as a question.
        if text == "None":
            continue

        try:
            answer = askAI(text)[1]
        except Exception as e:
            # A failed API call (network error, bad key, ...) should not end
            # the whole session.
            print(f"Error getting answer: {e}")
            continue

        asyncio.run(text_to_speech(answer))

        stop = takeCommand()
        if stop.lower() == "stop":
            print("Exiting...")
            break

askAI.py

from openai import OpenAI
import requests

import requests
import os
from bs4 import BeautifulSoup
import html
import re

def askAI(userInput=None, file=None, url=None):
    """Ask the Perplexity chat-completions API a question.

    The prompt text comes from exactly one source, checked in this order:
    ``file`` (its extracted text replaces ``userInput``), then ``url`` (the
    page's visible text replaces ``userInput``), then ``userInput`` itself.

    Args:
        userInput: The question to ask, as plain text.
        file: Path to a .txt, .pdf, .docx or .doc file to read the prompt from.
        url: Address of a web page to read the prompt from.

    Returns:
        A ``(citations, content, search_results)`` tuple. When the input
        cannot be read or is empty, ``citations`` and ``search_results`` are
        empty lists and ``content`` holds a human-readable error message.

    Raises:
        requests.RequestException: If the API request fails, times out, or
            returns an HTTP error status.
    """
    # Prefer an environment variable so the key does not have to live in the
    # source file; the placeholder is kept as a fallback for the tutorial.
    API_KEY = os.environ.get("PERPLEXITY_API_KEY", "Your API Key")
    API_URL = "https://api.perplexity.ai/chat/completions"
    HEADERS = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    MODEL_NAME = "sonar"

    # Handle file upload
    if file:
        ext = os.path.splitext(file)[1].lower()
        try:
            if ext == ".txt":
                with open(file, "r", encoding="utf-8") as f:
                    userInput = f.read()
            elif ext == ".pdf":
                from PyPDF2 import PdfReader
                reader = PdfReader(file)
                # extract_text() may yield None for pages without a text
                # layer; substitute "" so the join cannot fail.
                userInput = "\n".join(
                    page.extract_text() or "" for page in reader.pages
                )
            elif ext in (".docx", ".doc"):
                import docx
                doc = docx.Document(file)
                userInput = "\n".join(p.text for p in doc.paragraphs)
            else:
                raise ValueError("Unsupported file format. Use txt, pdf, or docx.")
        except Exception as e:
            return [], f"Error reading file: {str(e)}", []

    # Handle URL analysis
    elif url:
        try:
            page = requests.get(url, timeout=10)
            # Do not feed an HTTP error page (404, 500, ...) to the model.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, "html.parser")
            for tag in soup(["script", "style"]):
                tag.extract()
            raw_text = soup.get_text(separator="\n")
            userInput = "\n".join(
                line.strip() for line in raw_text.splitlines() if line.strip()
            )
        except Exception as e:
            return [], f"Error fetching URL: {str(e)}", []

    if not userInput:
        return [], "No input provided.", []

    # chat_history.txt (in the working directory) holds past interactions that
    # are passed to the model as context. A missing file simply means there is
    # no history yet, so it must not crash the assistant.
    try:
        with open("chat_history.txt", "r") as f:
            context = f.read()
    except FileNotFoundError:
        context = ""

    # Join the instructions with spaces: adjacent string literals would
    # otherwise run together ("...Jarvisyou are designed...").
    system_prompt = " ".join([
        "Your name is Jarvis.",
        "You are designed and made by {Your Name}.",
        "Brand that made you is {Your Brand Name If have one}.",
        "You are an super intelligent assistant.",
        "Give Answers in one line or maximum 2 lines.",
        "Keep the answer very precise and filled with useful information without using so much jargons.",
        "Give response in Jarvis accent but without jargons.",
        "Give answer in plain text without using markdown syntax bold, italic, underline, bullets, citations and reference markers etc.",
    ])
    if context:
        system_prompt = f"{system_prompt}\n{context}"

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": userInput},
        ],
        "temperature": 0.7,
        "max_tokens": 3000,
        "stream": False,
        "search_mode": "academic",
    }

    # Without a timeout a stalled connection would block the assistant forever.
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
    response.raise_for_status()
    data = response.json()

    content = data["choices"][0]["message"]["content"]
    citations = data.get("citations", [])
    search_results = data.get("search_results", [])

    return citations, content, search_results

Once everything is ready, simply run main.py, and your personal Jarvis assistant will come to life!
Make sure your askAI.py file is in the same directory.

Thank you for reading this far.
If you found this useful, follow me and subscribe for more in-depth projects like this.

How to Make our own Jarvis with the help of python

Starting From Installing required modules

Dexteritycoder