{"name":"Vedant Hirekar","role":"AI & Software Engineer","location":"United States","email":"hirekarvedant@gmail.com","links":{"github":"https://github.com/vedanthirekar","linkedin":"https://www.linkedin.com/in/vedanthirekar","resume":"https://drive.google.com/file/d/1a2mlnk0AoPdrX5QFqKxZ4FUxSjizVMPq/view?usp=sharing","site":"https://vedanthirekar.com"},"experience":[{"slug":"project-990","company":"Project 990","role":"AI Engineer","start":"2026-01","end":null,"summary":"Designing and productionizing a multi-stage LLM pipeline on HPC that generates mission statements for 175,000+ nonprofits from IRS Form 990-EZ data, powering downstream analytics and grantor–grantee matching.","tagline":"LLM pipelines on HPC, at 175k+ scale","highlights":["Designed and deployed a 3-stage LLM pipeline on HPC using Mistral-7B for generation and Gemma-7B for evaluation across 175,000+ nonprofits.","Built a fallback architecture integrating organization descriptions and grant-purpose data, raising mission-statement coverage from 50% to 93.5% across 163,700+ organizations.","Productionized the pipeline with Slurm job scheduling, fault-tolerant checkpointing, and a config/model-agnostic design for end-to-end automated execution."],"stack":["LLMs","HPC","Slurm","Mistral-7B","Python"]},{"slug":"math-tutor","company":"Indiana University","role":"Mathematics Tutor","start":"2026-01","end":"2026-05","summary":"Tutored undergraduate students in mathematics - breaking dense concepts into simple, workable steps, one student at a time.","tagline":"Teaching math, one concept at a time","highlights":["Tutored undergraduates across core mathematics courses, adapting explanations to each student's way of thinking."],"stack":["Calculus","Statistics","Problem Solving"]},{"slug":"ai-cybersecurity-research","company":"Indiana University","role":"Research Assistant - AI in Cybersecurity","start":"2025-06","end":"2025-12","summary":"Researched multi-agent reinforcement learning for autonomous cyber 
defense in the CAGE Challenge 4 environment - training and evaluating agents that detect and respond to threats.","tagline":"Multi-agent RL for cyber defense","highlights":["Applied, trained, and evaluated RL algorithms from Stable-Baselines3 and RLlib in a multi-agent reinforcement learning (MARL) environment, CAGE Challenge 4.","Designed reward-shaping strategies with additional reward signals to improve agent behavior, stability, and threat-detection performance in simulated adversarial scenarios."],"stack":["Reinforcement Learning","MARL","Stable-Baselines3","RLlib","PyTorch"]},{"slug":"methix","company":"Methix","role":"AI Developer Intern","start":"2025-06","end":"2025-12","summary":"Built an artist-management agentic AI system - a personal manager for music artists with retrieval, scheduling, and outreach capabilities on Azure.","tagline":"Agentic AI on Azure OpenAI","highlights":["Developed an agentic AI system using Azure OpenAI and LangChain with tool-use capabilities: RAG across 100+ documents, scheduling, and outreach.","Engineered the data retrieval layer for an AI search feature, writing SQL template queries to fetch and rank 200+ artist profiles against real-time user queries.","Optimized AI pipelines and deployments in Azure AI Foundry by analyzing usage and storage tiers, reducing production cost by 20% while maintaining reliability."],"stack":["Azure OpenAI","LangChain","Agentic AI","RAG","SQL"]},{"slug":"parallel-wireless","company":"Parallel Wireless","role":"Software Engineering Intern","start":"2024-02","end":"2024-07","summary":"Debugged and resolved production defects in React and TypeScript applications, and automated GUI validation with Jest - improving stability, coverage, and frontend performance.","tagline":"React/TypeScript debugging & Jest automation","highlights":["Debugged and resolved production software defects in React and TypeScript applications, improving reliability and user experience in an Agile environment.","Developed and 
maintained Jest unit-testing suites, improving application stability, test coverage, and frontend performance by 30%."],"stack":["React","TypeScript","Jest","Agile"]},{"slug":"krios","company":"Krios Info Solutions","role":"Data Science Intern","start":"2023-02","end":"2023-05","summary":"Built a retail demand-forecasting system over 5+ years of daily sales data for 300+ SKUs, with Power BI dashboards that turned forecasts into stakeholder decisions.","tagline":"Sales forecasting & Power BI dashboards","highlights":["Cleaned and transformed 5+ years of daily sales data for 300+ SKUs using Python and Pandas into a model-ready dataset for demand forecasting.","Improved forecasting accuracy by 10% through ensemble architectures and hyperparameter tuning via cross-validation and grid search.","Designed and maintained 5+ interactive Power BI dashboards visualizing sales trends, seasonal patterns, and forecast accuracy for stakeholders."],"stack":["Python","Pandas","Power BI","Time Series"]}],"education":[{"institution":"Indiana University, Bloomington","degree":"M.S. in Data Science","start":"2024","end":"2026","gpa":"3.9 / 4.0","coursework":["Data Structures & Algorithms","Applied Machine Learning","Usable AI","Computer Vision","Information Visualization","Big Data Management","Database Technologies","Computer Networks","Statistics"],"recordUrl":"https://drive.google.com/file/d/1AWM9YvbbD8M5D7mOGXu64U-fMvym-ilk/view?usp=sharing"},{"institution":"Savitribai Phule Pune University","degree":"B.E. 
in AI and Data Science","start":"2020","end":"2024","gpa":"3.6 / 4.0","coursework":["Software Engineering","Data Structures & Algorithms","Machine Learning & Deep Learning","Big Data Analytics","Database Management Systems"],"recordUrl":"https://drive.google.com/file/d/1IWX800R2BTD_0p6wlrdE-5ujkn2BjH8W/view?usp=sharing"}],"projects":[{"slug":"ncaa-analytics-1","name":"NCAA Analytics Challenge","oneLiner":"Won the NCAA Final Four Analytics Challenge - Predicting seeds for 2026 NCAA March Madness.","description":"Predicted NCAA Tournament seedings for over 360 college basketball teams using five seasons of historical data. The real challenge was reverse-engineering how the selection committee weighs NET rankings, quadrant records, and conference strength. We built a seven-model gradient-boosting ensemble over 104 engineered features that reached 78% accuracy, cutting prediction error by 43% versus the baseline. I then used Tableau dashboards to turn the findings into a clear narrative for NCAA stakeholders.","stack":["Machine Learning","Gradient Boosting","Feature Engineering","Tableau"],"links":{"linkedIn":"https://www.linkedin.com/posts/vedanthirekar_ncaafinalfour-analytics-challenge-activity-7101870919055691776-0g7A?utm_source=share&utm_medium=member_desktop","github":"https://github.com/vedanthirekar/NCAA-Final-Four-Analytics-Challenge"},"tradeoffs":["Minimizing RMSE on seeds sounds clean on paper, but committee logic is inconsistent - most of the work was iterative error analysis to find where the model was systematically wrong, then encoding those patterns as features.","The Tableau narrative ended up mattering as much as the model when presenting to judges - a lesson in how far accuracy alone gets you."],"featured":true},{"slug":"yanck","name":"Yanck","oneLiner":"No-code RAG chatbot platform for teams with zero ML engineers.","description":"Created a platform that lets non-technical users create and deploy AI assistants on their own data through a guided 
workflow: upload documents, generate embeddings, and serve responses through Google Gemini. The hardest part was making integration painless for whoever handled it on the client's side, so we shipped three deployment paths: an embeddable JS widget, an iframe, and a REST API with key-based auth.","stack":["LangChain","Google Gemini","RAG","Flask"],"links":{"github":"https://github.com/vedanthirekar/Yanck"},"tradeoffs":["Chose a guided linear workflow over a flexible node editor - less powerful, but non-technical users finished setup instead of abandoning it.","Retrieval quality depends heavily on how users chunk their uploads; automatic chunking heuristics are good enough, not great."],"featured":true},{"slug":"sofi-2035","name":"SOFI 2035","oneLiner":"Interactive global-futures dashboard built for The Millennium Project.","description":"Built an interactive dashboard that lets users explore global development trends across economic, social, environmental, governance, and technology indicators through dynamic visualizations and scenario analysis. We built it for a real client, The Millennium Project.","stack":["Data Visualization","Plotly","Python","Data Pre-processing"],"links":{"github":"https://github.com/vedanthirekar/SOFI-2035-Info-Viz-Project","live":"https://sofi2035.pythonanywhere.com/"},"tradeoffs":["Server-rendered Plotly charts kept development fast but make first load heavier than a hand-rolled D3 build would be.","Scenario analysis is precomputed rather than live - simpler and more reliable on free-tier hosting, at the cost of interactivity."],"featured":true},{"slug":"healthcare-data-pipeline","name":"Healthcare Data Pipeline","oneLiner":"ETL pipeline standardizing patient and device data, with live monitoring dashboards.","description":"I engineered an end-to-end pipeline using Spark and Airflow to standardize FHIR patient and device data landing in AWS S3. 
I connected Tableau dashboards to live MySQL pipelines so the team could monitor throughput and utilization metrics in real time.","stack":["Airflow","PySpark","AWS S3","Tableau","FHIR"],"links":{"github":"https://github.com/vedanthirekar/healthcare-data-pipeline"},"tradeoffs":["Spark is overkill for the demo data volume; it was used deliberately to exercise the same tooling that production-scale volumes need.","Live MySQL-backed dashboards demo well but need connection pooling and caching before they'd survive real concurrent load."],"featured":false},{"slug":"expense-tracker","name":"AI Expense Tracker","oneLiner":"Voice-driven expense tracking with semantic categorization at 92% accuracy.","description":"I built a FastAPI backend for an AI-powered expense tracker that supports voice-based transaction processing and real-time categorization. Categorized expenses semantically using BERT embeddings with Qdrant vector search, reaching 92% classification accuracy, and added Power BI dashboards for time-series spending forecasts. The project evolved from Yafa, my GeeksforGeeks Hackathon winner.","stack":["FastAPI","BERT","Qdrant","Time Series","Power BI"],"links":{"github":"https://github.com/vedanthirekar/Yafa-Personal-Finance-Manager"},"tradeoffs":["Voice-to-text accuracy drops with background noise and accents; a confirmation step was added rather than chasing model accuracy.","Embedding-based categorization beats rules at 92%, but the last 8% is ambiguous even to humans - a category-correction flow mattered more than model tuning."],"featured":false},{"slug":"optical-music-recognition","name":"Optical Music Recognition","oneLiner":"Computer vision system that digitizes sheet music from images.","description":"Implemented a computer vision system that detects and digitizes music notes from images of sheet music. 
It uses image processing and pattern recognition to convert physical sheets into editable digital formats.","stack":["Computer Vision","OpenCV","Python","Image Processing"],"links":{},"tradeoffs":["Classical template matching over a learned model - explainable and training-data-free, but it degrades on handwritten or low-quality scans."],"featured":false}],"achievements":[{"slug":"ncaa-analytics-challenge","title":"NCAA Final Four Analytics Challenge - Winner","detail":"Won by predicting tournament seedings for 360+ teams at 78% accuracy with a 7-model gradient-boosting ensemble over 104 engineered features, presented to NCAA stakeholders through Tableau dashboards.","year":2026},{"slug":"innoquest-2025","title":"InnoQuest 2025 - Honorable Mention","detail":"Recognized by the Shoemaker Innovation Center at Indiana University for building an AI assistant builder that lets SMBs put AI on their own data in 3–4 simple steps.","year":2025},{"slug":"geeksforgeeks-hackathon","title":"GeeksforGeeks Hackathon - Winner","detail":"Won with a personal finance app featuring AI-based voice-to-text expense logging and expense forecasting.","year":2023},{"slug":"tedx-curator","title":"TEDx Curator","detail":"Organized a TEDx event - coordinating speakers and logistics, delivering talks to an audience of 500+.","year":2023}]}