Stack: Oracle Cloud (OCI) + Oracle AI Autonomous Database + APEX + Oracle ML + Select AI (GenAI) Budget: $300 USD Oracle Academy credits (~$15 actual usage) Data: 48,944 matches | 44,569 goals | 666 shootouts | Historical 1872-2024
Deploy Oracle AI Autonomous Database on OCI
Navigation: Oracle AI Database โ Autonomous AI Database โ Create Autonomous AI Database
Configuration:
- Display Name: WorldCupDB
- Database Name: WorldCupDB
- Compartment: Select any department or root
- Workload type: Lakehouse
- Choose database version: 26ai
- OCPU: 1 (Always Free) - Be sure you have 26ai, otherwise, don't select Always Free
- Storage: 20GB
- Admin Password: [Strong password - save this!] WorldCupDB1234
- Network: Secure access from everywhere
SELECT SYSDATE FROM DUAL;CREATE USER worldcup IDENTIFIED BY "YourPassword123#";
GRANT CONNECT, RESOURCE, UNLIMITED TABLESPACE TO worldcup;
GRANT CREATE VIEW, CREATE SESSION TO worldcup;
GRANT DWROLE TO worldcup;
Load football data and create analytical views
Method: Database Actions Data Load Tool
CREATE INDEX idx_results_date ON match_results(date_rw);
CREATE INDEX idx_results_teams ON match_results(home_team, away_team);
CREATE INDEX idx_goalscorers_scorer ON goalscorers(scorer);
-- View 1: Competitive matches only
CREATE OR REPLACE FORCE EDITIONABLE VIEW "VW_COMPETITIVE_MATCHES" ("DATE_RW", "HOME_TEAM", "AWAY_TEAM", "HOME_SCORE", "AWAY_SCORE", "TOURNAMENT", "CITY", "COUNTRY", "NEUTRAL", "WINNER") AS
SELECT
m."DATE_RW",m."HOME_TEAM",m."AWAY_TEAM",m."HOME_SCORE",m."AWAY_SCORE",m."TOURNAMENT",m."CITY",m."COUNTRY",m."NEUTRAL",
CASE
WHEN home_score > away_score THEN home_team
WHEN away_score > home_score THEN away_team
ELSE 'Draw'
END AS winner
FROM match_results m
WHERE tournament IN (
'FIFA World Cup')
AND date_rw >= DATE '1950-01-01';
-- View 2: Team statistics
CREATE OR REPLACE VIEW vw_team_statistics AS
WITH team_matches AS (
SELECT home_team AS team,
CASE WHEN home_score > away_score THEN 1 ELSE 0 END AS wins,
home_score AS goals_for, away_score AS goals_against
FROM vw_competitive_matches
UNION ALL
SELECT away_team AS team,
CASE WHEN away_score > home_score THEN 1 ELSE 0 END AS wins,
away_score AS goals_for, home_score AS goals_against
FROM vw_competitive_matches
)
SELECT
team,
COUNT(*) AS total_matches,
SUM(wins) AS total_wins,
ROUND(SUM(wins) * 100.0 / COUNT(*), 2) AS win_percentage,
SUM(goals_for) AS total_goals_scored,
SUM(goals_for) - SUM(goals_against) AS goal_difference
FROM team_matches
GROUP BY team;
-- View 3: 2026 Venues
CREATE TABLE wc2026_venues (
venue_id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
venue_name VARCHAR2(200),
city VARCHAR2(100),
country VARCHAR2(50),
latitude NUMBER(10,6),
longitude NUMBER(10,6),
altitude_meters NUMBER
);
INSERT INTO wc2026_venues
(venue_name, city, country, latitude, longitude, altitude_meters)
VALUES
('Estadio Azteca', 'Mexico City', 'Mexico', 19.302969, -99.150635, 2240),
('SoFi Stadium', 'Los Angeles', 'USA', 33.953467, -118.339038, 30),
('MetLife Stadium', 'New York', 'USA', 40.813611, -74.074444, 3);
Query 1: Spainโs High-Temperature Performance
WITH spain_matches AS (
SELECT
CASE
WHEN (home_team = 'Spain' AND home_score > away_score) OR
(away_team = 'Spain' AND away_score > home_score) THEN 1
ELSE 0
END AS is_win
FROM match_results
WHERE (home_team = 'Spain' OR away_team = 'Spain')
)
SELECT
COUNT(*) AS total_matches,
SUM(is_win) AS wins,
ROUND(SUM(is_win) * 100.0 / COUNT(*), 2) AS win_percentage
FROM spain_matches;
Query 2: Top World Cup Scorers
SELECT
g.scorer,
COUNT(*) AS total_goals,
COUNT(DISTINCT g.DATE_RW) AS matches_played,
ROUND(COUNT(*) * 1.0 / COUNT(DISTINCT g.date_rw), 2) AS goals_per_match
FROM goalscorers g
JOIN match_results m ON g.DATE_RW = m.DATE_RW
WHERE m.tournament = 'FIFA World Cup'
AND g.own_goal = 'FALSE'
GROUP BY g.SCORER
ORDER BY total_goals DESC
FETCH FIRST 10 ROWS ONLY;
Query 3: Home Advantage Analysis
SELECT
CASE WHEN neutral = 'TRUE' THEN 'Neutral' ELSE 'Home' END AS venue_type,
COUNT(*) AS matches,
SUM(CASE WHEN home_score > away_score THEN 1 ELSE 0 END) AS home_wins,
ROUND(SUM(CASE WHEN home_score > away_score THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS home_win_pct
FROM vw_competitive_matches
GROUP BY CASE WHEN neutral = 'TRUE' THEN 'Neutral' ELSE 'Home' END;
Query 4: Penalty Shootout Masters
SELECT
winner AS team,
COUNT(*) AS shootout_wins,
ROUND(COUNT(*) * 100.0 / (
SELECT COUNT(*) FROM shootouts
WHERE home_team = winner OR away_team = winner
), 2) AS win_rate
FROM shootouts
GROUP BY winner
ORDER BY shootout_wins DESC
FETCH FIRST 10 ROWS ONLY;
Query 5: Tournament Evolution
SELECT
TRUNC(EXTRACT(YEAR FROM date_rw) / 10) * 10 AS decade,
COUNT(*) AS matches,
ROUND(AVG(home_score + away_score), 2) AS avg_goals_per_match
FROM match_results
WHERE tournament = 'FIFA World Cup'
GROUP BY TRUNC(EXTRACT(YEAR FROM date_rw) / 10) * 10
ORDER BY decade;
SELECT
venue_name,
city || ', ' || country AS location,
latitude,
longitude
FROM wc2026_venues;
โ Next โ Points โ Geomery Column:LOCATION(Varchar2) โ Create Page
Region 1: Bar Chart - Matches Over Time
SELECT EXTRACT(YEAR FROM date_rw) AS year, COUNT(*) AS matches
FROM match_results
WHERE EXTRACT(YEAR FROM date_rw) >= 1950
AND TOURNAMENT = 'FIFA World Cup'
GROUP BY EXTRACT(YEAR FROM date_rw)
ORDER BY year;
Region 2: Pie Chart - Top Teams
SELECT team, total_wins
FROM vw_team_statistics
ORDER BY total_wins DESC
FETCH FIRST 10 ROWS ONLY;CREATE OR REPLACE FORCE EDITIONABLE VIEW "VW_COMPETITIVE_MATCHES" ("DATE_RW", "HOME_TEAM", "AWAY_TEAM", "HOME_SCORE", "AWAY_SCORE", "TOURNAMENT", "CITY", "COUNTRY", "NEUTRAL", "WINNER") AS
SELECT m."DATE_RW",m."HOME_TEAM",m."AWAY_TEAM",m."HOME_SCORE",m."AWAY_SCORE",m."TOURNAMENT",m."CITY",m."COUNTRY",m."NEUTRAL",
CASE
WHEN home_score > away_score THEN home_team
WHEN away_score > home_score THEN away_team
ELSE 'Draw'
END AS winner
FROM match_results m
WHERE tournament IN ('FIFA World Cup')
AND date_rw >= DATE '1950-01-01';
Region 3: Bar Chart - Goalers
SELECT SCORER, COUNT(1) GOAL
FROM match_results mr,
GOALSCORERS g
where tournament = 'FIFA World Cup'
AND MR.DATE_RW = G.DATE_RW
AND MR.HOME_TEAM = G.HOME_TEAM
AND OWN_GOAL ='FALSE'
GROUP BY SCORER
ORDER BY GOAL DESC;
Train a machine learning model using Python (scikit-learn) to predict 2026 World Cup outcomes.
OCI - Analytics & AI - Data Science Create Project Name: worldcup_ds Create notebook session Name: worldcup_nb Create Open Python 3 (ipykernel) Create Notebook
Install Required Libraries:
Python 3
# Install packages
!pip install pandas numpy scikit-learn oracledb matplotlib seaborn joblib
Test Oracle Connection:
import oracledb
# Configure connection
username = "worldcup"
password = "YourPassword123#"
dsn = "your_connection_string" # From tnsnames.ora
username = "worldcup"
password = "YourPassword123#"
dsn = "worldcupdb_high" # From tnsnames.ora
connection = oracledb.connect(
user=username,
password=password,
dsn=dsn,
config_dir="./wallet",
wallet_location="./wallet",
wallet_password='WorldCupDB1234'
)
print("โ
Connected to Oracle!")
connection.close()
Execute notebooks/world_cup_ml_tutorial.ipynb to train the model and get the results.
extract_data.py - Oracle extractionfeature_engineering.py - Feature creationtrain_model.py - Model trainingpredict_2026.py - Generate predictionsupload_to_oracle.py - Database uploadmodels/best_model.pkl - Trained modeloutput/confusion_matrix.png - Evaluation chartoutput/feature_importance.png - Feature analysisPREDICCIONES_FINAL table (frozen, read-only)Estimated Time: 6-7 hours Cost: ~$0.50 (database operations only)
โ Level 5 Complete with Python!
The live workshop now uses Grok 4 through OCI Generative AI Inference plus Oracle AI Database as the memory and retrieval layer. After Level 5 creates PREDICCIONES_FINAL, run scripts/load_langchain_vectors.py --reset to build the langchain-oracledb OracleVS table SOCCER_LANGCHAIN_DOCS from prediction documents and football facts. The final chat should use hybrid_retrieve / startup hybrid grounding first, then use vector_search only as the semantic-only baseline or fallback.
PREDICCIONES_FINAL are converted into LangChain Document rows.Legacy Select AI / APEX instructions below are retained for the original hackathon guide, but the agent workshop path is Grok 4 + LangChain OracleVS hybrid retrieval.
oci_api_key.pem)Do not commit the generated API key, fingerprint, OCIDs, or private key. Store them in your local OCI config or .env only.
[DEFAULT]
user=ocid1.user.oc1..aaaaaa... [YOUR USER OCID]
fingerprint=xx:xx:xx:xx:xx... [YOUR FINGERPRINT]
tenancy=ocid1.tenancy.oc1..aaa... [YOUR TENANCY OCID]
region=us-ashburn-1 [YOUR REGION]
key_file=~/oci_api_key.pem [PATH TO PRIVATE KEY]
Connect as ADMIN user, then run:
-- Grant necessary privileges to worldcup user
GRANT EXECUTE ON DBMS_CLOUD TO worldcup;
GRANT EXECUTE ON DBMS_CLOUD_AI TO worldcup;
GRANT CREATE MINING MODEL TO worldcup;
Switch to WORLDCUP user, then create credential:
-- Method 1: Using API Key Authentication (Recommended)
BEGIN
DBMS_CLOUD.CREATE_CREDENTIAL(
credential_name => 'OCI_GENAI_CRED',
user_ocid => '<YOUR_USER_OCID>',
tenancy_ocid => '<YOUR_TENANCY_OCID>',
private_key => '<PASTE YOUR ENTIRE PRIVATE KEY HERE>',
fingerprint => 'aa:bb:cc:dd:ee:ff:00:11:22:33:44:55:66:77:88:99' -- Your fingerprint
);
END;
/
-- Verify credential was created
SELECT credential_name, username
FROM user_credentials
WHERE credential_name = 'OCI_GENAI_CRED';
Connect as WORLDCUP user, then run:
-- Create AI profile with Meta Llama
BEGIN
-- Drop existing profile if exists
BEGIN
DBMS_CLOUD_AI.DROP_PROFILE(
profile_name => 'WORLDCUP_AI_PROFILE'
);
EXCEPTION
WHEN OTHERS THEN NULL;
END;
-- Create new profile with OCI GenAI and Llama
DBMS_CLOUD_AI.CREATE_PROFILE(
profile_name => 'WORLDCUP_AI_PROFILE',
attributes => JSON_OBJECT(
'provider' VALUE 'oci',
'credential_name' VALUE 'OCI_GENAI_CRED',
'model' VALUE 'meta.llama-3.3-70b-instruct', -- Most powerful Llama model
'oci_apiformat' VALUE 'GENERIC',
'object_list' VALUE JSON_ARRAY(
JSON_OBJECT('owner' VALUE 'WORLDCUP', 'name' VALUE 'MATCH_RESULTS'),
JSON_OBJECT('owner' VALUE 'WORLDCUP', 'name' VALUE 'GOALSCORERS'),
JSON_OBJECT('owner' VALUE 'WORLDCUP', 'name' VALUE 'VW_TEAM_STATISTICS'),
JSON_OBJECT('owner' VALUE 'WORLDCUP', 'name' VALUE 'SHOOTOUTS')
),
'temperature' VALUE 0.1, -- Lower = more deterministic SQL
'max_tokens' VALUE 500
)
);
DBMS_OUTPUT.PUT_LINE('โ
AI Profile created with Meta Llama 3.3 70B');
END;
/
-- List all AI profiles
SELECT profile_name, status
FROM user_cloud_ai_profiles;
-- Set active AI profile
EXEC DBMS_CLOUD_AI.SET_PROFILE('WORLDCUP_AI_PROFILE');
Test 1: Simple Count Query
SELECT AI "How many matches has Spain played?";
-- Expected: AI translates to SQL like:
-- SELECT COUNT(*) FROM match_results
-- WHERE home_team = 'Spain' OR away_team = 'Spain'
Test 2: Statistical Query
EXEC DBMS_CLOUD_AI.SET_PROFILE('WORLDCUP_AI_PROFILE');
SELECT AI "What is Spain's win percentage in competitive matches?";
-- Expected: Uses vw_team_statistics view
-- SELECT win_percentage FROM vw_team_statistics WHERE team = 'Spain'
Test 3: Top Scorers
EXEC DBMS_CLOUD_AI.SET_PROFILE('WORLDCUP_AI_PROFILE');
SELECT AI "Who are the top 5 goal scorers in World Cup history?";
-- Expected: Joins goalscorers with match_results, filters by tournament
Test 4: Complex Aggregation
EXEC DBMS_CLOUD_AI.SET_PROFILE('WORLDCUP_AI_PROFILE');
SELECT AI "Compare Spain's and Germany's performance in the last 20 years";
-- Expected: Multi-table query with date filtering and comparison
Test 5: Head-to-Head
EXEC DBMS_CLOUD_AI.SET_PROFILE('WORLDCUP_AI_PROFILE');
SELECT AI "What is the head-to-head record between Brazil and Argentina?";
-- Expected: Filters matches between these teams, counts wins/draws
Create a Classic Report Region and Type as Function Body returning SQL Query. Enable Use Generic Column Names property and enter number of columns in Generic Column Count.
Enter PL/SQL Function Body as follows. DBMS_CLOUD_AI.GENERATE returns the SQL query using SELECT AI. BEGIN IF :PROMPT IS NOT NULL THEN RETURN DBMS_CLOUD_AI.GENERATE(:PROMPT, profile_name => โOCI_GENAIโ); END IF; END;
Add helpful comments to tables/columns:
-- Table comments
COMMENT ON TABLE match_results IS 'Historical international football match results from 1872 to present. Includes World Cups, regional tournaments, and qualifiers.';
COMMENT ON TABLE goalscorers IS 'Individual goal records showing who scored, when, and match context. Includes penalty and own goal indicators.';
-- Column comments (helps AI understand data)
COMMENT ON COLUMN match_results.home_score IS 'Number of goals scored by home team (non-negative integer)';
COMMENT ON COLUMN match_results.away_score IS 'Number of goals scored by away team (non-negative integer)';
COMMENT ON COLUMN match_results.neutral IS 'TRUE if match played at neutral venue, FALSE if home advantage';
COMMENT ON COLUMN match_results.tournament IS 'Official tournament name - FIFA World Cup, UEFA Euro, Copa America, etc';
-- This helps the LLM generate better SQL queries!