PRAISELab-PicusLab · Madhumithra1321 · May 30, 2026 · May 31, 2026 · May 31, 2026 · Jun 3, 2026
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
@@ -0,0 +1,6 @@
+[theme]
+primaryColor="#00A86B"
+backgroundColor="#0A0A0A"
+secondaryBackgroundColor="#1A1A1A"
+textColor="#FFFFFF"
+font="sans serif"
diff --git a/dashboard.py/dashboard_app.py b/dashboard.py/dashboard_app.py
@@ -0,0 +1,326 @@
+import sys
+from pathlib import Path
+import plotly.express as px
+
+sys.path.append(str(Path(__file__).resolve().parents[1]))
+
+import streamlit as st
+from www.services.etl.pipeline import openalex_pipeline
+import pandas as pd
+
+st.set_page_config(  # page title and wide layout for the whole app
+    page_title="Bibliometrix Dashboard",
+    layout="wide"
+)
+# Inject custom CSS: app background, heading colours, bordered metric cards.
+st.markdown("""
+<style>
+
+.stApp {
+    background-color: #0A0A0A;
+}
+
+h1 {
+    color: #D4AF37 !important;
+}
+
+h2, h3 {
+    color: #00A86B !important;
+}
+
+div[data-testid="metric-container"] {
+    background-color: #1A1A1A;
+    border: 1px solid #D4AF37;
+    padding: 15px;
+    border-radius: 12px;
+}
+
+</style>
+""", unsafe_allow_html=True)
+# Visible page heading and one-line subtitle.
+st.title("📚 Bibliometrix Dashboard")
+st.write("OpenAlex ETL + Bibliometric Analysis")
+
+# Search bar: topic text box (wide) beside the document-count selector.
+colA, colB = st.columns([3, 1])
+with colA:
+    query = st.text_input(
+        "Search Topic",
+        placeholder="🔍 Search topics like AI, Machine Learning, Data Science...",
+        label_visibility="collapsed"
+    )
+
+with colB:
+    max_results = st.selectbox(
+        "Documents",
+        [50, 100, 200, 500],
+        index=1
+    )
+
+# Blank or whitespace-only input falls back to a default topic.
+query = query.strip() or "machine learning"
+
+df = openalex_pipeline(
+    query=query,
+    max_results=max_results
+)
+# Headline metric cards, one per column; calling .metric() on a column
+# object renders inside it, just like a `with column:` block would.
+col1, col2, col3, col4 = st.columns(4)
+
+# Number of rows in the result set.
+col1.metric("Documents", len(df))
+
+# Distinct non-null entries of the exploded AU column.
+unique_authors = df["AU"].explode().dropna().nunique()
+col2.metric("Authors", unique_authors)
+
+# Distinct non-null entries of the exploded DE column.
+unique_keywords = df["DE"].explode().dropna().nunique()
+col3.metric("Keywords", unique_keywords)
+
+# Sum of the TC column, with missing values counted as zero.
+total_citations = (
+    df["TC"]
+    .fillna(0)
+    .sum()
+)
+col4.metric("Total Citations", int(total_citations))
+# ==================================================
+# DATASET PREVIEW
+# ==================================================
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+📄 STANDARDIZED DATASET PREVIEW
+</h2>
+""", unsafe_allow_html=True)
+# Show the whole pipeline result as a table stretched to the container width.
+st.dataframe(df, use_container_width=True)
+
+# ==================================================
+# PUBLICATIONS BY YEAR
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+📊 PUBLICATIONS BY YEAR
+</h2>
+""", unsafe_allow_html=True)
+
+# Tally documents per PY value and order the tally by year.
+pubs_per_year = df["PY"].value_counts().sort_index()
+
+year_fig = px.bar(
+    x=pubs_per_year.index,
+    y=pubs_per_year.values,
+)
+year_fig.update_traces(marker_color="#D4AF37")
+
+# Dark colours, axis titles and a y-axis tick step of 1.
+year_layout = {
+    "paper_bgcolor": "#0A0A0A",
+    "plot_bgcolor": "#0A0A0A",
+    "font_color": "white",
+    "xaxis_title": "Year",
+    "yaxis_title": "Publications",
+    "yaxis": {"dtick": 1},
+    "showlegend": False,
+}
+year_fig.update_layout(**year_layout)
+
+st.plotly_chart(year_fig, width="stretch")
+
+# ==================================================
+# TOP CITED PAPERS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+🏆 TOP 10 MOST CITED PAPERS
+</h2>
+""", unsafe_allow_html=True)
+
+# Render the table only when citation counts exist; otherwise `top_papers`
+# would be unbound and the display calls below would raise NameError.
+if "TC" in df.columns:
+    top_papers = (
+        df.sort_values("TC", ascending=False)[["TI", "TC", "PY"]]
+        .head(10)
+        .reset_index(drop=True)
+    )
+    st.dataframe(
+        top_papers,
+        width="stretch"
+    )
+    st.write("Rows shown:", len(top_papers))
+else:
+    st.info("Citation counts (TC) are not available for this result set.")
+# ==================================================
+# TOP AUTHORS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#00C78C;
+font-size:42px;
+font-weight:700;'>
+👥 TOP AUTHORS
+</h2>
+""", unsafe_allow_html=True)
+
+# Count publications per author and keep the ten most frequent.
+top_authors = (
+    df["AU"]
+    .explode()
+    .dropna()
+    .value_counts()
+    .head(10)
+    .reset_index()
+)
+top_authors.columns = ["Author", "Publications"]
+
+# Ranking column: trim the labels to the rows actually present, because
+# DataFrame.insert raises ValueError when the lengths differ (< 10 authors).
+rank_labels = ["🥇", "🥈", "🥉", "4", "5", "6", "7", "8", "9", "10"]
+top_authors.insert(0, "Rank", rank_labels[:len(top_authors)])
+
+col1, col2 = st.columns([3, 1])
+with col1:
+    st.dataframe(
+        top_authors,
+        use_container_width=True,
+        hide_index=True
+    )
+
+with col2:
+    # With no author data there is no first row to read, so show a notice.
+    if top_authors.empty:
+        st.info("No author data available.")
+    else:
+        st.metric(
+            "TOP AUTHOR",
+            top_authors.iloc[0]["Author"]
+        )
+        st.metric(
+            "PUBLICATIONS",
+            int(top_authors.iloc[0]["Publications"])
+        )
+# ==================================================
+# TOP KEYWORDS
+# ==================================================
+
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;
+font-size:42px;
+font-weight:700;'>
+🔑 TOP KEYWORDS
+</h2>
+""", unsafe_allow_html=True)
+
+# Frequency table of the ten most common entries in the exploded DE column.
+keyword_counts = df["DE"].explode().dropna().value_counts()
+top_keywords = keyword_counts.head(10).reset_index()
+top_keywords.columns = ["Keyword", "Frequency"]
+
+# Rows are sorted by ascending frequency before being handed to the chart.
+keywords_ascending = top_keywords.sort_values("Frequency")
+keyword_fig = px.bar(
+    keywords_ascending,
+    x="Frequency",
+    y="Keyword",
+    orientation="h",
+    text="Frequency",
+)
+keyword_fig.update_traces(
+    marker_color="#D4AF37",
+    textposition="outside",
+)
+
+# Dark colours, fixed height and explicit margins for the chart area.
+keyword_layout = {
+    "paper_bgcolor": "#0A0A0A",
+    "plot_bgcolor": "#0A0A0A",
+    "font_color": "white",
+    "xaxis_title": "Frequency",
+    "yaxis_title": "",
+    "showlegend": False,
+    "height": 600,
+    "margin": {
+        "l": 20,
+        "r": 20,
+        "t": 20,
+        "b": 20,
+    },
+}
+keyword_fig.update_layout(**keyword_layout)
+# nticks=8 limits how many ticks the x axis shows.
+keyword_fig.update_xaxes(nticks=8)
+
+# Draw the chart stretched to the container width.
+st.plotly_chart(keyword_fig, use_container_width=True)
+st.divider()
+
+st.markdown("""
+<h2 style='color:#D4AF37;font-size:42px;font-weight:700;'>
+⬇ EXPORT RESULTS
+</h2>
+""", unsafe_allow_html=True)
+
+# Build a file-name-safe stem: the free-text query may hold spaces or "/".
+safe_query = "".join(ch if ch.isalnum() else "_" for ch in query.strip())
+st.download_button(
+    label="📥 Download Dataset (CSV)",
+    data=df.to_csv(index=False),
+    file_name=f"{safe_query}_bibliometric_data.csv",
+    mime="text/csv"
+)
+
+st.divider()
+# Footer: centred HTML block with credits, technologies and course details.
+st.markdown("""
+<div style="
+text-align:center;
+padding:30px;
+font-size:15px;
+line-height:1.8;
+color:#CCCCCC;
+">
+
+<h3 style="color:#D4AF37;">
+📚 Bibliometrix Dashboard 
+</h3>
+
+<b style="color:#00C78C;">Developed by</b><br>
+Madhumithra Balasubramanian<br>
+Aya Soundous Hechaichi<br>
+Alina Siddiqui
+
+<br>
+
+<b style="color:#00C78C;">Technologies Used</b><br>
+Python • Streamlit • OpenAlex API • Bibliometrix Framework
+
+<br>
+
+<b style="color:#00C78C;">Hardware and Software for Big Data – Mod B</b><br>
+University of Naples Federico II
+
+<br>
+
+<b style="color:#00C78C;">Professor:</b> Vincenzo Moscato<br>Data Science Course – Academic Year 2025/2026
+</div>
+""", unsafe_allow_html=True)
diff --git a/functions/get_co_occurence_network.py b/functions/get_co_occurence_network.py
@@ -479,7 +479,7 @@ def field_by_year(df, field_cn, timespan=None, min_freq=2, n_items=5, remove_ter
         The field to analyze ('ID', 'DE', 'TI', 'AB', 'WC')
     """
     # Get the field data
-    M = df.get()
+    M = df
 
     # Create co-occurrence matrix
     A = cocMatrix(df, field_cn, binary=False, remove_terms=remove_terms, synonyms=synonyms)

diff --git a/functions/get_collaborationnetwork.py b/functions/get_collaborationnetwork.py
@@ -46,7 +46,6 @@ def get_collaboration_network(
     print("Generating collaboration network...")
 
     M = df
-    m = df.get()
     NetRefs = None
     Title = ""
 

diff --git a/functions/get_factorialanalysis.py b/functions/get_factorialanalysis.py
@@ -74,7 +74,7 @@ def get_factorial_analysis(
     # Set ngrams based on word_type
     ngrams = int(ngram) if field in ['TI', 'AB'] else 1
 
-    M = df.get()
+    M = df
     tab = table_tag(M, field, ngrams)
 
     if len(tab) >= 2:

diff --git a/functions/get_frequentwords.py b/functions/get_frequentwords.py
@@ -100,7 +100,7 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None):
     """
     Extract and count words from a specified field in the DataFrame.
     """
-    M = df.get()
+    M = df
 
     # Remove duplicates
     M = M.drop_duplicates(subset='SR')

diff --git a/functions/get_localcitedauthors.py b/functions/get_localcitedauthors.py
@@ -20,7 +20,7 @@ def get_local_cited_authors(df, num_of_cited_authors, fast_search=False):
         loccit = 1
 
     df = metaTagExtraction(df, "SR")
-    M = df.get()
+    M = df
 
     # Fill missing values
     M['TC'] = M['TC'].fillna(0)

diff --git a/functions/get_localciteddocuments.py b/functions/get_localciteddocuments.py
@@ -14,7 +14,7 @@ def get_local_cited_documents(df, num_of_local_cited_docs, field_separator, fast
         A Plotly figure object and a DataFrame of the most local cited documents.
     """
     df = metaTagExtraction(df, "SR")
-    M = df.get()
+    M = df
 
     # Determine the local citation threshold
     if fast_search:

diff --git a/functions/get_treemap.py b/functions/get_treemap.py
@@ -75,7 +75,7 @@ def table_tag(df, tag, ngrams=1, remove_terms=None, synonyms=None):
     """
     Extract and count words from a specified field in the DataFrame.
     """
-    M = df.get()
+    M = df
 
     # Remove duplicates
     M = M.drop_duplicates(subset='SR')