Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 58 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,8 @@ def create_loading_modal(analysis_type="analysis"):
"": "-",
"1A": "Import raw data file(s)",
"1B": "Load Bibliometrix file(s)",
"1C": "Use a sample dataset"
"1C": "Use a sample dataset",
"1D": "Retrieve via API (PubMed / OpenAlex)"
},
)

Expand Down Expand Up @@ -711,6 +712,18 @@ def select_db():
ui.input_action_button("start_button", "Start", icon=ICONS["play"])
ui.markdown("Select a predefined sample dataset for testing purposes.")

elif input.select() == "1D":
ui.input_text("api_query", "Search Query", value="machine learning",
placeholder="e.g. cancer immunotherapy")
ui.input_select("api_platform", "Platform", {
"pubmed": "PubMed",
"openalex": "OpenAlex",
}, selected="pubmed")
ui.input_numeric("api_max_results", "Max Results", value=100, min=10, max=500)
ui.p("Data will be retrieved automatically via API — no file download needed.",
style="color: gray; font-size: 10px;")
ui.input_action_button("start_button", "Retrieve & Load", icon=ICONS["play"])

else:
ui.p("Please select a valid action to begin managing your data.", style="color: gray;")
ui.p("Follow the instructions below to manage your data efficiently:")
Expand Down Expand Up @@ -748,7 +761,48 @@ def mostra():

if database == "Sample":
data = df.set(pd.read_excel("sources/samples/sample.xlsx"))
reset_all_analyses() # Reset analysis results when sample is loaded
reset_all_analyses()

if input.select() == "1D":
# Advanced Level: retrieve via API
try:
from www.services.api_retriever import retrieve_from_api
from www.services.transformer import transform
from www.services.field_calculator import add_sr
from www.services.validator import validate as etl_validate

query = input.api_query()
platform = input.api_platform()
max_res = input.api_max_results()

source_map = {"pubmed": "pubmed", "openalex": "scopus"}
ext_map = {"pubmed": "virtual.txt", "openalex": "virtual.csv"}

records = retrieve_from_api(query=query, platform=platform, max_results=max_res)
source = source_map[platform]
loaded = transform(records, source=source, filepath=ext_map[platform])
loaded = add_sr(loaded)
etl_validate(loaded)
df.set(loaded)
reset_all_analyses()

preview = loaded[["TI", "AU", "PY", "SO"]].head(5).copy()
preview["AU"] = preview["AU"].apply(
lambda x: ", ".join(x[:2]) if isinstance(x, list) else str(x))
preview["TI"] = preview["TI"].str[:50] + "..."

ui.div(
ui.h5(f"✅ Retrieved {len(loaded)} records from {platform.upper()}",
style="color:green;"),
ui.p(f"Query: '{query}' — data loaded. Use the analysis panels now."),
ui.HTML(preview.to_html(index=False,
classes="table table-striped table-sm", border=0)),
)
except Exception as e:
ui.div(
ui.h5("❌ API Error:", style="color:red;"),
ui.p(str(e), style="color:red;"),
)

@render.express()
@reactive.event(input.Dataset)
Expand Down Expand Up @@ -854,7 +908,8 @@ def indicator_types_ui_all():
),

with ui.nav_panel("None", value="API"):
ui.h3("🚧 Warning: API is under construction 🚧")
ui.h3("🔗 API Data Retrieval", style="color: #5567BB;")
ui.p("Use Data menu on the left → Choose an action → Retrieve via API (PubMed / OpenAlex)")

with ui.nav_panel("None", value="collections"):
ui.h3("🚧 Warning: Merge Collection is under construction 🚧")
Expand Down
37 changes: 37 additions & 0 deletions demo_etl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
demo_etl.py — generates standardised CSV files from all sources.

Run from the bibliometrix-python root:
    python demo_etl.py

For every entry in SOURCES whose input file exists, the file is converted
with ``convert2df`` and written to the listed output CSV. Missing inputs
are reported and skipped; a short summary of each converted frame is
printed to stdout.
"""
import os
import sys

# Put this script's directory first on sys.path before the project import
# below (presumably so the ``www`` package beside the script is found).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from www.services.standardizer import convert2df

# Each entry: (display name, source key passed to convert2df,
#              input path, output CSV path). Paths are relative to the cwd.
SOURCES = [
    ("Scopus", "scopus", "sources/Scopus/Scopus.csv", "output_scopus.csv"),
    ("Dimensions", "dimensions", "sources/Dimensions/Dimensions.csv", "output_dimensions.csv"),
    ("PubMed", "pubmed", "sources/PubMed/pubmed-allergicrh-set.txt", "output_pubmed.csv"),
]

_RULE = "=" * 55

print("\n" + _RULE)
print(" Bibliometrix-Python ETL Pipeline \u2014 Demo")
print(_RULE)

# Count the files actually written so the closing message is truthful.
_written = 0

for name, source, inpath, outpath in SOURCES:
    if not os.path.exists(inpath):
        print(f"\n[SKIP] {name} file not found: {inpath}")
        continue

    print(f"\n[{name}] Processing {inpath} ...")
    df = convert2df(inpath, source)
    df.to_csv(outpath, index=False)
    _written += 1

    print(f" Records : {len(df)}")
    print(f" Columns : {len(df.columns)}")
    print(f" Saved : {outpath}")

    # ``.iloc[0]`` raises IndexError on a frame with no rows; an empty
    # export must not abort the remaining sources.
    if df.empty:
        print(" (no records - sample values not shown)")
        continue

    print(f" SR[0] : {df['SR'].iloc[0]}")
    print(f" AU[0] : {df['AU'].iloc[0][:2]}")
    print(f" PY type : {type(df['PY'].iloc[0]).__name__}")

print("\n" + _RULE)
if _written:
    print(" All done. Upload the output_*.csv files to the dashboard.")
else:
    # Every input was missing, so there is nothing to upload.
    print(" No input files were found; no output_*.csv files were written.")
print(_RULE + "\n")
5 changes: 2 additions & 3 deletions functions/get_affiliationproductionovertime.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ def get_affiliation_production_over_time(df, top_k_affiliations):
# Customize the grid
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, AffOverTime
5 changes: 2 additions & 3 deletions functions/get_annualproduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,7 @@ def get_annual_production(df):
# Customize the grid
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, publications_per_year
5 changes: 2 additions & 3 deletions functions/get_authorlocalimpact.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@ def g_calc(x):
coloraxis_showscale=False,
showlegend=False
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, source_counts
5 changes: 2 additions & 3 deletions functions/get_authorproductionovertime.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,8 @@ def get_author_production_over_time(df, top_k_authors):
showlegend=False,
margin=dict(l=0, r=0, t=40, b=0), # Margins
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



# Sort production table by year and author
table_authors_production = author_production.sort_values(by=["PY", "AU"])[["AU", "PY", "Production", "TotalCitations", "TCpY"]]
Expand Down
5 changes: 2 additions & 3 deletions functions/get_averagecitations.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ def get_average_citations(df):
# Customize the grid
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EFEFEF')
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, table
5 changes: 2 additions & 3 deletions functions/get_citedcountries.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,7 @@ def get_cited_countries(df, num_of_cited_countries, cited_countries_measure):
),
coloraxis_showscale=False,
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, table
5 changes: 2 additions & 3 deletions functions/get_citeddocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ def get_cited_documents(df, num_of_cited_docs, cited_docs_measure):
),
coloraxis_showscale=False,
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, table
5 changes: 2 additions & 3 deletions functions/get_clusteringcoupling.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@ def get_clustering_coupling(df, unit_of_analysis, coupling_measured, stemmer, im
showlegend=True,
margin=dict(t=20) # aggiunge spazio bianco sopra
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



cm_data = coupling_map['data']
cm_clusters = coupling_map['clusters']
Expand Down
7 changes: 3 additions & 4 deletions functions/get_co_occurence_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,8 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg

# Remove hover info
fig.update_traces(hoverinfo='none')
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



####################### Table #########################
# Create cluster results dataframe with renamed columns
Expand Down Expand Up @@ -460,7 +459,7 @@ def get_co_occurence_network(df, field_cn, ngram, network_layout, clustering_alg
bordercolor="#5567BB"
),
)
degree_plot = go.FigureWidget(degree_plot)
degree_plot = go.Figure(degree_plot)
degree_plot._config = degree_plot._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

Expand Down
4 changes: 2 additions & 2 deletions functions/get_cocitation.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def get_co_citation(
showlegend=True,
margin=dict(t=20) # aggiunge spazio bianco sopra
)
fig_density = go.FigureWidget(fig_density)
fig_density = go.Figure(fig_density)
fig_density._config = fig_density._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

Expand Down Expand Up @@ -390,7 +390,7 @@ def get_co_citation(
showlegend=True,
margin=dict(t=20) # aggiunge spazio bianco sopra
)
degree_plot = go.FigureWidget(degree_plot)
degree_plot = go.Figure(degree_plot)
degree_plot._config = degree_plot._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

Expand Down
4 changes: 2 additions & 2 deletions functions/get_collaborationnetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def get_collaboration_network(
showlegend=True,
margin=dict(t=20) # aggiunge spazio bianco sopra
)
fig_density = go.FigureWidget(fig_density)
fig_density = go.Figure(fig_density)
fig_density._config = fig_density._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

Expand Down Expand Up @@ -393,7 +393,7 @@ def get_collaboration_network(
bordercolor="#5567BB"
),
)
degree_plot = go.FigureWidget(degree_plot)
degree_plot = go.Figure(degree_plot)
degree_plot._config = degree_plot._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}

Expand Down
5 changes: 2 additions & 3 deletions functions/get_correspondingauthorcountries.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ def get_corresponding_author_countries(df, top_k_countries):
y=1.1
)
)
fig = go.FigureWidget(fig)
fig._config = fig._config | {'modeBarButtonsToRemove': ['pan', 'select', 'lasso2d', 'toImage'],
'displaylogo': False}



return fig, table
Loading