A custom data source control that integrates with the Census API and lets users explore demographic data through natural language queries. The tutorial follows three steps:
1. Start with a minimal example - Build a basic year selector with ~70 lines of runnable code
2. Understand the components - Learn how each part works
3. Extend to the full version - Add dynamic options and more features
Each step introduces key Lumen AI concepts for building custom data source integrations.
Lumen AI ships with built-in support for common data sources like CSV files, DuckDB, and SQL databases. But what if your data lives behind an API that requires authentication, has complex query parameters, or needs real-time fetching?
Once you create a custom control, users can fetch data through a simple UI without writing code, then immediately ask natural language questions about it using Lumen AI's conversational interface.
import asyncio

import censusdis.data as ced
import panel as pn
import param

import lumen.ai as lmai

from lumen.ai.controls import DownloadControls
from lumen.sources.duckdb import DuckDBSource
from panel_material_ui import Button, IntSlider, Column

pn.extension()


class CensusControls(DownloadControls):
    """Fetch U.S. Census population data."""

    # Survey year to download; the bounds also limit the slider built from it.
    vintage = param.Integer(default=2023, bounds=(2011, 2023), doc="Year of data")

    # HTML label shown for this control (Material icon + text).
    label = '<span class="material-icons">assessment</span> Census Data'

    def __init__(self, **params):
        """Build the year slider, the fetch button and the layout holding them."""
        super().__init__(**params)
        self._year_select = IntSlider.from_param(self.param.vintage, label="Year")
        self._fetch_button = Button(
            label="Fetch Population Data", on_click=self._on_fetch
        )
        self._layout = Column(self._year_select, self._fetch_button)

    async def _on_fetch(self, event):
        """Fetch census population data.

        Downloads total population (group B01003) for all states from the
        ACS 5-Year dataset for the selected ``vintage`` and, when a non-empty
        DataFrame comes back, registers it and signals a successful upload.
        """
        with self._layout.param.update(loading=True):
            # Yield to the event loop once before the download starts
            # (presumably so the loading state can render - TODO confirm).
            await asyncio.sleep(0.01)
            # ced.download is a synchronous call; run it in a worker thread.
            df = await asyncio.to_thread(
                ced.download,
                dataset="acs/acs5",  # ACS 5-Year
                vintage=self.vintage,
                download_variables=["NAME"],
                group="B01003",  # Total population
                state="*",  # All states
            )
            if df is not None and not df.empty:
                await self._add_table(df)
                self.param.trigger("upload_successful")

    async def _add_table(self, df):
        """Register DataFrame as a DuckDB source."""
        # The table name carries the year, e.g. "census_2023_population".
        table_name = f"census_{self.vintage}_population"
        source = DuckDBSource.from_df(tables={table_name: df})
        source.tables[table_name] = f"SELECT * FROM {table_name}"

        # Publish the new source/table through the control's outputs.
        self.outputs["source"] = source
        self.outputs["sources"] = self.outputs.get("sources", []) + [source]
        self.outputs["table"] = table_name
        self.param.trigger("outputs")

    def __panel__(self):
        """Return the layout Panel should render for this control."""
        return self._layout


ui = lmai.ExplorerUI(
    source_controls=[CensusControls],
    title="Census Data Explorer",
    log_level="DEBUG",
)
ui.servable()
This ~70 line example is immediately runnable! Try clicking on "Sources" in the sidebar, selecting the desired year, and clicking "Fetch Population Data".
Custom controls extend data sources by subclassing lmai.controls.DownloadControls (see Source Controls):
class CensusControls(DownloadControls):
    """Custom source control: a DownloadControls subclass for Census data."""

    # Survey year; the bounds define the valid range for the slider.
    vintage = param.Integer(default=2023, bounds=(2011, 2023), doc="Year of data")

    # Sidebar label: Material Design icon followed by the control name.
    label = '<span class="material-icons">assessment</span> Census Data'
The bounds parameter defines the valid range for the slider. The label appears in the sidebar with a Material Design icon.
Avoid blocking the main thread by running the download in a worker thread with asyncio.to_thread():
df=awaitasyncio.to_thread(ced.download,dataset="acs/acs5",# ACS 5-Year datasetvintage=self.vintage,download_variables=["NAME"],# Include place namesgroup="B01003",# Total population variable groupstate="*",# All states)
# Excerpt from _add_table: wrap the DataFrame in a DuckDB source under a
# year-specific table name, e.g. "census_2023_population".
table_name = f"census_{self.vintage}_population"
source = DuckDBSource.from_df(tables={table_name: df})
source.tables[table_name] = f"SELECT * FROM {table_name}"

# Make available to Lumen AI
self.outputs["source"] = source
self.outputs["table"] = table_name
self.param.trigger("outputs")
Note the dynamic table name that includes the year for clarity.
The minimal example uses fixed values: ACS 5-Year dataset, population data (group B01003), state geography. The full example below adds nice-to-have features like progress bars and error messages, plus:
Dynamic variable groups - Load and display all available Census variable groups:
def _get_group_options(self):
    """Fetch variable groups and return {label: value} dict.

    Group metadata is looked up once per ``(dataset, vintage)`` pair and kept
    in ``self._groups_cache`` as ``{group code: description}``.

    Returns:
        dict mapping ``"<code>: <description>"`` labels to group codes, in
        the order the groups were stored in the cache.
    """
    cache_key = (self.dataset, self.vintage)
    if cache_key not in self._groups_cache:
        groups_df = ced.variables.all_groups(self.dataset, self.vintage)
        # Pair the two columns directly instead of iterating row by row.
        self._groups_cache[cache_key] = dict(
            zip(groups_df["GROUP"], groups_df["DESCRIPTION"])
        )
    groups = self._groups_cache[cache_key]
    return {
        f"{code}: {description}": code for code, description in groups.items()
    }
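Multiple datasets - Choose between ACS 1-Year and 5-Year through a selector parameter, declared in the full example as dataset = param.Selector(default=ACS5, objects=[ACS5, ACS1], doc="Census dataset").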
Reactive updates - Options update when dataset/year changes:
@param.depends("dataset", "vintage", watch=True)
def _on_dataset_vintage_change(self):
    """Update group and geo options when dataset or vintage changes."""
    # watch=True makes param call this method whenever either parameter
    # changes, so both select widgets are refreshed together.
    self._group_select.options = self._get_group_options()
    self._geo_select.options = self._get_geo_options()
"""
Census Data Explorer - Full Example
Complete implementation with dynamic options and features
"""

import asyncio

import censusdis.data as ced
import censusdis.geography as cgeo
import panel as pn
import param

import lumen.ai as lmai

from censusdis.datasets import ACS1, ACS5
from censusdis.states import NAMES_FROM_IDS
from lumen.ai.controls import DownloadControls
from lumen.sources.duckdb import DuckDBSource
from lumen.util import normalize_table_name
from panel_material_ui import Button, FlexBox, Select, TextInput, Markdown

pn.extension()

# State FIPS codes, keyed by display name; "*" selects every state.
STATES = {"All States": "*", **{name: fips for fips, name in NAMES_FROM_IDS.items()}}

# Available years per dataset (note: ACS1 has no 2020 entry).
DATASET_YEARS = {
    ACS5: [2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011],
    ACS1: [2023, 2022, 2021, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011],
}


class CensusControlsFull(DownloadControls):
    """Full-featured Census data control with dynamic options."""

    dataset = param.Selector(default=ACS5, objects=[ACS5, ACS1], doc="Census dataset")
    vintage = param.Integer(default=2023, doc="Year of data")
    group = param.String(default="B01003", doc="Variable group code")
    geography = param.String(default="state", doc="Geographic level")
    state_filter = param.String(default="All States", doc="State to filter by")
    table_alias = param.String(default="census_data", doc="Table name in database")

    label = '<span class="material-icons">assessment</span> Census Data'

    def __init__(self, **params):
        """Create the option caches, build the widgets and link them to params."""
        super().__init__(**params)

        # Caches for API data, keyed by (dataset, vintage)
        self._groups_cache = {}
        self._geo_cache = {}

        # Load initial options
        group_options = self._get_group_options()
        geo_options = self._get_geo_options()
        year_options = self._get_year_options()

        # Build UI
        self._dataset_select = Select(
            value=self.dataset,
            options={"ACS 5-Year": ACS5, "ACS 1-Year": ACS1},
            label="Dataset",
            sizing_mode="stretch_width",
        )
        self._vintage_select = Select(
            value=self.vintage,
            options=year_options,
            label="Year",
            sizing_mode="stretch_width",
        )
        self._group_select = Select(
            value=self.group,
            options=group_options,
            label="Variable Group",
            sizing_mode="stretch_width",
        )
        self._geo_select = Select(
            value=self.geography,
            options=geo_options,
            label="Geography",
            sizing_mode="stretch_width",
        )
        self._state_select = Select(
            value=self.state_filter,
            options={name: name for name in STATES},
            label="State Filter",
            sizing_mode="stretch_width",
        )
        self._alias_input = TextInput.from_param(
            self.param.table_alias,
            label="Table Name",
            sizing_mode="stretch_width",
        )
        self._fetch_button = Button(
            label="Fetch Census Data",
            icon="download",
            color="primary",
            sizing_mode="stretch_width",
            height=42,
        )
        self._fetch_button.on_click(self._on_fetch)

        # NOTE(review): the error/message placeholders and progress widgets
        # are not created in this class; presumably DownloadControls provides
        # them - confirm against the base class.
        self._layout = FlexBox(
            Markdown("### Census Data"),
            FlexBox(
                self._dataset_select,
                self._vintage_select,
                flex_direction="row",
                gap="10px",
                sizing_mode="stretch_width",
            ),
            self._group_select,
            FlexBox(
                self._geo_select,
                self._state_select,
                flex_direction="row",
                gap="10px",
                sizing_mode="stretch_width",
            ),
            self._alias_input,
            self._fetch_button,
            self._error_placeholder,
            self._message_placeholder,
            self._progress_bar,
            self._progress_description,
            flex_direction="column",
            gap="10px",
            sizing_mode="stretch_width",
            margin=(10, 10),
        )

        # Link widgets to params
        self._dataset_select.link(self, value='dataset')
        self._vintage_select.link(self, value='vintage')
        self._group_select.link(self, value='group')
        self._geo_select.link(self, value='geography')
        self._state_select.link(self, value='state_filter')

    def _get_year_options(self):
        """Get available years for current dataset as a {label: year} dict."""
        years = DATASET_YEARS.get(self.dataset, [2023])
        return {str(y): y for y in years}

    def _get_group_options(self):
        """Fetch variable groups from Census API.

        Returns:
            dict mapping ``"<code>: <description>"`` labels to group codes.
            A fixed list of common groups comes first, followed by the
            remaining codes in sorted order, capped at 500 entries in total.
            Empty when the lookup fails.
        """
        cache_key = (self.dataset, self.vintage)
        groups = self._groups_cache.get(cache_key)
        if groups is None:
            try:
                groups_df = ced.variables.all_groups(self.dataset, self.vintage)
                groups = dict(zip(groups_df["GROUP"], groups_df["DESCRIPTION"]))
            except Exception as e:
                # Best effort: report and fall back to no options. The failure
                # is deliberately NOT cached so a later call can retry.
                print(f"Failed to load groups: {e}")
                groups = {}
            else:
                self._groups_cache[cache_key] = groups

        # Prioritize common groups
        popular = ["B01003", "B01001", "B19013", "B02001", "B25001", "B15003", "B17001"]
        options = {f"{code}: {groups[code]}": code for code in popular if code in groups}

        already_listed = set(popular)
        for code in sorted(groups):
            if len(options) >= 500:
                break
            if code not in already_listed:
                options[f"{code}: {groups[code]}"] = code
        return options

    def _get_geo_options(self):
        """Fetch geographies from Census API.

        Returns:
            dict mapping a display label to the leaf (last) level of each
            geography hierarchy, one entry per distinct leaf. Empty when the
            lookup fails.
        """
        cache_key = (self.dataset, self.vintage)
        geo_specs = self._geo_cache.get(cache_key)
        if geo_specs is None:
            try:
                geo_specs = cgeo.geo_path_snake_specs(self.dataset, self.vintage)
            except Exception as e:
                # Best effort; not cached so the next call retries.
                print(f"Failed to load geographies: {e}")
                geo_specs = {}
            else:
                self._geo_cache[cache_key] = geo_specs

        friendly_names = {
            "state": "State",
            "county": "County",
            "tract": "Census Tract",
            "block_group": "Block Group",
            "place": "Place (City/Town)",
            "county_subdivision": "County Subdivision",
            "us": "United States (National)",
            "region": "Region",
            "division": "Division",
        }

        # Get unique leaf geographies
        options = {}
        seen = set()
        for hierarchy in geo_specs.values():
            if not hierarchy:
                continue  # nothing to take a leaf from
            leaf = hierarchy[-1]
            if leaf not in seen:
                seen.add(leaf)
                label = friendly_names.get(leaf, leaf.replace("_", " ").title())
                options[label] = leaf
        return options

    @staticmethod
    def _refresh_select(select, options, current):
        """Replace a select's options, keeping ``current`` when still valid."""
        select.options = options
        if current in options.values():
            select.value = current
        elif options:
            # Previous choice is gone: fall back to the first available value.
            select.value = next(iter(options.values()))

    @param.depends("dataset", watch=True)
    def _on_dataset_change(self):
        """Update year options when dataset changes."""
        new_year_options = self._get_year_options()
        self._vintage_select.options = new_year_options
        if self.vintage not in new_year_options.values():
            first_year = next(iter(new_year_options.values()))
            self._vintage_select.value = first_year
            self.vintage = first_year

    @param.depends("dataset", "vintage", watch=True)
    def _on_dataset_vintage_change(self):
        """Update group and geo options when dataset or vintage changes."""
        # The current selection is read before the options are replaced, so
        # it can be restored when the new option set still contains it.
        self._refresh_select(
            self._group_select, self._get_group_options(), self.group
        )
        self._refresh_select(
            self._geo_select, self._get_geo_options(), self.geography
        )

    def _build_geo_kwargs(self):
        """Build geography kwargs for ced.download.

        Returns:
            dict with one entry per level of the hierarchy whose leaf is the
            selected geography: ``"state"`` gets the selected state's FIPS
            code (or ``"*"``), every other level gets ``"*"``. When no
            matching hierarchy is cached, falls back to a state-level query
            that still honours the state filter.
        """
        cache_key = (self.dataset, self.vintage)
        geo_specs = self._geo_cache.get(cache_key, {})
        state_fips = STATES.get(self.state_filter, "*")

        # Find hierarchy ending with selected geography
        hierarchy = next(
            (h for h in geo_specs.values() if h and h[-1] == self.geography),
            None,
        )
        if not hierarchy:
            return {"state": state_fips}

        return {
            level: state_fips if level == "state" else "*" for level in hierarchy
        }

    async def _on_fetch(self, event):
        """Fetch census data.

        Validates that a group is selected, shows the progress widgets,
        downloads the data in a worker thread, registers it as a table and
        reports success or failure through the message/error placeholders.
        """
        if not self.group:
            self._error_placeholder.object = "⚠️ Please select a variable group"
            self._error_placeholder.visible = True
            return

        self._error_placeholder.visible = False
        self._message_placeholder.visible = False
        self._progress_bar.visible = True
        self._progress_bar.variant = "indeterminate"
        self._progress_description.object = "Fetching data..."
        self._progress_description.visible = True
        self._fetch_button.disabled = True

        try:
            geo_kwargs = self._build_geo_kwargs()
            # ced.download is synchronous; keep it off the event loop.
            df = await asyncio.to_thread(
                ced.download,
                dataset=self.dataset,
                vintage=self.vintage,
                download_variables=["NAME"],
                group=self.group,
                **geo_kwargs,
            )

            if df is None or df.empty:
                self._error_placeholder.object = "⚠️ No data returned"
                self._error_placeholder.visible = True
                return

            await self._add_table(df)
            self._message_placeholder.object = f"✓ Loaded {len(df):,} rows into '{self.table_alias}'"
            self._message_placeholder.visible = True
            self.param.trigger("upload_successful")
        except Exception as e:
            # UI boundary: surface any failure to the user instead of raising.
            self._error_placeholder.object = f"⚠️ Error: {e}"
            self._error_placeholder.visible = True
        finally:
            # Always restore the idle state, including on the early return.
            self._progress_bar.visible = False
            self._progress_description.visible = False
            self._fetch_button.disabled = False

    async def _add_table(self, df):
        """Register DataFrame as a DuckDB source."""
        table_name = normalize_table_name(self.table_alias)
        source = DuckDBSource.from_df(tables={table_name: df})
        source.tables[table_name] = f"SELECT * FROM {table_name}"
        # Record which query produced the table.
        source.metadata = {
            table_name: {
                "dataset": self.dataset,
                "vintage": self.vintage,
                "group": self.group,
                "geography": self.geography,
            }
        }
        self.outputs["source"] = source
        self.outputs["sources"] = self.outputs.get("sources", []) + [source]
        self.outputs["table"] = table_name
        self.param.trigger("outputs")
        # NOTE(review): _last_table/_count are not initialised in this class;
        # presumably bookkeeping attributes of DownloadControls - confirm.
        self._last_table = table_name
        self._count += 1

    def __panel__(self):
        """Return the layout Panel should render for this control."""
        return self._layout


ui = lmai.ExplorerUI(
    source_controls=[CensusControlsFull],
    title="Census Data Explorer - Full",
    suggestions=[
        ("bar_chart", "Show the top 10 states by population"),
        ("compare_arrows", "Compare different demographic variables"),
        ("question_mark", "What are the demographic patterns?"),
    ],
    log_level="DEBUG",
)
ui.servable()
This complete version includes all the features discussed: dynamic variable groups, multiple datasets, geography selection, state filtering, progress indicators, error handling, and reactive UI updates.