# + import streamlit as st import leafmap.maplibregl as leafmap import pandas as pd import numpy as np from matplotlib import cm import ibis from ibis import _ from langchain_openai import ChatOpenAI from langchain_community.utilities import SQLDatabase from langchain.chains import create_sql_query_chain from huggingface_hub import HfApi, login login(st.secrets["HF_TOKEN"]) # + ## Benchmark possible access locations, but local blows everything else away # h3_parquet = "https://data.source.coop/cboettig/gbif/gbif_us_h3.parquet" # h3_parquet = "hf://datasets/boettiger-lab/gbif/usa_h3/*/*.parquet" # h3_parquet = "hf://datasets/boettiger-lab/gbif/gbif_ca_h3.parquet" # h3_parquet = "https://data.source.coop/cboettig/gbif/gbif_ca.geoparquet" # h3_parquet = "https://huggingface.co/datasets/boettiger-lab/gbif/resolve/main/gbif_ca.geoparquet" h3_parquet = "gbif_ca.geoparquet" con = ibis.duckdb.connect(extensions=["spatial", "httpfs"]) gbif_h3 = con.read_parquet(h3_parquet, "gbif_h3") # - # + st.set_page_config(page_title="GBIF Observations Explorer", layout="wide") st.header("GBIF Observations Explorer", divider="rainbow") # st.set_page_config(page_title="H3 in Streamlit", layout="wide") # - # + # ibis doesn't know that duckdb can access hf:// #con.raw_sql(f"CREATE OR REPLACE VIEW gbif AS SELECT * FROM read_parquet('{h3_parquet}')") #gbif_h3 = con.table("gbif") # + # ibis helper functions for h3. # Currently we've pre-processed this, but technically could be done on the fly @ibis.udf.scalar.builtin def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int: ... @ibis.udf.scalar.builtin def hex(array) -> str: ... @ibis.udf.scalar.builtin def h3_cell_to_boundary_wkt (array) -> str: ... ## some versions need this manual install of h3 duckdb extension # con.raw_sql(''' # INSTALL h3 FROM community; # LOAD h3; # ''') # - # ------ Visualisation 1 --------- def filter_gbif(_df, species="Canis lupus", bbox = [-130., 30., -90., 60.]): return (_df .filter(_.decimallongitude >= bbox[0], _.decimallongitude < bbox[2], _.decimallatitude >= bbox[1], _.decimallatitude < bbox[3], _.species == species ) ) def get_h3point_df(_df, resolution: float) -> pd.DataFrame: column = "h" + str(resolution) df = (_df .rename(hex = column) .group_by(_.hex) .agg(n = _.count()) # .mutate(wkt = h3_cell_to_boundary_wkt(_.hex)) .mutate(v = _.n.log()) .mutate(normalized_values = _.v / _.v.max()) .to_pandas() ) rgb = cm.viridis(df.normalized_values) rgb_array = np.round( rgb * 255 ).astype(int).clip(0,255).tolist() df['rgb'] = rgb_array #df['viridis_hex'] = colors.to_hex(rgb) # not robust? df['viridis_hex'] = [f"#{int(c[0] * 255):02x}{int(c[1] * 255):02x}{int(c[2] * 255):02x}" for c in rgb] return df # + # #%%time #df = filter_gbif(gbif_h3, species = "Canis lupus") #df = get_h3point_df(df, 6) #df.head(10) # + def host_df(df, filename = "live.json", repo_id="boettiger-lab/gbif"): df.to_json(".static/"+filename, orient='records', indent=2) api = HfApi() info = api.upload_file( path_or_fileobj=".static/"+filename, path_in_repo="live/" + filename, repo_id=repo_id, repo_type="dataset", ) # to avoid cache, use unique commit url commit_hash = info.oid return f"https://huggingface.co/datasets/{repo_id}/resolve/{commit_hash}/live/{filename}" import pydeck as pdk def hex_layer(m, df: pd.DataFrame, v_scale = 1): url = host_df(df) layer = pdk.Layer( "H3HexagonLayer", data = url, get_hexagon="hex", get_fill_color="rgb", extruded=True, get_elevation="normalized_values", elevation_scale= 5000 * 10 ** v_scale, elevation_range = [0,1] ) return m.add_deck_layers([layer]) # + # #%%time def local_test(): bbox = [-120, 37, -118, 39] df = filter_gbif(gbif_h3, species = "Canis lupus", bbox = bbox) df = get_h3point_df(df, 6) m = leafmap.Map(style="openstreetmap", center=(-121.4, 37.74), zoom=7,) hex_layer(m, df) return m #local_test() # + import os import streamlit as st os.environ["MAPTILER_KEY"] = st.secrets["MAPTILER_KEY"] import leafmap.maplibregl as leafmap m = leafmap.Map(style="positron", center=(-121.4, 37.50), zoom=7,) #m # + # Set up Langchain SQL access db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True) db.run(f"create or replace view gbif_h3 as select * from read_parquet('{h3_parquet}');") llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]) chain = create_sql_query_chain(llm, db) example_question = "Show me all birds" additional_advice = '. Return all matching columns using SELECT * in the query. You must use only the space-separated two-word binomial scientific name as the "species" column, and not the "genus" column, such as "species"="Homo sapiens". Avoid double quoting. Do not use LIMIT, always return all results. Do not include explanations of queries.' #@st.cache_data def manual_query(species, zoom): df = filter_gbif(gbif_h3, species) df = get_h3point_df(df, zoom) return df #@st.cache_data def chat_query(query, zoom): df = con.sql(query) df = get_h3point_df(df, zoom) return df # + col1, col2, col3 = st.columns(3) with col1: zoom = st.slider("H3 resolution", min_value=2, max_value=11, value=8) v_scale = st.slider("vertical scale", min_value=-3, max_value=3, value=1) with col2: "🌍 Data Layers" if st.toggle("satellite"): m.add_basemap("satellite") if st.toggle("redlining"): #redlining = "https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.json" redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CASanFrancisco1937/geojson.json" paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8} m.add_geojson(redlining, layer_type="fill", name = "redlining", paint=paint, fit_bounds = False) # if st.toggle("Threatened Species Richness"): # m.add_tile_layer(url="https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png", # name="MOBI Species Richness", # attribution="NatureServe", # opacity=0.9 # ) with col3: species = st.text_input("Species name:", "Canis latrans") df = manual_query(species, zoom) chatbox = st.container() st.markdown("🦜 Or try our chat-based query:") if prompt := st.chat_input(example_question, key="chain"): st.chat_message("user").write(prompt) with st.chat_message("assistant"): query = chain.invoke({"question": prompt + additional_advice}) st.write(query) df = chat_query(query, zoom) # if st.button("refresh"): # chat_query.clear() " " st.divider() # + # with col2: # min_lng, max_lng = st.slider( # "Select longitude range", # min_value=-130.0, # max_value=-65.0, # value=(-128.0, -115.0), # Default selected range # step=0.1) # min_lat, max_lat = st.slider( # "Select latitude range", # min_value=20.0, # max_value=70.0, # value=(30.0, 42.0), # Default selected range # step=0.1) # - # + map = st.container() with map: hex_layer(m, df, v_scale) m.add_layer_control(position="top-left") m.to_streamlit() # + st.divider() ''' ## Credits DRAFT. Open Source Software developed at UC Berkeley. '''