# +
import streamlit as st
import leafmap.maplibregl as leafmap
import pandas as pd
import numpy as np
from matplotlib import cm
import ibis
from ibis import _
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from huggingface_hub import HfApi, login

login(st.secrets["HF_TOKEN"])
# +
## Benchmarked possible access locations; local access blows everything else away:
# h3_parquet = "https://data.source.coop/cboettig/gbif/gbif_us_h3.parquet"
# h3_parquet = "hf://datasets/boettiger-lab/gbif/usa_h3/*/*.parquet"
# h3_parquet = "hf://datasets/boettiger-lab/gbif/gbif_ca_h3.parquet"
# h3_parquet = "https://data.source.coop/cboettig/gbif/gbif_ca.geoparquet"
# h3_parquet = "https://huggingface.co/datasets/boettiger-lab/gbif/resolve/main/gbif_ca.geoparquet"
h3_parquet = "gbif_ca.geoparquet"

con = ibis.duckdb.connect(extensions=["spatial", "httpfs"])
gbif_h3 = con.read_parquet(h3_parquet, "gbif_h3")
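
# Assumed schema (inferred from usage below): point occurrence records with
# `decimallatitude`, `decimallongitude`, and `species` columns, plus
# pre-computed h3 index columns `h2` ... `h11`, one per resolution
# exposed by the UI slider.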
# -
# +
st.set_page_config(page_title="GBIF Observations Explorer", layout="wide")
st.header("GBIF Observations Explorer", divider="rainbow")
# st.set_page_config(page_title="H3 in Streamlit", layout="wide")
# -
# +
# ibis doesn't know that duckdb can access hf:// URLs directly;
# if needed, register the view through raw SQL instead:
# con.raw_sql(f"CREATE OR REPLACE VIEW gbif AS SELECT * FROM read_parquet('{h3_parquet}')")
# gbif_h3 = con.table("gbif")
# +
# ibis helper functions for h3, registered as DuckDB builtin UDFs
# (the `...` bodies are intentional placeholders for builtins).
# Currently we've pre-processed the h3 indexes, but they could be computed on the fly.
@ibis.udf.scalar.builtin
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:
    ...


@ibis.udf.scalar.builtin
def hex(array) -> str:
    ...


@ibis.udf.scalar.builtin
def h3_cell_to_boundary_wkt(array) -> str:
    ...
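
# Hypothetical on-the-fly sketch (untested; assumes the schema noted above):
# gbif_h3 = gbif_h3.mutate(
#     h6 = hex(h3_latlng_to_cell(_.decimallatitude, _.decimallongitude, 6))
# )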
## some versions need this manual install of the h3 duckdb extension:
# con.raw_sql('''
# INSTALL h3 FROM community;
# LOAD h3;
# ''')
# -
# ------ Visualisation 1 ---------
def filter_gbif(_df, species="Canis lupus", bbox=[-130., 30., -90., 60.]):
    return (_df
            .filter(_.decimallongitude >= bbox[0],
                    _.decimallongitude < bbox[2],
                    _.decimallatitude >= bbox[1],
                    _.decimallatitude < bbox[3],
                    _.species == species)
            )
def get_h3point_df(_df, resolution: int) -> pd.DataFrame:
    column = "h" + str(resolution)
    df = (_df
          .rename(hex=column)
          .group_by(_.hex)
          .agg(n=_.count())
          # .mutate(wkt = h3_cell_to_boundary_wkt(_.hex))
          .mutate(v=_.n.log())
          .mutate(normalized_values=_.v / _.v.max())
          .to_pandas()
          )
    rgb = cm.viridis(df.normalized_values)
    rgb_array = np.round(rgb * 255).astype(int).clip(0, 255).tolist()
    df['rgb'] = rgb_array
    # df['viridis_hex'] = colors.to_hex(rgb)  # not robust?
    df['viridis_hex'] = [f"#{int(c[0] * 255):02x}{int(c[1] * 255):02x}{int(c[2] * 255):02x}" for c in rgb]
    return df
# +
# #%%time
# df = filter_gbif(gbif_h3, species="Canis lupus")
# df = get_h3point_df(df, 6)
# df.head(10)
# +
def host_df(df, filename="live.json", repo_id="boettiger-lab/gbif"):
    df.to_json(".static/" + filename, orient='records', indent=2)
    api = HfApi()
    info = api.upload_file(
        path_or_fileobj=".static/" + filename,
        path_in_repo="live/" + filename,
        repo_id=repo_id,
        repo_type="dataset",
    )
    # to avoid caching, use the unique commit url
    commit_hash = info.oid
    return f"https://huggingface.co/datasets/{repo_id}/resolve/{commit_hash}/live/{filename}"
import pydeck as pdk


def hex_layer(m, df: pd.DataFrame, v_scale=1):
    url = host_df(df)
    layer = pdk.Layer(
        "H3HexagonLayer",
        data=url,
        get_hexagon="hex",
        get_fill_color="rgb",
        extruded=True,
        get_elevation="normalized_values",
        elevation_scale=5000 * 10 ** v_scale,
        elevation_range=[0, 1],
    )
    return m.add_deck_layers([layer])
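
# hex_layer expects the columns produced by get_h3point_df: `hex` (H3 cell id),
# `rgb` (fill color), and `normalized_values` (unit elevation, scaled by
# `elevation_scale`).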
# +
# #%%time
def local_test():
    bbox = [-120, 37, -118, 39]
    df = filter_gbif(gbif_h3, species="Canis lupus", bbox=bbox)
    df = get_h3point_df(df, 6)
    m = leafmap.Map(style="openstreetmap", center=(-121.4, 37.74), zoom=7)
    hex_layer(m, df)
    return m

# local_test()
# +
import os

os.environ["MAPTILER_KEY"] = st.secrets["MAPTILER_KEY"]
m = leafmap.Map(style="positron", center=(-121.4, 37.50), zoom=7)
# m
# +
# Set up Langchain SQL access
db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
db.run(f"create or replace view gbif_h3 as select * from read_parquet('{h3_parquet}');")

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
chain = create_sql_query_chain(llm, db)

example_question = "Show me all birds"
additional_advice = '. Return all matching columns using SELECT * in the query. You must use only the space-separated two-word binomial scientific name as the "species" column, and not the "genus" column, such as "species"="Homo sapiens". Avoid double quoting. Do not use LIMIT, always return all results. Do not include explanations of queries.'
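
# For the example question, the chain might emit something like
# (illustrative only; actual model output varies, and this assumes the
# parquet carries a taxonomic `class` column):
#   SELECT * FROM gbif_h3 WHERE class = 'Aves'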
# @st.cache_data
def manual_query(species, zoom):
    df = filter_gbif(gbif_h3, species)
    df = get_h3point_df(df, zoom)
    return df


# @st.cache_data
def chat_query(query, zoom):
    df = con.sql(query)
    df = get_h3point_df(df, zoom)
    return df
# +
col1, col2, col3 = st.columns(3)

with col1:
    zoom = st.slider("H3 resolution", min_value=2, max_value=11, value=8)
    v_scale = st.slider("vertical scale", min_value=-3, max_value=3, value=1)

with col2:
    "🌍 Data Layers"
    if st.toggle("satellite"):
        m.add_basemap("satellite")
    if st.toggle("redlining"):
        # redlining = "https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.json"
        redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CASanFrancisco1937/geojson.json"
        paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8}
        m.add_geojson(redlining, layer_type="fill", name="redlining", paint=paint, fit_bounds=False)
    # if st.toggle("Threatened Species Richness"):
    #     m.add_tile_layer(url="https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
    #                      name="MOBI Species Richness",
    #                      attribution="NatureServe",
    #                      opacity=0.9)
with col3:
    species = st.text_input("Species name:", "Canis latrans")
    df = manual_query(species, zoom)

    chatbox = st.container()
    st.markdown("🦜 Or try our chat-based query:")
    if prompt := st.chat_input(example_question, key="chain"):
        st.chat_message("user").write(prompt)
        with st.chat_message("assistant"):
            query = chain.invoke({"question": prompt + additional_advice})
            st.write(query)
            df = chat_query(query, zoom)
    # if st.button("refresh"):
    #     chat_query.clear()

" "
st.divider()
# +
# with col2:
#     min_lng, max_lng = st.slider(
#         "Select longitude range",
#         min_value=-130.0,
#         max_value=-65.0,
#         value=(-128.0, -115.0),  # Default selected range
#         step=0.1)
#     min_lat, max_lat = st.slider(
#         "Select latitude range",
#         min_value=20.0,
#         max_value=70.0,
#         value=(30.0, 42.0),  # Default selected range
#         step=0.1)
# -
# +
map = st.container()
with map:
    hex_layer(m, df, v_scale)
    m.add_layer_control(position="top-left")
    m.to_streamlit()
# +
st.divider()

'''
## Credits

DRAFT. Open Source Software developed at UC Berkeley.
'''