Spaces:
Sleeping
Sleeping
Commit
·
6d6db63
1
Parent(s):
bbe8f2e
filtering by sub_ids because I want to do habitat type queries
Browse files- app/app.py +6 -6
- app/system_prompt.txt +12 -12
- app/utils.py +2 -2
- app/variables.py +2 -2
app/app.py
CHANGED
|
@@ -95,7 +95,7 @@ def main():
|
|
| 95 |
sql_query = output.sql_query
|
| 96 |
explanation =output.explanation
|
| 97 |
if not sql_query: # if the chatbot can't generate a SQL query.
|
| 98 |
-
return pd.DataFrame({'
|
| 99 |
result = ca.sql(sql_query).execute()
|
| 100 |
if result.empty:
|
| 101 |
explanation = "This query did not return any results. Please try again with a different query."
|
|
@@ -139,7 +139,7 @@ def main():
|
|
| 139 |
st.stop()
|
| 140 |
|
| 141 |
# output without mapping columns (id, geom)
|
| 142 |
-
elif "
|
| 143 |
st.write(llm_output)
|
| 144 |
not_mapping = True
|
| 145 |
|
|
@@ -151,8 +151,8 @@ def main():
|
|
| 151 |
st.code(sql_query,language = "sql")
|
| 152 |
|
| 153 |
# extract ids, columns, bounds if present
|
| 154 |
-
if "
|
| 155 |
-
ids = list(set(llm_output['
|
| 156 |
llm_cols = extract_columns(sql_query)
|
| 157 |
bounds = llm_output.total_bounds.tolist()
|
| 158 |
else:
|
|
@@ -390,12 +390,12 @@ def main():
|
|
| 390 |
if ('geom' in llm_output.columns) and (not llm_output.empty):
|
| 391 |
llm_output = llm_output.drop('geom',axis = 1)
|
| 392 |
if not llm_output.empty:
|
| 393 |
-
if 'name' in llm_output.columns and '
|
| 394 |
llm_grouped = (llm_output.groupby('name')
|
| 395 |
.agg({col: ('sum' if col == 'acres' else 'first')
|
| 396 |
for col in llm_output.columns
|
| 397 |
if col != 'name'})).reset_index()
|
| 398 |
-
llm_grouped.drop(['
|
| 399 |
st.dataframe(llm_grouped, use_container_width = True)
|
| 400 |
else:
|
| 401 |
st.dataframe(llm_output, use_container_width = True)
|
|
|
|
| 95 |
sql_query = output.sql_query
|
| 96 |
explanation =output.explanation
|
| 97 |
if not sql_query: # if the chatbot can't generate a SQL query.
|
| 98 |
+
return pd.DataFrame({'sub_id' : []}),'', explanation
|
| 99 |
result = ca.sql(sql_query).execute()
|
| 100 |
if result.empty:
|
| 101 |
explanation = "This query did not return any results. Please try again with a different query."
|
|
|
|
| 139 |
st.stop()
|
| 140 |
|
| 141 |
# output without mapping columns (id, geom)
|
| 142 |
+
elif "sub_id" not in llm_output.columns and "geom" not in llm_output.columns:
|
| 143 |
st.write(llm_output)
|
| 144 |
not_mapping = True
|
| 145 |
|
|
|
|
| 151 |
st.code(sql_query,language = "sql")
|
| 152 |
|
| 153 |
# extract ids, columns, bounds if present
|
| 154 |
+
if "sub_id" in llm_output.columns and not llm_output.empty:
|
| 155 |
+
ids = list(set(llm_output['sub_id'].tolist()))
|
| 156 |
llm_cols = extract_columns(sql_query)
|
| 157 |
bounds = llm_output.total_bounds.tolist()
|
| 158 |
else:
|
|
|
|
| 390 |
if ('geom' in llm_output.columns) and (not llm_output.empty):
|
| 391 |
llm_output = llm_output.drop('geom',axis = 1)
|
| 392 |
if not llm_output.empty:
|
| 393 |
+
if 'name' in llm_output.columns and 'sub_id' in llm_output.columns:
|
| 394 |
llm_grouped = (llm_output.groupby('name')
|
| 395 |
.agg({col: ('sum' if col == 'acres' else 'first')
|
| 396 |
for col in llm_output.columns
|
| 397 |
if col != 'name'})).reset_index()
|
| 398 |
+
llm_grouped.drop(['sub_id'], axis=1, inplace = True)
|
| 399 |
st.dataframe(llm_grouped, use_container_width = True)
|
| 400 |
else:
|
| 401 |
st.dataframe(llm_output, use_container_width = True)
|
app/system_prompt.txt
CHANGED
|
@@ -32,7 +32,7 @@ Ensure the response contains only this JSON object, with no additional text, for
|
|
| 32 |
- Unless the user asks about biodiversity overlap or coverage, use columns with the prefix "mean_" to quantify biodiversity.
|
| 33 |
|
| 34 |
# Column Descriptions
|
| 35 |
-
- "id": The parent id for sub_id. "id" identifies a protected area, while "sub_id" identifies the particular feature in a protected area. "
|
| 36 |
- "sub_id": unique string identifier for each feature.
|
| 37 |
- "established": The time range which the land was acquired, either "2024" or "pre-2024".
|
| 38 |
- "gap_code": The GAP code corresponds to the level of biodiversity protection for an area; GAP 1 has the highest protections whereas GAP 4 has the weakest. There are 4 gap codes and are defined as the following. In the data, non-conservation areas do not have a GAP code.
|
|
@@ -73,7 +73,7 @@ Only use the following table:
|
|
| 73 |
## Example:
|
| 74 |
example_user: "Show me the best areas to go birdwatching in San Diego County."
|
| 75 |
example_assistant: {{"sql_query":
|
| 76 |
-
SELECT "
|
| 77 |
FROM mydata
|
| 78 |
WHERE "county" = 'San Diego'
|
| 79 |
ORDER BY "mean_bird_richness" DESC
|
|
@@ -90,7 +90,7 @@ limit the uses of private land to protect conservation values.
|
|
| 90 |
## Example:
|
| 91 |
example_user: "Show me all non-profit land."
|
| 92 |
example_assistant: {{"sql_query":
|
| 93 |
-
SELECT "
|
| 94 |
FROM mydata
|
| 95 |
WHERE "manager_type" = 'Non Profit';
|
| 96 |
"explanation":"I selected all data where `manager_type` is 'Non Profit'."
|
|
@@ -99,7 +99,7 @@ example_assistant: {{"sql_query":
|
|
| 99 |
## Example:
|
| 100 |
example_user: "Where are areas with high plant biodiversity"
|
| 101 |
example_assistant: {{"sql_query":
|
| 102 |
-
SELECT "
|
| 103 |
FROM mydata
|
| 104 |
ORDER BY "mean_plant_richness" DESC
|
| 105 |
LIMIT 50;
|
|
@@ -107,7 +107,7 @@ example_assistant: {{"sql_query":
|
|
| 107 |
|
| 108 |
example_user: "Show me areas open to the public in disadvantaged communities."
|
| 109 |
example_assistant: {{"sql_query":
|
| 110 |
-
SELECT "
|
| 111 |
WHERE "access_type" = 'Open Access'
|
| 112 |
AND "pct_disadvantaged_community" > 0;
|
| 113 |
"explanation": "I used `access_type` to filter for areas that are open to the public (`Open Access`) and `pct_disadvantaged_community` > 0 to include only those located in disadvantaged communities. `pct_disadvantaged_community` identifies communities burdened by multiple sources of pollution with population characteristics that make them more sensitive to pollution."
|
|
@@ -141,13 +141,13 @@ sql_query:
|
|
| 141 |
## Example:
|
| 142 |
example_user: "Show me all land managed by the United States Forest Service."
|
| 143 |
sql_query:
|
| 144 |
-
SELECT "
|
| 145 |
WHERE LOWER("manager") LIKE '%united states forest service%';
|
| 146 |
|
| 147 |
## Example:
|
| 148 |
example_user: "Show me areas with more than 25% overlap in bird species richness."
|
| 149 |
sql_query:
|
| 150 |
-
SELECT "
|
| 151 |
FROM mydata
|
| 152 |
WHERE (
|
| 153 |
"pct_bird_richness" > 0.25
|
|
@@ -165,7 +165,7 @@ sql_query:
|
|
| 165 |
|
| 166 |
example_user: "Show me GAP 3 lands where more than 50% of the area overlaps with regions of high biodiversity."
|
| 167 |
example_assistant: {{"sql_query":
|
| 168 |
-
SELECT "
|
| 169 |
"pct_top_amphibian_richness", "pct_top_reptile_richness",
|
| 170 |
"pct_top_bird_richness", "pct_top_mammal_richness",
|
| 171 |
"pct_top_freshwater_richness", "pct_top_plant_richness"
|
|
@@ -186,21 +186,21 @@ example_assistant: {{"sql_query":
|
|
| 186 |
SELECT PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_85_percentile
|
| 187 |
FROM mydata
|
| 188 |
)
|
| 189 |
-
SELECT "
|
| 190 |
FROM mydata
|
| 191 |
WHERE "land_tenure" = 'Easement'
|
| 192 |
AND "mean_amphibian_richness" >= (SELECT mean_amphibian_richness_85_percentile FROM percentile);
|
| 193 |
|
| 194 |
example_user: "Show nonconserved areas in climate zone 2"
|
| 195 |
example_assistant: {{"sql_query":
|
| 196 |
-
SELECT "
|
| 197 |
FROM mydata
|
| 198 |
WHERE "climate_zone" = 'Zone 2'
|
| 199 |
AND "status" = 'Non-Conservation Area';
|
| 200 |
|
| 201 |
example_user: "Show me working lands that are 30x30 conservation areas"
|
| 202 |
example_assistant: {{"sql_query":
|
| 203 |
-
SELECT "
|
| 204 |
FROM mydata
|
| 205 |
WHERE "status" = '30x30 Conservation Area'
|
| 206 |
AND "pct_farmland" >0;
|
|
@@ -211,7 +211,7 @@ example_assistant: {{"sql_query":
|
|
| 211 |
SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_mammal_richness") AS mean_mammal_richness_95_percentile
|
| 212 |
FROM mydata
|
| 213 |
)
|
| 214 |
-
SELECT "
|
| 215 |
FROM mydata
|
| 216 |
WHERE "status" != '30x30 Conservation Area'
|
| 217 |
AND "mean_mammal_richness" >= (SELECT mean_mammal_richness_95_percentile FROM percentile);
|
|
|
|
| 32 |
- Unless the user asks about biodiversity overlap or coverage, use columns with the prefix "mean_" to quantify biodiversity.
|
| 33 |
|
| 34 |
# Column Descriptions
|
| 35 |
+
- "id": The parent id for sub_id. "id" identifies a protected area, while "sub_id" identifies the particular feature in a protected area. "sub_id" is necessary for displaying queried results on a map.
|
| 36 |
- "sub_id": unique string identifier for each feature.
|
| 37 |
- "established": The time range which the land was acquired, either "2024" or "pre-2024".
|
| 38 |
- "gap_code": The GAP code corresponds to the level of biodiversity protection for an area; GAP 1 has the highest protections whereas GAP 4 has the weakest. There are 4 gap codes and are defined as the following. In the data, non-conservation areas do not have a GAP code.
|
|
|
|
| 73 |
## Example:
|
| 74 |
example_user: "Show me the best areas to go birdwatching in San Diego County."
|
| 75 |
example_assistant: {{"sql_query":
|
| 76 |
+
SELECT "sub_id", "geom", "name", "acres", "county","mean_bird_richness"
|
| 77 |
FROM mydata
|
| 78 |
WHERE "county" = 'San Diego'
|
| 79 |
ORDER BY "mean_bird_richness" DESC
|
|
|
|
| 90 |
## Example:
|
| 91 |
example_user: "Show me all non-profit land."
|
| 92 |
example_assistant: {{"sql_query":
|
| 93 |
+
SELECT "sub_id", "geom", "name", "acres", "manager_type"
|
| 94 |
FROM mydata
|
| 95 |
WHERE "manager_type" = 'Non Profit';
|
| 96 |
"explanation":"I selected all data where `manager_type` is 'Non Profit'."
|
|
|
|
| 99 |
## Example:
|
| 100 |
example_user: "Where are areas with high plant biodiversity"
|
| 101 |
example_assistant: {{"sql_query":
|
| 102 |
+
SELECT "sub_id", "geom", "name", "acres", "mean_plant_richness"
|
| 103 |
FROM mydata
|
| 104 |
ORDER BY "mean_plant_richness" DESC
|
| 105 |
LIMIT 50;
|
|
|
|
| 107 |
|
| 108 |
example_user: "Show me areas open to the public in disadvantaged communities."
|
| 109 |
example_assistant: {{"sql_query":
|
| 110 |
+
SELECT "sub_id", "geom", "name", "acres", "access_type", "pct_disadvantaged_community" FROM mydata
|
| 111 |
WHERE "access_type" = 'Open Access'
|
| 112 |
AND "pct_disadvantaged_community" > 0;
|
| 113 |
"explanation": "I used `access_type` to filter for areas that are open to the public (`Open Access`) and `pct_disadvantaged_community` > 0 to include only those located in disadvantaged communities. `pct_disadvantaged_community` identifies communities burdened by multiple sources of pollution with population characteristics that make them more sensitive to pollution."
|
|
|
|
| 141 |
## Example:
|
| 142 |
example_user: "Show me all land managed by the United States Forest Service."
|
| 143 |
sql_query:
|
| 144 |
+
SELECT "sub_id", "geom", "name", "acres", "manager" FROM mydata
|
| 145 |
WHERE LOWER("manager") LIKE '%united states forest service%';
|
| 146 |
|
| 147 |
## Example:
|
| 148 |
example_user: "Show me areas with more than 25% overlap in bird species richness."
|
| 149 |
sql_query:
|
| 150 |
+
SELECT "sub_id", "geom", "name", "acres", "pct_bird_richness"
|
| 151 |
FROM mydata
|
| 152 |
WHERE (
|
| 153 |
"pct_bird_richness" > 0.25
|
|
|
|
| 165 |
|
| 166 |
example_user: "Show me GAP 3 lands where more than 50% of the area overlaps with regions of high biodiversity."
|
| 167 |
example_assistant: {{"sql_query":
|
| 168 |
+
SELECT "sub_id", "geom", "name", "acres", "county",
|
| 169 |
"pct_top_amphibian_richness", "pct_top_reptile_richness",
|
| 170 |
"pct_top_bird_richness", "pct_top_mammal_richness",
|
| 171 |
"pct_top_freshwater_richness", "pct_top_plant_richness"
|
|
|
|
| 186 |
SELECT PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_85_percentile
|
| 187 |
FROM mydata
|
| 188 |
)
|
| 189 |
+
SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
|
| 190 |
FROM mydata
|
| 191 |
WHERE "land_tenure" = 'Easement'
|
| 192 |
AND "mean_amphibian_richness" >= (SELECT mean_amphibian_richness_85_percentile FROM percentile);
|
| 193 |
|
| 194 |
example_user: "Show nonconserved areas in climate zone 2"
|
| 195 |
example_assistant: {{"sql_query":
|
| 196 |
+
SELECT "sub_id", "geom", "name", "acres", "status", "climate_zone"
|
| 197 |
FROM mydata
|
| 198 |
WHERE "climate_zone" = 'Zone 2'
|
| 199 |
AND "status" = 'Non-Conservation Area';
|
| 200 |
|
| 201 |
example_user: "Show me working lands that are 30x30 conservation areas"
|
| 202 |
example_assistant: {{"sql_query":
|
| 203 |
+
SELECT "sub_id", "geom", "name", "acres", "land_tenure", "pct_farmland"
|
| 204 |
FROM mydata
|
| 205 |
WHERE "status" = '30x30 Conservation Area'
|
| 206 |
AND "pct_farmland" >0;
|
|
|
|
| 211 |
SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_mammal_richness") AS mean_mammal_richness_95_percentile
|
| 212 |
FROM mydata
|
| 213 |
)
|
| 214 |
+
SELECT "sub_id", "geom", "name", "acres","mean_mammal_richness"
|
| 215 |
FROM mydata
|
| 216 |
WHERE "status" != '30x30 Conservation Area'
|
| 217 |
AND "mean_mammal_richness" >= (SELECT mean_mammal_richness_95_percentile FROM percentile);
|
app/utils.py
CHANGED
|
@@ -184,7 +184,7 @@ def get_summary_table_sql(ca, column, colors, ids, feature_col = None):
|
|
| 184 |
"""
|
| 185 |
Generates a summary table using specific IDs as filters.
|
| 186 |
"""
|
| 187 |
-
combined_filter = _.
|
| 188 |
df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
|
| 189 |
df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
|
| 190 |
return df_network, df_feature
|
|
@@ -206,7 +206,7 @@ def get_pmtiles_style(paint, pmtiles_file, low_res, filter_cols=None, filter_val
|
|
| 206 |
Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
|
| 207 |
"""
|
| 208 |
if ids:
|
| 209 |
-
filter_expr = ["in", ["get", "
|
| 210 |
else:
|
| 211 |
# we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
|
| 212 |
filter_length = sum([len(x) for x in filter_vals])
|
|
|
|
| 184 |
"""
|
| 185 |
Generates a summary table using specific IDs as filters.
|
| 186 |
"""
|
| 187 |
+
combined_filter = _.sub_id.isin(ids)
|
| 188 |
df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
|
| 189 |
df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
|
| 190 |
return df_network, df_feature
|
|
|
|
| 206 |
Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
|
| 207 |
"""
|
| 208 |
if ids:
|
| 209 |
+
filter_expr = ["in", ["get", "sub_id"], ["literal", ids]]
|
| 210 |
else:
|
| 211 |
# we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
|
| 212 |
filter_length = sum([len(x) for x in filter_vals])
|
app/variables.py
CHANGED
|
@@ -463,8 +463,8 @@ help_message = '''
|
|
| 463 |
|
| 464 |
example_queries = """
|
| 465 |
Mapping queries:
|
| 466 |
-
- Show me
|
| 467 |
-
- Show me GAP 3 lands with mean
|
| 468 |
- Show me easements with 60% or more overlap with high plant biodiversity regions.
|
| 469 |
- Show me protected areas that are open to the public in disadvantaged communities.
|
| 470 |
- Show me all 30x30 conservation areas managed by The Nature Conservancy.
|
|
|
|
| 463 |
|
| 464 |
example_queries = """
|
| 465 |
Mapping queries:
|
| 466 |
+
- Show me amphibian biodiversity hotspots not covered by the 30x30 network.
|
| 467 |
+
- Show me GAP 3 lands with mean bird richness in the top 10%.
|
| 468 |
- Show me easements with 60% or more overlap with high plant biodiversity regions.
|
| 469 |
- Show me protected areas that are open to the public in disadvantaged communities.
|
| 470 |
- Show me all 30x30 conservation areas managed by The Nature Conservancy.
|