Spaces:
Sleeping
Sleeping
A join column is no longer required in the reference df. Input files can now be removed without an error message.
8c163ee
| import gradio as gr | |
| import pandas as pd | |
| import os | |
| import re | |
def get_or_create_env_var(var_name, default_value):
    """Return the value of an environment variable, defining it first if unset.

    Args:
        var_name: Name of the environment variable to look up.
        default_value: Value stored in ``os.environ`` and returned when the
            variable is not currently set.

    Returns:
        The existing value of the variable, or ``default_value`` if it was
        not set.
    """
    existing = os.environ.get(var_name)
    if existing is not None:
        return existing

    # Not set yet: record the default so later lookups see the same value.
    os.environ[var_name] = default_value
    return default_value
# Resolve the output folder from the GRADIO_OUTPUT_FOLDER environment
# variable, defining it as 'output/' when it is not already set.
env_var_name = 'GRADIO_OUTPUT_FOLDER'
default_value = 'output/'
output_folder = get_or_create_env_var(env_var_name, default_value)

print(f'The value of {env_var_name} is {output_folder}')
def detect_file_type(filename):
    """Detect the file type based on its extension.

    The comparison is case-insensitive, so ``DATA.CSV`` is treated the same
    as ``data.csv``.

    Args:
        filename: File name or path whose suffix is inspected.

    Returns:
        ``'csv'`` for ``.csv``, ``.csv.gz`` and ``.zip`` files, ``'xlsx'`` for
        ``.xlsx`` files, or ``'parquet'`` for ``.parquet`` files.

    Raises:
        ValueError: If the suffix is none of the supported extensions.
    """
    lowered = filename.lower()

    # .zip is routed to the CSV reader along with plain and gzipped CSV.
    if lowered.endswith(('.csv', '.csv.gz', '.zip')):
        return 'csv'
    if lowered.endswith('.xlsx'):
        return 'xlsx'
    if lowered.endswith('.parquet'):
        return 'parquet'
    raise ValueError("Unsupported file type.")
def read_file(filename):
    """Load a file into a DataFrame using the reader for its detected type.

    Args:
        filename: Path of the file to load; its type is determined by
            ``detect_file_type``.

    Returns:
        The result of ``pd.read_csv`` (with ``low_memory=False``),
        ``pd.read_excel`` or ``pd.read_parquet`` depending on the detected
        type, or ``None`` if the detected type matches none of them.
    """
    kind = detect_file_type(filename)

    # One loader per supported type; CSV is the only one with extra options.
    loaders = {
        'csv': lambda path: pd.read_csv(path, low_memory=False),
        'xlsx': pd.read_excel,
        'parquet': pd.read_parquet,
    }

    loader = loaders.get(kind)
    if loader is None:
        return None
    return loader(filename)
def initial_data_load(in_file):
    """Load the first data file (and first results file, if any) from an upload.

    Uploaded files whose name contains "results_on_orig" (case-insensitive)
    are treated as results files; every other file is treated as a data file.
    Only the first file of each kind is read.

    Args:
        in_file: Uploaded file objects, each exposing a ``.name`` path.
            May be empty or ``None``.

    Returns:
        A 5-tuple of: a status message, two ``gr.Dropdown`` components whose
        choices are the data file's column names, the data DataFrame, and the
        results DataFrame (empty when no results file was supplied). When no
        files or no data file are provided, the dropdowns have no choices and
        both DataFrames are empty.
    """
    if not in_file:
        return "No files provided.", gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), pd.DataFrame(), pd.DataFrame()

    # Split the uploaded paths into data files and results files in one pass.
    data_paths = []
    results_paths = []
    for uploaded in in_file:
        path = uploaded.name
        if "results_on_orig" in path.lower():
            results_paths.append(path)
        else:
            data_paths.append(path)

    if not data_paths:
        return "No data file found.", gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), pd.DataFrame(), pd.DataFrame()

    df = read_file(data_paths[0])
    results_df = read_file(results_paths[0]) if results_paths else pd.DataFrame()

    column_choices = list(df.columns)
    return "Data successfully loaded", gr.Dropdown(choices=column_choices), gr.Dropdown(choices=column_choices), df, results_df
def ensure_output_folder_exists(output_folder):
    """Create the output folder when it is missing and print what happened.

    Args:
        output_folder: Path of the folder to check or create.
    """
    if os.path.exists(output_folder):
        print("The output folder already exists:", output_folder)
        return

    # makedirs also creates any missing intermediate directories.
    os.makedirs(output_folder)
    print("Created the output folder:", output_folder)
def dummy_function(in_colnames):
    """Do nothing; a placeholder that exists so dropdown updates work correctly.

    Args:
        in_colnames: Ignored.

    Returns:
        Always ``None``.
    """
    return
# Upon running a process, the feedback buttons are revealed
def reveal_feedback_buttons():
    """Return visible versions of the four feedback components.

    Returns:
        A tuple of ``gr.Radio``, ``gr.Textbox``, ``gr.Button`` and
        ``gr.Markdown``, each constructed with ``visible=True``.
    """
    component_types = (gr.Radio, gr.Textbox, gr.Button, gr.Markdown)
    return tuple(component(visible=True) for component in component_types)
def clear_inputs(in_file, in_ref, in_text):
    """Return emptied replacements for the two file inputs and the text input.

    The arguments are not read; only fresh, empty components are returned.

    Returns:
        A tuple of two ``gr.File`` components with an empty value list and a
        ``gr.Textbox`` with an empty string value.
    """
    empty_data_files = gr.File(value=[])
    empty_ref_files = gr.File(value=[])
    empty_text = gr.Textbox(value='')
    return empty_data_files, empty_ref_files, empty_text
## Get final processing time for logs:
def sum_numbers_before_seconds(string):
    """Extract the numbers that directly precede the word 'seconds' and add them up.

    Both decimal ("1.5 seconds") and whole ("3 seconds") numbers are counted.
    An occurrence of 'seconds' with no number in front of it is ignored.

    Args:
        string: The input string.

    Returns:
        The sum of all numbers before 'seconds' in the string, rounded to one
        decimal place (0 when there are none).
    """
    # The number is mandatory in the pattern: an optional group would also
    # match a bare "seconds" and yield an empty string that cannot be parsed.
    matches = re.findall(r'(\d+(?:\.\d+)?)\s*seconds', string)
    return round(sum(float(match) for match in matches), 1)
async def get_connection_params(request: gr.Request):
    """Derive the session identifier and output folder for a Gradio request.

    When both the CUSTOM_CLOUDFRONT_HEADER and CUSTOM_CLOUDFRONT_HEADER_VALUE
    environment variables are non-empty and the request carries that header,
    the header's value must equal the configured value.

    The identifier is taken from, in order of preference:
      1. ``request.username`` (username passed in from direct Cognito login),
      2. the ``x-cognito-id`` header (Cognito ID passed through a Lambda
         authenticator),
      3. ``request.session_hash``.

    Args:
        request: The Gradio request for the current session; may be falsy.

    Returns:
        A tuple ``(out_session_hash, output_folder, out_session_hash)``, where
        ``output_folder`` is ``"user-files/<id>/"`` for cases 1 and 2 and
        ``"temp-files/<id>/"`` for case 3. Three empty strings are returned
        when ``request`` is falsy.

    Raises:
        ValueError: If the custom CloudFront header is present on the request
            but its value does not match the configured value.
    """
    if not request:
        print("No session parameters found.")
        # Same arity as the normal path so callers can unpack uniformly.
        return "", "", ""

    print("Session hash:", request.session_hash)

    # Retrieving or setting the expected custom CloudFront header name/value.
    cloudfront_header = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER', '')
    cloudfront_header_value = get_or_create_env_var('CUSTOM_CLOUDFRONT_HEADER_VALUE', '')

    # NOTE(review): a request that lacks the header entirely is let through
    # unchecked; confirm that this is intended.
    if cloudfront_header and cloudfront_header_value and cloudfront_header in request.headers:
        supplied_cloudfront_custom_value = request.headers[cloudfront_header]
        if supplied_cloudfront_custom_value != cloudfront_header_value:
            raise ValueError("Custom Cloudfront header value does not match expected value.")
        print("Custom Cloudfront header found:", supplied_cloudfront_custom_value)

    # Get output save folder from 1 - username passed in from direct Cognito
    # login, 2 - Cognito ID header passed through a Lambda authenticator,
    # 3 - the session hash.
    if request.username:
        out_session_hash = request.username
        base_folder = "user-files/"
        print("Request username found:", out_session_hash)
    elif 'x-cognito-id' in request.headers:
        out_session_hash = request.headers['x-cognito-id']
        base_folder = "user-files/"
        print("Cognito ID found:", out_session_hash)
    else:
        out_session_hash = request.session_hash
        base_folder = "temp-files/"

    output_folder = base_folder + out_session_hash + "/"

    return out_session_hash, output_folder, out_session_hash