Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from datasets import load_dataset | |
| import pandas as pd | |
| import random | |
| from sentence_transformers import SentenceTransformer, util | |
| import torch | |
# Load the Indian Law dataset via `datasets.load_dataset` and keep the
# "train" split as a DataFrame for easier manipulation.
print("Loading Indian Law Dataset...")
ds = load_dataset("viber1/indian-law-dataset")
df = pd.DataFrame(ds['train'])
print(f"Dataset loaded successfully with {len(df)} entries")
print(f"Dataset columns: {df.columns.tolist()}")

# Load semantic search model
print("Loading sentence-transformers model for semantic search...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Precompute embeddings for the dataset
print("Computing embeddings for dataset...")
# Join every string cell of a row into one searchable text. The isinstance
# check alone is enough (a str is never NaN) and, unlike calling pd.notna()
# first, it cannot raise on list/array-valued cells, for which pd.notna()
# returns an element-wise array whose truth value is ambiguous.
df['combined_text'] = df.apply(
    lambda row: ' '.join(val for val in row.values if isinstance(val, str)),
    axis=1,
)
corpus_embeddings = model.encode(
    df['combined_text'].tolist(),
    convert_to_tensor=True,
    show_progress_bar=True,
)
print("Embeddings computed successfully!")
def search_legal_info(question):
    """Search the dataset for relevant legal information using semantic search.

    Relies on the module-level ``model``, ``corpus_embeddings`` and ``df``.

    Args:
        question: Free-text query from the user; may be ``None`` or blank.

    Returns:
        A Markdown string: a prompt to enter a question when the input is
        empty, a "no relevant information" notice when no hit reaches the
        relevance threshold, or the formatted hits (at most five) in the
        order returned by ``torch.topk``.
    """
    if not question or not question.strip():
        return "β οΈ Please enter a legal question to search."

    # Encode the query and score it against every precomputed corpus embedding.
    query_embedding = model.encode(question, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]

    # Top 5 results, or fewer when the dataset itself is smaller.
    top_results = torch.topk(cos_scores, k=min(5, len(df)))

    # Collect rendered hits in a list so that "nothing matched" is decided by
    # the list being empty, not by searching the output text for a marker.
    sections = []
    ranked_hits = zip(top_results.values, top_results.indices)
    for rank, (score, idx) in enumerate(ranked_hits, start=1):
        similarity = score.item()
        # Skip low relevance results
        if similarity < 0.2:
            continue

        record = df.iloc[idx.item()].to_dict()
        parts = [f"## π Result {rank} (Relevance: {similarity*100:.1f}%)\n\n"]
        for key, value in record.items():
            if key == 'combined_text':  # Skip internal field
                continue
            # Type check first: only strings are displayed, and non-string
            # cells are never truth-tested.
            if not isinstance(value, str):
                continue
            display_value = value.strip()
            if not display_value:
                continue
            # Keep long fields readable by truncating them.
            if len(display_value) > 800:
                display_value = display_value[:800] + "..."
            parts.append(
                f"**{key.replace('_', ' ').title()}:**\n\n{display_value}\n\n"
            )
        parts.append("---\n\n")
        sections.append("".join(parts))

    if not sections:
        return "β No relevant information found in the dataset. Please try rephrasing your question or use different keywords."
    return "# π Search Results\n\n" + "".join(sections)
def get_random_sample():
    """Pick one row of the module-level ``df`` at random and render it as Markdown.

    Returns:
        A Markdown string with a heading followed by one bold-labelled
        section per non-empty string field of the chosen row.
    """
    row_number = random.randint(0, len(df) - 1)
    entry = df.iloc[row_number]

    chunks = ["# π Random Legal Information\n\n"]
    for column, cell in entry.items():
        # 'combined_text' is the internal search field, not for display.
        if column == 'combined_text':
            continue
        if not (cell and isinstance(cell, str) and len(cell.strip()) > 0):
            continue
        text = cell.strip()
        # Long fields are cut at 800 characters and marked with an ellipsis.
        if len(text) > 800:
            text = text[:800] + "..."
        chunks.append(f"**{column.replace('_', ' ').title()}:**\n\n{text}\n\n")
    return "".join(chunks)
def handle_feedback(question, feedback_type):
    """Build the acknowledgement message shown after a feedback click.

    Args:
        question: The current question text; accepted but not used.
        feedback_type: Short label for the kind of feedback, inserted
            verbatim into the message.

    Returns:
        The thank-you message as a string.
    """
    acknowledgement = "β Thank you for your {}! Your input helps us improve the system."
    return acknowledgement.format(feedback_type)
# Sample queries offered as one-click buttons in the UI. The UI slices this
# list into a first row of four and a second row with the remainder.
EXAMPLE_QUESTIONS = [
    "Can a plaint be amended after it has been filed in a civil case in India?",
    "What are the provisions for bail under Indian law?",
    "What are the rights of an accused person in India?",
    "How can property rights be transferred in India?",
    "What is the procedure for filing a divorce petition?",
    "What are the provisions related to consumer protection?",
    "What are the penalties for copyright infringement in India?",
]
# Create Gradio interface with improved UI
# NOTE(review): the container nesting below was reconstructed from flattened
# source text; confirm the Row/Column layout against the deployed app.
with gr.Blocks(title="Indian Law Q&A Assistant", theme=gr.themes.Soft()) as demo:
    # Markdown literals start at column 0 and keep a blank line between
    # blocks: without one, a `---` directly under a paragraph is parsed as a
    # heading underline instead of a horizontal rule.
    gr.Markdown("""
# ποΈ Indian Law Q&A Assistant

### β οΈ IMPORTANT DISCLAIMER

**This application is for informational purposes only and does NOT constitute legal advice.**
The information provided is based on a dataset and should not be relied upon for legal decisions.
Always consult with a qualified legal professional for specific legal matters.

---

Welcome to the Indian Law Question-Answer Assistant powered by semantic search technology
and the `viber1/indian-law-dataset`. Ask questions and get relevant legal information instantly!
""")

    with gr.Row():
        with gr.Column(scale=2):
            question_input = gr.Textbox(
                label="π¬ Your Legal Question",
                placeholder="Type your legal question here...",
                lines=3,
            )
            with gr.Row():
                search_btn = gr.Button("π Search", variant="primary", size="lg")
                random_btn = gr.Button("π² Random Sample", size="lg")

    gr.Markdown("### π Example Questions (Click to use):")
    # Buttons are appended in the same order as EXAMPLE_QUESTIONS (first four,
    # then the rest), so the two sequences can be paired up when wiring clicks.
    example_btns = []
    with gr.Row():
        for example in EXAMPLE_QUESTIONS[:4]:
            example_btns.append(gr.Button(example, size="sm"))
    with gr.Row():
        for example in EXAMPLE_QUESTIONS[4:]:
            example_btns.append(gr.Button(example, size="sm"))

    output_box = gr.Markdown(
        label="π Response",
        value="Enter a question above and click Search to begin.",
    )

    with gr.Row():
        gr.Markdown("""
### π’ Feedback

Found this helpful? Have suggestions? Click below:
""")
    with gr.Row():
        helpful_btn = gr.Button("π Helpful", size="sm")
        report_btn = gr.Button("π Report Issue", size="sm")
    # Hidden until the first feedback click reveals it.
    feedback_output = gr.Markdown(visible=False)

    # Button actions
    search_btn.click(fn=search_legal_info, inputs=question_input, outputs=output_box)
    random_btn.click(fn=get_random_sample, inputs=None, outputs=output_box)

    # Example button actions: each button copies its own question into the
    # textbox. The default argument binds the question when the lambda is
    # created, avoiding the late-binding closure pitfall.
    for btn, example in zip(example_btns, EXAMPLE_QUESTIONS):
        btn.click(
            fn=lambda ex=example: ex,
            inputs=None,
            outputs=question_input,
        )

    # Feedback actions: set the message and reveal the component in a single
    # update instead of chaining a second event just to toggle visibility.
    helpful_btn.click(
        fn=lambda q: gr.update(
            value=handle_feedback(q, "positive feedback"), visible=True
        ),
        inputs=question_input,
        outputs=feedback_output,
    )
    report_btn.click(
        fn=lambda q: gr.update(value=handle_feedback(q, "report"), visible=True),
        inputs=question_input,
        outputs=feedback_output,
    )

    # Footer. Blank lines separate the rule, headings, lists and the closing
    # line so the last sentence is not absorbed into the final list item.
    gr.Markdown(f"""
---

### π Dataset Information

- **Source**: viber1/indian-law-dataset on Hugging Face
- **Total Entries**: {len(df)}
- **Search Method**: Semantic search using sentence-transformers
- **Model**: sentence-transformers/all-MiniLM-L6-v2

### π§ Features

- β Semantic search for better relevance
- β Results ranked by similarity score
- β Clean, readable Markdown formatting
- β Example questions for quick start
- β Random exploration of dataset
- β User feedback mechanism

*Built with β€οΈ using Gradio, Hugging Face Datasets, and Sentence Transformers*
""")

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()