from fastapi import FastAPI, Request, File, UploadFile
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd
import re
import io
import base64
import matplotlib.pyplot as plt
import torch
from transformers import pipeline, VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import AutoModelForCausalLM
import fitz  # PyMuPDF
from docx import Document
from pptx import Presentation
import seaborn as sns
import PIL.Image as Image
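
# FastAPI application; CORS is left wide open so the browser front-end can call the API from any origin.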
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
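
# Model loading. Each model is fetched from the Hugging Face Hub at startup:
#   - nlpconnect/vit-gpt2-image-captioning for image captioning,
#   - facebook/bart-large-cnn for summarization (falling back to TensorFlow weights if needed),
#   - deepseek-ai/deepseek-coder-1.3b-instruct for code generation.
# The captioning and code-generation loads are wrapped in try/except so a failure is logged
# instead of crashing startup.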
try:
    # interpreter = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    interpreter_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    interpreter_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    interpreter_tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
except Exception as exp:
    print("[ERROR] Can't load nlpconnect/vit-gpt2-image-captioning")
    print(str(exp))

# try:
#     summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0)
# except Exception as exp:
#     print("[ERROR] Can't load facebook/bart-large-cnn")
#     print(str(exp))
try:
    summarizer_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
except OSError as e:
    print(f"[INFO] PyTorch weights not found. Falling back to TensorFlow weights.\n{e}")
    summarizer_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn", from_tf=True)
summarizer_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

# try:
#     generator = pipeline("text-generation", model="deepseek-ai/deepseek-coder-1.3b-instruct", device_map="auto")
# except Exception as exp:
#     print("[ERROR] Can't load deepseek-ai/deepseek-coder-1.3b-instruct")
#     print(str(exp))
try:
    generator_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
    generator_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
except Exception as exp:
    print("[ERROR] Can't load deepseek-ai/deepseek-coder-1.3b-instruct")
    print(str(exp))
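
# Static assets and Jinja2 HTML templates for the front-end pages.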
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# HTML page routes (the route paths here are assumptions inferred from the template names).
@app.get("/")
def index(req: Request):
    return templates.TemplateResponse("index.html", {"request": req})

@app.get("/summarization")
def summarization_page(req: Request):
    return templates.TemplateResponse("Summarization.html", {"request": req})

@app.get("/data-visualisation")
def data_visualisation_page(req: Request):
    return templates.TemplateResponse("DataVisualisation.html", {"request": req})

@app.get("/image-interpretation")
def image_interpretation_page(req: Request):
    return templates.TemplateResponse("ImageInterpretation.html", {"request": req})

# Image-captioning endpoint (route path assumed).
@app.post("/caption")
def caption(file: UploadFile = File(...)):
    extension = file.filename.split(".")[-1].lower()
    supported_extensions = ["png", "jpg", "jpeg"]
    if extension not in supported_extensions:
        return {"error": "Unsupported file type"}
    image = Image.open(file.file).convert("RGB")  # the ViT processor expects an RGB image
    # caption = interpreter(image)
    pixel_values = interpreter_processor(images=image, return_tensors="pt").pixel_values
    output_ids = interpreter_model.generate(pixel_values, max_length=16, num_beams=4)
    caption = interpreter_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"caption": caption}
    # return {"caption": caption[0]['generated_text']}
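
# Example request against the captioning endpoint (path and port match the assumed
# decorator above and the run command at the end of this file; adjust as needed):
#   curl -X POST -F "file=@photo.jpg" http://localhost:7860/caption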

# Document summarization endpoint (route path assumed).
@app.post("/summarization")
def summarization(file: UploadFile = File(...)):
    extension = file.filename.split(".")[-1].lower()
    if extension == "pdf":
        text = get_text_from_PDF(file.file)
    elif extension == "docx":
        text = get_text_from_DOC(file.file)
    elif extension == "pptx":
        text = get_text_from_PPT(file.file)
    elif extension == "xlsx":
        text = get_text_from_EXCEL(file.file)
    else:
        return {"error": "Unsupported file type"}
    if not text.strip():
        return {"error": "File is empty"}
    result = ""
    # Summarize in ~1024-character chunks (BART attends to at most ~1024 tokens),
    # using the locally loaded model in place of the commented-out pipeline call.
    for i in range(0, len(text), 1024):
        chunk = text[i:i + 1024]
        inputs = summarizer_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024)
        summary_ids = summarizer_model.generate(inputs["input_ids"], max_length=150, min_length=30, do_sample=False)
        result += summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True) + " "
    return {"summary": result.strip()}

# Data-visualisation endpoint (route path assumed): generates plotting code from the
# prompt and runs it against the uploaded spreadsheet.
@app.post("/plot")
def plot(prompt: str, file: UploadFile = File(...)):
    try:
        extension = file.filename.split(".")[-1].lower()
        supported_extensions = ["xlsx", "xls"]
        if extension not in supported_extensions:
            return {"error": "Unsupported file type"}
        df = pd.read_excel(file.file)
        message = f"""
You are a helpful assistant that helps users write Python code.
## Requirements:
- You will be given a task and you will write the code to solve it.
- You have a dataset called **df** that contains the following information:
    df.columns: {df.columns.to_list()}
    df.dtypes: {df.dtypes.to_dict()}
- Write the code that solves the task using the dataframe df.
- You can use pandas to manipulate the dataframe.
- You can use matplotlib and seaborn to plot the data.
- Don't use print or input statements in the code.
- Don't use any libraries other than pandas, matplotlib, and seaborn.
- Don't use any functions other than the ones provided by those libraries.
- Don't write the code that creates the dataframe.
- Exclude plt.show() from the code.
- Write the code in a markdown code block.
- Make sure the chart type is compatible with the dtypes of the columns.
- Use only the columns specified in the task.
- Extract the column names and the plot type from the prompt below and use them in the code.
- If the task is unclear, or there is an error such as the column names not being in the dataframe, raise an error.
## Prompt: {prompt}.
"""
        # Generate the plotting code with the deepseek-coder model loaded above.
        inputs = generator_tokenizer(message, return_tensors="pt")
        output_ids = generator_model.generate(**inputs, max_new_tokens=1000)
        generated_text = generator_tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # The prompt asks for a markdown code block; extract the Python inside it.
        match = re.search(r"```python(.*?)```", generated_text, re.DOTALL)
        if not match:
            return {"error": "Can't generate the plot"}
        code = match.group(1).replace("plt.show()\n", "")
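        # Only these names are visible to the generated code when it is exec'd below;
        # exec() is not a real sandbox, so this restricts convenience rather than providing security.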
        safe_globals = {
            "plt": plt,
            "sns": sns,
            "pd": pd,
            "df": df,
        }
        try:
            exec(code, safe_globals)
            buf = io.BytesIO()
            plt.savefig(buf, format="png")
            plt.close("all")  # free the figure so plots don't leak between requests
            buf.seek(0)
            base64_image = base64.b64encode(buf.getvalue()).decode("utf-8")
            return {"plot": f"data:image/png;base64,{base64_image}"}
        except Exception as e:
            return {"error": str(e)}
    except Exception as exp:
        return {"error": "Internal Server Error: " + str(exp)}
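
# --- Text extraction helpers used by the summarization endpoint ---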
def get_text_from_PDF(file):
    # fitz (PyMuPDF) needs bytes when opening an in-memory upload rather than a file path.
    doc = fitz.open(stream=file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    return text

def get_text_from_PPT(file):
    prs = Presentation(file)
    text = ""
    for slide in prs.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                text += shape.text
    return text

def get_text_from_DOC(file):
    doc = Document(file)
    text = ""
    for paragraph in doc.paragraphs:
        text += paragraph.text
    return text

def get_text_from_EXCEL(file):
    df = pd.read_excel(file)
    text = df.to_string()
    return text
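
# To run locally (assuming this module is saved as app.py):
#   uvicorn app:app --host 0.0.0.0 --port 7860
# Port 7860 is the Hugging Face Spaces convention; any free port works.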