Adignite committed on
Commit
22dc2ca
·
verified ·
1 Parent(s): 6280279

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -52
app.py CHANGED
@@ -6,34 +6,46 @@ from transformers import pipeline
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from docx import Document
8
  import io
 
9
 
10
  class CarbonCreditDocGenerator:
11
  def __init__(self):
12
  self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
13
- self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=500)
14
-
15
- # Load your knowledge base here
16
  self.knowledge_base = self.load_knowledge_base()
17
 
18
  def load_knowledge_base(self):
19
- # This should load your carbon credit domain knowledge
20
  return [
21
  "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
22
  "Afforestation projects involve planting trees in areas where there were none before.",
23
  "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
24
  "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
25
  "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
26
- # Add more knowledge base entries...
 
 
 
 
27
  ]
28
 
29
  def process_input_data(self, input_text):
30
- # In a real scenario, you'd parse the input document more thoroughly
31
- lines = input_text.split('\n')
32
  data = {}
33
- for line in lines:
34
- if ':' in line:
35
- key, value = line.split(':', 1)
36
- data[key.strip()] = value.strip()
 
 
 
 
 
 
 
 
 
 
 
37
  return data
38
 
39
  def retrieve_relevant_knowledge(self, query, top_k=3):
@@ -45,30 +57,31 @@ class CarbonCreditDocGenerator:
45
 
46
  return [self.knowledge_base[i] for i in top_indices]
47
 
48
- def generate_section_content(self, section_title, input_data, max_length=500):
49
  query = f"Generate content for the '{section_title}' section of a carbon credit document."
50
  relevant_knowledge = self.retrieve_relevant_knowledge(query)
51
 
52
- context = f"Input data: {input_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
 
53
  prompt = f"{context}\n\nTask: {query}\n\nContent:"
54
 
55
  generated_text = self.nlg_pipeline(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']
56
 
57
- # Apply corrective RAG
58
- corrected_text = self.apply_corrective_rag(generated_text, input_data, relevant_knowledge)
59
 
60
  return corrected_text
61
 
62
  def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
63
- # This is a simplified version of corrective RAG
64
  corrected_text = generated_text
65
 
66
- # Ensure all input data is represented
67
  for key, value in input_data.items():
68
- if value.lower() not in corrected_text.lower():
 
 
 
 
69
  corrected_text += f" {key}: {value}."
70
 
71
- # Ensure relevant knowledge is incorporated
72
  for knowledge in relevant_knowledge:
73
  if knowledge.lower() not in corrected_text.lower():
74
  corrected_text += f" {knowledge}"
@@ -82,17 +95,14 @@ class CarbonCreditDocGenerator:
82
  input_data = self.process_input_data(input_text)
83
 
84
  sections = [
85
- "Executive Summary",
86
- "Certificate Identification",
87
- "Emission Reduction Details",
88
- "Project Information",
89
- "Verification and Certification",
90
- "Issuance and Expiration Dates",
91
- "Market Type",
92
- "Transferability Information",
93
- "Legal Framework",
94
- "Accountability Measures",
95
- "Contact Information"
96
  ]
97
 
98
  for section in sections:
@@ -104,41 +114,57 @@ class CarbonCreditDocGenerator:
104
 
105
  def generate_document(self, input_text):
106
  doc = self.create_document(input_text)
107
-
108
- # Save the document to a BytesIO object
109
  doc_io = io.BytesIO()
110
  doc.save(doc_io)
111
  doc_io.seek(0)
112
-
113
  return doc_io
114
 
115
  # Streamlit app
116
  def main():
 
117
  st.title("Carbon Credit Document Generator")
118
 
119
- # File uploader
120
- uploaded_file = st.file_uploader("Choose a text file", type="txt")
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- if uploaded_file is not None:
123
- # Read the file
124
- input_text = uploaded_file.read().decode("utf-8")
125
- st.text_area("Input Data", input_text, height=200)
 
 
 
 
 
126
 
127
- if st.button("Generate Document"):
128
- generator = CarbonCreditDocGenerator()
129
-
130
- with st.spinner("Generating document..."):
131
- doc_io = generator.generate_document(input_text)
132
 
133
- st.success("Document generated successfully!")
 
 
 
 
 
 
 
 
 
 
134
 
135
- # Provide download button
136
- st.download_button(
137
- label="Download Carbon Credit Document",
138
- data=doc_io.getvalue(),
139
- file_name="carbon_credit_document.docx",
140
- mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
141
- )
142
 
143
  if __name__ == "__main__":
144
  main()
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from docx import Document
8
  import io
9
+ import re
10
 
11
  class CarbonCreditDocGenerator:
12
  def __init__(self):
13
  self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
14
+ self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=1000)
 
 
15
  self.knowledge_base = self.load_knowledge_base()
16
 
17
  def load_knowledge_base(self):
 
18
  return [
19
  "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
20
  "Afforestation projects involve planting trees in areas where there were none before.",
21
  "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
22
  "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
23
  "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
24
+ "Project developers must provide detailed information about project location, type, and expected carbon sequestration.",
25
+ "Carbon credit pricing can vary based on project type, location, and additional benefits.",
26
+ "Environmental Impact Assessments (EIA) are often required for carbon credit projects.",
27
+ "Community engagement and social benefits are important aspects of many carbon credit projects.",
28
+ "Risk assessment and mitigation strategies are crucial for project success and credibility."
29
  ]
30
 
31
  def process_input_data(self, input_text):
32
+ sections = re.split(r'\d+\.\s+', input_text)[1:] # Split by numbered sections
 
33
  data = {}
34
+ current_section = ""
35
+ for section in sections:
36
+ lines = section.strip().split('\n')
37
+ section_title = lines[0].strip()
38
+ current_section = section_title
39
+ data[current_section] = {}
40
+ for line in lines[1:]:
41
+ if ':' in line:
42
+ key, value = line.split(':', 1)
43
+ data[current_section][key.strip()] = value.strip()
44
+ else:
45
+ # Append to the last key if no colon is found
46
+ if data[current_section]:
47
+ last_key = list(data[current_section].keys())[-1]
48
+ data[current_section][last_key] += " " + line.strip()
49
  return data
50
 
51
  def retrieve_relevant_knowledge(self, query, top_k=3):
 
57
 
58
  return [self.knowledge_base[i] for i in top_indices]
59
 
60
+ def generate_section_content(self, section_title, input_data, max_length=1000):
61
  query = f"Generate content for the '{section_title}' section of a carbon credit document."
62
  relevant_knowledge = self.retrieve_relevant_knowledge(query)
63
 
64
+ section_data = input_data.get(section_title, input_data)
65
+ context = f"Input data: {section_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
66
  prompt = f"{context}\n\nTask: {query}\n\nContent:"
67
 
68
  generated_text = self.nlg_pipeline(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']
69
 
70
+ corrected_text = self.apply_corrective_rag(generated_text, section_data, relevant_knowledge)
 
71
 
72
  return corrected_text
73
 
74
  def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
 
75
  corrected_text = generated_text
76
 
 
77
  for key, value in input_data.items():
78
+ if isinstance(value, dict):
79
+ for sub_key, sub_value in value.items():
80
+ if sub_value.lower() not in corrected_text.lower():
81
+ corrected_text += f" {sub_key}: {sub_value}."
82
+ elif value.lower() not in corrected_text.lower():
83
  corrected_text += f" {key}: {value}."
84
 
 
85
  for knowledge in relevant_knowledge:
86
  if knowledge.lower() not in corrected_text.lower():
87
  corrected_text += f" {knowledge}"
 
95
  input_data = self.process_input_data(input_text)
96
 
97
  sections = [
98
+ "Project Overview",
99
+ "Seller/Proponent Information",
100
+ "Carbon Credit Specifications",
101
+ "Financial & Pricing Information",
102
+ "Project Impact and Sustainability",
103
+ "Risks & Mitigation Strategies",
104
+ "Supporting Documentation",
105
+ "Declarations and Acknowledgements"
 
 
 
106
  ]
107
 
108
  for section in sections:
 
114
 
115
  def generate_document(self, input_text):
116
  doc = self.create_document(input_text)
 
 
117
  doc_io = io.BytesIO()
118
  doc.save(doc_io)
119
  doc_io.seek(0)
 
120
  return doc_io
121
 
122
  # Streamlit app
123
  def main():
124
+ st.set_page_config(page_title="Carbon Credit Document Generator", page_icon="🌿")
125
  st.title("Carbon Credit Document Generator")
126
 
127
+ st.markdown("""
128
+ This app generates a comprehensive Carbon Credit Project Document based on your input.
129
+ Upload a text file or paste your project details below.
130
+ """)
131
+
132
+ input_method = st.radio("Choose input method:", ("Upload File", "Paste Text"))
133
+
134
+ if input_method == "Upload File":
135
+ uploaded_file = st.file_uploader("Choose a text file", type="txt")
136
+ if uploaded_file is not None:
137
+ input_text = uploaded_file.read().decode("utf-8")
138
+ st.text_area("File Contents (Read-only)", input_text, height=300, disabled=True)
139
+ else:
140
+ input_text = st.text_area("Paste your project details here:", height=400, help="Enter your project details in a structured format, similar to the Carbon Credit Project Submission Form.")
141
 
142
+ if st.button("Generate Document"):
143
+ if not input_text:
144
+ st.error("Please provide input data before generating the document.")
145
+ else:
146
+ try:
147
+ generator = CarbonCreditDocGenerator()
148
+
149
+ with st.spinner("Generating document... This may take a few moments."):
150
+ doc_io = generator.generate_document(input_text)
151
 
152
+ st.success("Document generated successfully!")
 
 
 
 
153
 
154
+ st.download_button(
155
+ label="📥 Download Carbon Credit Document",
156
+ data=doc_io.getvalue(),
157
+ file_name="carbon_credit_document.docx",
158
+ mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
159
+ )
160
+
161
+ st.info("Your document is ready for download. Click the button above to save it.")
162
+ except Exception as e:
163
+ st.error(f"An error occurred while generating the document: {str(e)}")
164
+ st.info("Please try again or contact support if the problem persists.")
165
 
166
+ st.markdown("---")
167
+ st.markdown("Developed with ❤️ by Your Company Name")
 
 
 
 
 
168
 
169
  if __name__ == "__main__":
170
  main()