File size: 4,557 Bytes
51fe9d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import streamlit as st
from streamlit_chat import message
from openai.error import OpenAIError
from .utils import (
    parse_docx,
    parse_pdf,
    parse_txt,
    search_docs,
    embed_docs,
    text_to_docs,
    get_answer,
)
from uuid import uuid4

def clear_submit():
    """Reset the ``"submit"`` flag in the Streamlit session state to ``False``.

    Used in this file as an ``on_change`` callback for the file uploader and
    the question text area.
    """
    session = st.session_state
    session["submit"] = False

def set_openai_api_key(api_key: str):
    """Store ``api_key`` in the Streamlit session state as ``"OPENAI_API_KEY"``.

    Args:
        api_key: The key string to remember for the current session.
    """
    session = st.session_state
    session["OPENAI_API_KEY"] = api_key

def _parse_uploaded_file(uploaded_file):
    """Parse ``uploaded_file`` with the parser that matches its extension.

    Returns:
        The result of ``parse_pdf`` / ``parse_docx`` / ``parse_txt``, or
        ``None`` when the file name has none of the handled extensions.
    """
    # Compare case-insensitively so names such as "REPORT.PDF" are recognised.
    name = uploaded_file.name.lower()
    if name.endswith(".pdf"):
        return parse_pdf(uploaded_file)
    if name.endswith(".docx"):
        return parse_docx(uploaded_file)
    if name.endswith(".txt"):
        return parse_txt(uploaded_file)
    return None


def qa_main():
    """Render the "chat with files" Streamlit page.

    The sidebar collects an OpenAI API key and a single uploaded file; a
    supported file is parsed, split with ``text_to_docs`` and indexed with
    ``embed_docs``. The first tab lets the user submit a question, which is
    answered through ``search_docs`` + ``get_answer`` and appended to the
    chat history kept in ``st.session_state['past']`` /
    ``st.session_state['generated']``. The second tab shows a short
    placeholder text.

    ``OpenAIError`` raised while indexing or answering is reported with
    ``st.error`` instead of propagating.
    """
    st.markdown("<h1>This app allows to chat with files!</h1>", unsafe_allow_html=True)
    st.markdown(
        """
        Developed using LangChain and OpenAI Embeddings.</p>
        Before hitting on "Submit", please make sure you have uploaded a file and entered a question.

        You can upload files using the sidebar on the left.
        """,
        unsafe_allow_html=True,
    )
    # Stays None until a document has been parsed and embedded successfully.
    index = None

    with st.sidebar:
        user_secret = st.text_input(
            "OpenAI API Key",
            type="password",
            placeholder="Paste your OpenAI API key here (sk-...)",
            help="You can get your API key from https://platform.openai.com/account/api-keys.",
            value=st.session_state.get("OPENAI_API_KEY", ""),
        )
        if user_secret:
            set_openai_api_key(user_secret)

        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet!",
            on_change=clear_submit,
            accept_multiple_files=False,
        )
        # reading the files
        if uploaded_file is not None:
            doc = _parse_uploaded_file(uploaded_file)
            if doc is None:
                # The uploader accepts more extensions than we can parse;
                # report it and skip indexing rather than crash on tuple(None).
                st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")
            else:
                text = text_to_docs(text=tuple(doc))
                st.write(text[:1])

                try:
                    with st.spinner("Indexing document(s)... This may take some time."):
                        index = embed_docs(tuple(text))
                        st.session_state["api_key_configured"] = True
                except OpenAIError as e:
                    st.error(e._message)

    tab1, tab2 = st.tabs(["Chat With File", "About the Application"])
    with tab1:
        if 'generated' not in st.session_state:
            st.session_state['generated'] = []

        if 'past' not in st.session_state:
            st.session_state['past'] = []

        # The question box is only shown once an API key has been entered.
        user_input = None
        if user_secret:
            st.header("Ask me something about the document:")
            user_input = st.text_area("You:", on_change=clear_submit)

        button = st.button("Submit")
        if button or st.session_state.get("submit"):
            if not user_input:
                st.error("Please enter a question!")
            elif index is None:
                # No file uploaded, unsupported file, or indexing failed:
                # there is nothing to search, so do not call search_docs.
                st.error("Please upload a supported file and wait for it to be indexed first!")
            else:
                st.session_state["submit"] = True
                try:
                    # Both calls sit inside the try so an OpenAIError from
                    # either one is reported instead of propagating.
                    sources = search_docs(index, user_input)
                    answer = get_answer(sources, user_input)

                    st.session_state.past.append(user_input)
                    st.session_state.generated.append(answer["output_text"])

                except OpenAIError as e:
                    st.error(e._message)

                # Show the conversation newest-first; each widget needs a
                # unique key, hence a fresh uuid per message.
                history = zip(
                    reversed(st.session_state['past']),
                    reversed(st.session_state['generated']),
                )
                for question, reply in history:
                    message(reply, key=str(uuid4()))
                    message(question, is_user=True, key=str(uuid4()))

    with tab2:
        st.write('See sources')

        # st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
        # st.write('Features include- ')
        # st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
        # st.write('2. Embedding texts segments with Langchain and OpenAI')
        # st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')