Spaces:

GT4SD
/

PatentToolkit

Runtime error

File size: 10,050 Bytes

6d10c64
a1bc39d
2bde9e1
4c61fcd
7de20a0
 
 
 
 
d5be6af
4d15dc7
d5be6af
7de20a0
cab9610
2b64787
 
 
 
535b7be
7de20a0
e610ece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5293e61
e610ece
1423dfb
 
 
 
 
 
 
 
 
 
5293e61
e610ece
a1bc39d
 
 
 
 
 
 
 
 
 
 
 
 
192d9af
9420ba3
 
 
46d20c8
9e8be11
1423dfb
9e8be11
 
e610ece
9e8be11
e610ece
9e8be11
eb55011
53bd282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e450f6c
53bd282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09e2691
53bd282
7f2ec6d
09e2691
7f2ec6d
09e2691
53bd282
 
 
7f2ec6d
9e8be11
7f2ec6d
 
 
 
 
 
 
3eb58cd
 
b1b3f71
 
 
3fbe817
b1b3f71
9224ffd
b24494c
4ca8440
5bdbc1f
3eb58cd
 
fd71939
b1b3f71
3eb58cd
b56fd2b
3eb58cd
3c56f3a
 
 
 
21d804e
9907d16
9224ffd
 
429953d
 
bee919a
46d20c8
5b4db95
3c56f3a
9907d16
5b4db95
 
 
 
9224ffd
3223cd0
9224ffd
 
5b4db95
9907d16
5b4db95
 
 
9808a5f
9224ffd
3223cd0
9224ffd
3eb58cd
9907d16
3eb58cd
 
 
919b891
 
 
9224ffd
919b891
9224ffd
919b891
 
 
 
 
 
92d4436
8373fbf
b56fd2b
92d4436
c585a5d
429953d
b56fd2b
9224ffd
919b891
46d20c8
9907d16
8cacfad
 
 
 
 
d6b5ec6
9907d16
4ca8440
 
 
f076e4f
 
92d4436
4ca8440
92d4436
4ca8440
 
 
f076e4f
38098e8
46d20c8
99c33b8
e5a45fc
 
 
914a779
e5a45fc
 
7f2ec6d
53bd282
 
 
8e7188b
38098e8
 
 
 
 
212ca5f
38098e8
212ca5f
38098e8
 
 
61c7a4c
 
bef2a73

import os
import gradio as gr
from tridentmodel import classification
import pandas as pd
import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig

# Access token for the model repository, taken from the AUTH_TOKEN_SECRET
# environment variable (os.environ.get gives None when it is not set).
auth_token = os.environ.get("AUTH_TOKEN_SECRET")

# The tokenizer and the model weights are both fetched from this repository.
_CHECKPOINT = "Claimed/capybara"

tokenizer = LlamaTokenizer.from_pretrained(_CHECKPOINT, use_auth_token=auth_token)

# Weights are loaded in 8-bit and placed on devices by device_map="auto".
model = LlamaForCausalLM.from_pretrained(
    _CHECKPOINT,
    use_auth_token=auth_token,
    load_in_8bit=True,
    device_map="auto",
)

def broad_scope_class_predictor(class_embeddings, abstract_embedding, N=5, Sensitivity='Medium'):
    """
    Rank pre-computed class embeddings by cosine similarity to one abstract embedding.

    :param class_embeddings: dataframe of class embeddings; column 0 is read as the class
        name and column 2 as the saved embedding (positional access, so at least three
        columns are required)
    :param abstract_embedding: a single abstract embedding (torch tensor)
    :param N: N highest matching classes to return, from highest to lowest, default is 5
    :param Sensitivity: accepted for backward compatibility only; it has no effect on the
        returned ranking (the threshold it used to select was never part of the result)
    :return: HighestSimilarity: dataframe with columns 'Class Name' and 'Score' holding the
        N most similar classes
    """
    # NOTE(review): convert_saved_embeddings is neither defined nor imported in the visible
    # part of this file -- confirm where it is meant to come from before relying on this.

    # Round-trip through NumPy once up front (previously repeated for every class).
    abstract_embedding = torch.from_numpy(abstract_embedding.numpy())
    cos = torch.nn.CosineSimilarity(dim=1)

    rows = []
    for class_name, saved_embedding in zip(class_embeddings.iloc[:, 0], class_embeddings.iloc[:, 2]):
        embedding = convert_saved_embeddings(saved_embedding)
        score = cos(abstract_embedding, embedding).numpy().tolist()
        rows.append([class_name, score[0]])

    # Build the frame in one go instead of growing it row by row.
    predictions = pd.DataFrame(rows, columns=['Class Name', 'Score'])
    # nlargest needs a numeric column; an empty frame would otherwise have dtype object.
    predictions['Score'] = predictions['Score'].astype(float)

    HighestSimilarity = predictions.nlargest(N, ['Score'])
    return HighestSimilarity

# Tokenizer/model pairs already loaded by sentence_embedder, keyed by model path.
_SENTENCE_ENCODERS = {}


def mean_pooling(model_output, attention_mask):
    """
    Average the token embeddings of each input, ignoring padded positions.

    :param model_output: encoder output; element 0 is used as the token embeddings
    :param attention_mask: attention mask returned by the tokenizer
    :return: one pooled embedding per input text
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp so an all-zero mask cannot cause a division by zero.
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts


def sentence_embedder(sentences, model_path):
    """
    Embed text with a Hugging Face encoder followed by mean pooling.

    :param sentences: text (or list of texts) handed to the tokenizer
    :param model_path: path or name passed to from_pretrained for both tokenizer and model
    :return: sentence_embeddings: pooled embeddings, one row per input text
        (the original comment states (1, 384) for a single text -- model dependent)
    """
    if model_path not in _SENTENCE_ENCODERS:
        # Loading (including the from_tf=True weight conversion) is slow, so it is done
        # once per model path rather than on every call.
        _SENTENCE_ENCODERS[model_path] = (
            AutoTokenizer.from_pretrained(model_path),
            # AutoModel is not among the names imported from transformers at the top of
            # the file, so it is reached through the module itself.
            transformers.AutoModel.from_pretrained(model_path, from_tf=True),
        )
    encoder_tokenizer, encoder = _SENTENCE_ENCODERS[model_path]

    encoded_input = encoder_tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    # Compute token embeddings without tracking gradients.
    with torch.no_grad():
        model_output = encoder(**encoded_input)
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    return sentence_embeddings
    

    
def add_text(history, text):
    """
    Append a new user turn to the chat history.

    Returns a new history list ending in ``(text, None)`` (the reply slot is
    still empty) together with an empty string.
    """
    pending_turn = (text, None)
    return history + [pending_turn], ""

def add_file(history, file):
    """
    Append an uploaded file as a new turn in the chat history.

    The turn is ``((file.name,), None)``: a one-element tuple holding the
    file's name, with the reply slot left empty. A new list is returned.
    """
    file_turn = ((file.name,), None)
    return history + [file_turn]

def bot(history):
    """
    Fill the reply slot of the latest turn with a fixed placeholder message.

    The history is modified in place and the same list is returned.
    """
    latest_turn = history[-1]
    latest_turn[1] = "**That's cool!**"
    return history


########## LOADING PRE-COMPUTED EMBEDDINGS ##########
# Read once at import time; broad_scope_class_predictor accesses the columns of this
# frame by position (column 0 and column 2).
class_embeddings = pd.read_csv(
    filepath_or_buffer='Embeddings/MainClassEmbeddings.csv',
)

def classifier(userin):
    """
    Classify free text against the pre-computed class embeddings.

    The text is cleaned with ``classification.clean_data``, embedded with the
    'Model_bert' encoder and compared against ``class_embeddings``; the ten
    best-matching classes are returned.
    """
    top_n = 10
    cleaned_text = classification.clean_data(userin, type='String')
    text_embedding = sentence_embedder(cleaned_text, 'Model_bert')
    return broad_scope_class_predictor(
        class_embeddings, text_embedding, top_n, Sensitivity='High'
    )
    
def generateresponse(history):
    """
    Generate the model's reply for the most recent chat turn.

    The user message is read from the last entry of ``history``, wrapped in the
    instruction prompt, and passed to the module-level ``model``/``tokenizer``.
    The reply is written into the last entry's second slot.

    :param history: chat history; the last entry must be a mutable
        ``[user_message, reply]`` pair
    :return: the same ``history`` list, with the last reply filled in
    """
    user = history[-1][0]

    # NOTE(review): the lines after the first carry the function's indentation inside the
    # literal, so the model sees leading spaces before "### Instruction:" -- confirm this
    # matches the prompt format used for fine-tuning.
    PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
    ### Instruction:
    {user}
    ### Response:"""

    inputs = tokenizer(PROMPT, return_tensors="pt")
    # The model is placed by device_map="auto"; follow its device rather than assuming CUDA.
    input_ids = inputs["input_ids"].to(model.device)
    prompt_length = input_ids.shape[1]

    # NOTE(review): temperature and top_p only take effect when sampling is enabled
    # (do_sample=True); as configured, only repetition_penalty influences decoding.
    generation_config = GenerationConfig(
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.15,
    )
    print("Generating...")
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=256,
    )

    # The returned sequence starts with the prompt tokens. Decoding only what follows
    # avoids searching the text for "### Response:" (which breaks when the user's
    # message contains that marker) and keeps special tokens out of the reply.
    completion_ids = generation_output.sequences[0][prompt_length:]
    outputs = tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
    print(outputs)

    response = f"Response: {outputs}"
    history[-1][1] = response
    print(history)
    return history


# Base Gradio theme with an indigo primary hue and the small prose text size.
theme = gr.themes.Base(primary_hue="indigo").set(prose_text_size='*text_sm')

# Page layout: an introduction, one tab per tool, and a chatbot underneath.
with gr.Blocks(title='Claimed', theme=theme) as demo:

    gr.Markdown("""
    # CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS

    The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
    
    Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.

    Please note that this is for research purposes and shouldn't be used commercially. 

    None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for your intellectual property, consult a registered patent/trademark attorney.
    
    """)
    
    # The first four tabs only lay out an input and an output box; no event handler is
    # attached to them, so they do not produce any output yet.
    with gr.Tab("Claim Drafter"):
        gr.Markdown(""" 
        Use this tool to expand your idea into the technical language of a patent claim.        
        """)
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                              placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")
   
    with gr.Tab("Description Generator"):
        gr.Markdown(""" 

        Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
 
        """)
        # The dropdown is not bound to a name or passed to any handler.
        gr.Dropdown(["Generate Description", "Generate Abstract", "Benefits of the invention"], label='Choose Generation Type Here')
        with gr.Row(scale=1, min_width=600):
                    
            text1 = gr.Textbox(label="Input",
                              placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")

    with gr.Tab("Knowledge Graph"):
        gr.Markdown(""" 
        Use this tool to generate a knowledge graph of your invention. This will help highlight the links between features. 

        """)
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                              placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")

    with gr.Tab("Prosecution Ideator"):
        gr.Markdown(""" 
        Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the problem-solution format. 
        """)
        with gr.Row(scale=1, min_width=600):
            text1 = gr.Textbox(label="Input",
                              placeholder='Type in your idea here!')
            text2 = gr.Textbox(label="Output")

    # with gr.Tab("Claimed Infill"):
    #     gr.Markdown(""" 
    #     Below is our 

    #     Example input: A device to help the visually impaired using proprioception.

    #     Output: 
    #     """)
    #     with gr.Row(scale=1, min_width=600):
    #         text1 = gr.Textbox(label="Input",
    #                           placeholder='Type in your idea here!')
    #         text2 = gr.Textbox(label="Output")

    
    with gr.Tab("CPC Search Tool"):
        gr.Markdown("""
        Use this tool to classify your invention according to the Cooperative Patent Classification system. 

        Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu. 
        """)

        # The search-type dropdown is not passed to the classifier.
        gr.Dropdown(["Google Patent Search", "Espacenet Patent Search"], label='Choose Search Type Here')
        with gr.Row(scale=1, min_width=600):
            userin = gr.Textbox(label="Input",
                              placeholder='Type in your Claim/Description/Abstract Here')
            output = gr.Textbox(label="Output")
        with gr.Row():
            classify_btn = gr.Button("Classify")
            # Run the classifier on the input text and show its result in the output box.
            classify_btn.click(fn=classifier, inputs=[userin] , outputs=output)


    gr.Markdown(""" 

    # THE CHATBOT

    Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything. 

    If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.


    """)

   
    chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
    with gr.Row():
        with gr.Column(scale=0.85):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and submit",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn = gr.Button("Submit")
        
        # First append the user's message (and clear the box), then generate the reply.
        txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
            generateresponse, chatbot, chatbot)
        # The Submit button previously had no handler; it now triggers the same chain
        # as pressing Enter in the textbox.
        btn.click(add_text, [chatbot, txt], [chatbot, txt]).then(
            generateresponse, chatbot, chatbot)

    gr.Markdown("""
    # HAVE AN IDEA? GET IT CLAIMED 

    In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.

    If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!

    As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.  
    
    """)



# Start the Gradio server for the app defined above.
demo.launch()