gpt4 book ai didi

python - 如何将我的 python 应用程序制作/转换为 R Shiny 应用程序?这是一个脑筋急转弯!无法在 R 中找到 UI 需要什么更改

转载 作者:行者123 更新时间:2023-12-03 14:27:30 29 4
gpt4 key购买 nike

我是 R 的新手,并试图了解 Rshiny 以构建 UI。我正在尝试为我的 python 应用程序创建一个 UI,用于转录多个 wav 文件。下面有两个部分,第一个是我的 python 应用程序,第二个是我在 R 中使用 reticulate 来调用我的 transcribe.py 应用程序的 Shiny 应用程序。但由于某种原因,我没有收到任何输出。
我的 Python 应用程序运行良好,不需要代码审查。但是,Rshiny 应用程序没有正确执行 Python 应用程序以产生所需的结果。目标是让用户从 UI 转录文件并决定他们是否要下载 csv。
我有一个用于转录文件的 python 应用程序,名为 transcribe.py:

import os
import json
import time
# import threading
from pathlib import Path

import concurrent.futures

# from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import pandas as pd

# Replace with your api key.
# NOTE(review): the key is hard-coded in the source file; anyone who can read
# this file can read the key.
my_api_key = "abc123"

# You can add a directory path to Path() if you want to run
# the project from a different folder at some point.
# With no argument this is the absolute form of the current working directory.
directory = Path().absolute()


# Wrap the API key in an authenticator object for the service client below.
authenticator = IAMAuthenticator(my_api_key)

# Module-level Speech to Text client; configured once here with the service URL.
service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url('https://api.us-east.speech-to-text.watson.cloud.ibm.com')
# I used this URL.
# service.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')


# NOTE(review): the two calls below run every time this module is imported or
# executed. Their results are stored in `models` and `model`, but only the
# commented-out prints refer to them in the code visible here.
models = service.list_models().get_result()
#print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
#print(json.dumps(model, indent=2))



# get data to a csv
########################RUN THIS PART SECOND#####################################


def process_data(json_data, output_path):
    """Flatten a recognition result, add sentiment scores, and save it as CSV.

    Args:
        json_data: Dictionary with a "results" list (each entry holding an
            "alternatives" list of dicts with "transcript" and "confidence")
            and a "speaker_labels" entry that pandas can turn into a frame
            containing "final" and "confidence" columns.
        output_path: pathlib.Path of the CSV file to write; its stem is also
            used in the progress messages.

    Returns:
        None. The combined table is written to ``output_path``.

    Raises:
        KeyError: If an alternative lacks "transcript"/"confidence", or the
            speaker-label frame lacks the "final"/"confidence" columns.
    """
    print(f"Processing: {output_path.stem}")

    cols = ["transcript", "confidence"]
    rows = [
        [alternative[cols[0]], alternative[cols[1]]]
        for result in json_data.get("results")
        for alternative in result.get("alternatives")
    ]
    transcripts = pd.DataFrame(data=rows, columns=cols)

    speaker_labels = pd.DataFrame(json_data.get("speaker_labels")).drop(
        ["final", "confidence"], axis=1
    )

    # Rows are paired purely by position: row i of the transcripts is joined
    # with row i of the speaker labels.
    combined = pd.merge(
        transcripts, speaker_labels, left_index=True, right_index=True
    )

    print(f"Getting sentiment for: {output_path.stem}")
    # dropna() returns a new Series, so the column inside `combined` is never
    # mutated through a selection of it.
    text = combined["transcript"].dropna()

    analyzer = SentimentIntensityAnalyzer()
    # Reuse the text's own index for the scores. A fresh 0..n-1 index would
    # drift out of step with the text as soon as dropna() removed a row, and
    # the index merge below would then attach scores to the wrong rows.
    scores = pd.DataFrame(
        [analyzer.polarity_scores(sentence) for sentence in text],
        index=text.index,
    )

    # Inner join on the index: rows whose transcript was dropped are left out.
    scored = pd.merge(combined, scores, left_index=True, right_index=True)
    scored = scored.rename(
        columns={
            "neg": "Negative",
            "pos": "Positive",
            "neu": "Neutral",
        }
    )

    # This is the name of the output csv file
    scored.to_csv(output_path, index=False)


def process_audio_file(filename, output_type="csv"):
    """Send one audio file to the speech-to-text service.

    Args:
        filename: Path of the audio file, joined onto the module-level
            ``directory``.
        output_type: Extension (without the dot) used to build the output
            path from the audio file's stem.

    Returns:
        A two-item list: the service result, and the output path
        (``directory/<stem>.<output_type>``).
    """
    source_path = directory / filename
    target_path = directory / f"{source_path.stem}.{output_type}"

    print(f"Current file: '{filename}'")

    recognize_options = {
        "speaker_labels": True,
        "content_type": "audio/wav",
        "inactivity_timeout": -1,
        "model": "en-US_NarrowbandModel",
        "continuous": True,
    }
    with source_path.open("rb") as stream:
        response = service.recognize(audio=stream, **recognize_options)
        transcription = response.get_result()

    print(f"Speech-to-text complete for: '{filename}'")

    # Result and output path travel together so the caller can unpack them.
    return [transcription, target_path]


def main():
    """Transcribe every .wav file under ``directory`` and write one CSV each.

    Files are submitted to a thread pool; each result is post-processed with
    ``process_data`` as soon as its transcription finishes. An exception
    raised while transcribing a file propagates out of this function.
    """
    print("Running main()...")

    # Default num. workers == min(32, os.cpu_count() + 4)
    # os.cpu_count() returns None when the count cannot be determined, so fall
    # back to 1 rather than failing on `None + 2`.
    n_workers = (os.cpu_count() or 1) + 2

    # Generator over all .wav files in the folder (and subfolders).
    wav_files = directory.glob("**/*.wav")

    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = {executor.submit(process_audio_file, f) for f in wav_files}
        for future in concurrent.futures.as_completed(futures):
            data, out_path = future.result()
            process_data(data, out_path)


if __name__ == "__main__":
    # Script entry point: run the batch once and report the wall-clock time.
    print("Program to process audio files has started.")

    started_at = time.perf_counter()
    main()
    finished_at = time.perf_counter()

    print(f"Done! Processing completed in {finished_at - started_at} seconds.")
在 Rstudio 中,我尝试过 -
R.UI 文件
library(shiny)
library(reticulate) # for reading Python code
library(dplyr)
library(stringr)
library(formattable) # for adding color to tables
library(shinybusy) # for busy bar
library(DT) # for dataTableOutput

# Tell reticulate which Python interpreter to use.
use_python("/usr/lib/python3")

# Page layout: a busy bar, a .wav upload control, a placeholder for the
# server-rendered download button, and the transcript table.
# The last argument must not be followed by a comma: a trailing comma leaves an
# empty argument, which some Shiny versions reject with "argument is missing".
ui <- fluidPage(
  add_busy_bar(color = "#5d98ff"),
  fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav"),
  uiOutput("downloadData"),
  dataTableOutput("transcript")
)
R.Server 文件
# Server logic: transcribes the uploaded .wav file through Python, shows the
# result as a colored table, and offers it as a CSV download.
# Arguments `input` and `output` are the standard Shiny server arguments.
server <- function(input, output) {

# .WAV File Selector ------------------------------------------------------

# Reactive returning the uploaded file's path with forward slashes.
file <- reactive({
file <- input$wavFile # Get file from user input
gsub("\\\\","/",file$datapath) # Access the file path. Convert back slashes to forward slashes.
})


# Transcribe and Clean ----------------------------------------------------

# Reactive returning the cleaned transcript data frame.
# NOTE(review): the transcribe.py listing that accompanies this app defines
# process_data, process_audio_file and main, but no `transcribe`; presumably
# the call below fails because the function does not exist -- confirm.
transcript <- reactive({

req(input$wavFile) # Require a file before proceeding

source_python('transcribe.py') # Load the Python function # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
transcript <- data.frame(transcribe(file())) # Transcribe the file # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
# load('transcript.rdata') # Loads a dummy transcript # UNCOMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY

transcript$transcript <- unlist(transcript$transcript) # Transcript field comes in as a list. Unlist it.
transcript <- transcript[which(!(is.na(transcript$confidence))),] # Remove empty lines
names(transcript) <- str_to_title(names(transcript)) # Capitalize column headers

transcript # Return the transcript

})


# Use a server-side download button ---------------------------------------

# ...so that the download button only appears after transcription

output$downloadData <- renderUI({
req(transcript())
downloadButton("handleDownload","Download CSV")
})

# Writes the current transcript to a CSV named "Transcript <date>.csv".
output$handleDownload <- downloadHandler(
filename = function() {
paste('Transcript ',Sys.Date(), ".csv", sep = "")
},
content = function(file) {
write.csv(transcript(), file, row.names = FALSE)
}
)


# Transcript table --------------------------------------------------------

# Shows four columns of the transcript; the formatter names below match the
# title-cased column names produced by str_to_title() above.
output$transcript <- renderDataTable({
as.datatable(formattable(
transcript() %>%
select(Transcript,
Confidence,
Negative,
Positive
),
list(Confidence = color_tile('#ffffff','#a2b3c8'),
Negative = color_tile('#ffffff', '#e74446'),
Positive = color_tile('#ffffff', "#499650")
)
), rownames = FALSE, options =list(paging = FALSE)
)
})


# END ---------------------------------------------------------------------

}

最佳答案

在 Shiny 中,您需要向 python 脚本正确传递参数。一个简单的方法是在 python 脚本中定义一个函数,然后在 Shiny 中调用该函数。
这是您修改后的 python 脚本(编辑了 process_data 函数并添加了 run_script 函数):

import os
import json
import time
# import threading
from pathlib import Path

import concurrent.futures

# from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import pandas as pd

# Replace with your api key.
# NOTE(review): the key is hard-coded in the source file; anyone who can read
# this file can read the key.
my_api_key = "api_key"

# You can add a directory path to Path() if you want to run
# the project from a different folder at some point.
# With no argument this is the absolute form of the current working directory.
directory = Path().absolute()


# Wrap the API key in an authenticator object for the service client below.
authenticator = IAMAuthenticator(my_api_key)

# Module-level Speech to Text client; configured once here with the service URL.
service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url('https://api.us-east.speech-to-text.watson.cloud.ibm.com')
# I used this URL.
# service.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')


# NOTE(review): the two calls below run every time this module is imported or
# sourced. Their results are stored in `models` and `model`, but only the
# commented-out prints refer to them in the code visible here.
models = service.list_models().get_result()
#print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
#print(json.dumps(model, indent=2))



# get data to a csv
########################RUN THIS PART SECOND#####################################


def process_data(json_data):
    """Flatten a recognition result and add sentiment scores.

    Args:
        json_data: Dictionary with a "results" list (each entry holding an
            "alternatives" list of dicts with "transcript" and "confidence")
            and a "speaker_labels" entry that pandas can turn into a frame
            containing "final" and "confidence" columns.

    Returns:
        pandas.DataFrame with the transcript, its confidence, the remaining
        speaker-label columns, and the sentiment scores, where "neg", "pos"
        and "neu" are renamed to "Negative", "Positive" and "Neutral".

    Raises:
        KeyError: If an alternative lacks "transcript"/"confidence", or the
            speaker-label frame lacks the "final"/"confidence" columns.
    """
    cols = ["transcript", "confidence"]
    rows = [
        [alternative[cols[0]], alternative[cols[1]]]
        for result in json_data.get("results")
        for alternative in result.get("alternatives")
    ]
    transcripts = pd.DataFrame(data=rows, columns=cols)

    speaker_labels = pd.DataFrame(json_data.get("speaker_labels")).drop(
        ["final", "confidence"], axis=1
    )

    # Rows are paired purely by position: row i of the transcripts is joined
    # with row i of the speaker labels.
    combined = pd.merge(
        transcripts, speaker_labels, left_index=True, right_index=True
    )

    # dropna() returns a new Series, so the column inside `combined` is never
    # mutated through a selection of it.
    text = combined["transcript"].dropna()

    analyzer = SentimentIntensityAnalyzer()
    # Reuse the text's own index for the scores. A fresh 0..n-1 index would
    # drift out of step with the text as soon as dropna() removed a row, and
    # the index merge below would then attach scores to the wrong rows.
    scores = pd.DataFrame(
        [analyzer.polarity_scores(sentence) for sentence in text],
        index=text.index,
    )

    # Inner join on the index: rows whose transcript was dropped are left out.
    scored = pd.merge(combined, scores, left_index=True, right_index=True)
    return scored.rename(
        columns={
            "neg": "Negative",
            "pos": "Positive",
            "neu": "Neutral",
        }
    )


def process_audio_file(filename, output_type="csv"):
    """Send one audio file to the speech-to-text service.

    Args:
        filename: Path of the audio file, joined onto the module-level
            ``directory``.
        output_type: Extension (without the dot) used to build the output
            path from the audio file's stem.

    Returns:
        A two-item list: the service result, and the output path
        (``directory/<stem>.<output_type>``).
    """
    wav_path = directory / filename
    csv_path = directory / f"{wav_path.stem}.{output_type}"

    print(f"Current file: '{filename}'")

    request_kwargs = {
        "speaker_labels": True,
        "content_type": "audio/wav",
        "inactivity_timeout": -1,
        "model": "en-US_NarrowbandModel",
        "continuous": True,
    }
    with wav_path.open("rb") as handle:
        reply = service.recognize(audio=handle, **request_kwargs)
        recognition = reply.get_result()

    print(f"Speech-to-text complete for: '{filename}'")

    # Result and output path travel together so the caller can pick either.
    return [recognition, csv_path]


def main():
    """Transcribe every .wav file under ``directory`` and write one CSV each.

    Files are submitted to a thread pool; each result is post-processed as
    soon as its transcription finishes. ``process_data`` takes only the
    recognition result and returns the table, so the CSV is written here.
    An exception raised while transcribing a file propagates out of this
    function.
    """
    print("Running main()...")

    # Default num. workers == min(32, os.cpu_count() + 4)
    # os.cpu_count() returns None when the count cannot be determined, so fall
    # back to 1 rather than failing on `None + 2`.
    n_workers = (os.cpu_count() or 1) + 2

    # Generator over all .wav files in the folder (and subfolders).
    wav_files = directory.glob("**/*.wav")

    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = {executor.submit(process_audio_file, f) for f in wav_files}
        for future in concurrent.futures.as_completed(futures):
            data, out_path = future.result()
            # Passing both items (process_data(*pkg)) would raise TypeError,
            # because process_data accepts a single argument.
            process_data(data).to_csv(out_path, index=False)


def run_script(filename):
    """Transcribe one audio file and return its processed table.

    Args:
        filename: Audio file path, handed to ``process_audio_file``.

    Returns:
        Whatever ``process_data`` returns for the recognition result.
    """
    # Only the recognition result is needed; the suggested output path is unused.
    recognition, _unused_out_path = process_audio_file(filename)
    table = process_data(recognition)
    return table
Shiny 代码
在服务器文件中调用 run_script 函数,而不是 transcribe。确保 transcribe.py 文件在工作目录中。另外更正了 output$transcript 中的一些拼写错误。
library(shiny)
library(reticulate) # for reading Python code
library(dplyr)
library(stringr)
library(formattable) # for adding color to tables
library(shinybusy) # for busy bar
library(DT) # for dataTableOutput

# Tell reticulate which Python interpreter to use.
# NOTE(review): this is a machine-specific path; adjust it for your own setup.
use_python("C:/Users/ap396/Anaconda3/python")

# Page layout: a busy bar, a .wav upload control that accepts several files,
# a placeholder for the server-rendered download button, and the transcript
# table.
ui <- fluidPage(
  add_busy_bar(color = "#5d98ff"),
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav", multiple = TRUE),
  uiOutput("downloadData"),
  dataTableOutput("transcript")
)


# Server logic: transcribes every uploaded .wav file through the Python
# function run_script(), shows the combined result as a colored table, and
# offers it as a CSV download.
# Arguments `input` and `output` are the standard Shiny server arguments.
server <- function(input, output) {

  # .WAV File Selector ------------------------------------------------------

  # Reactive returning the paths of all uploaded files, with back slashes
  # converted to forward slashes. gsub() is vectorized, so no loop is needed.
  file <- reactive({
    req(input$wavFile)  # Require a file before proceeding
    gsub("\\\\", "/", input$wavFile$datapath)
  })


  # Transcribe and Clean ----------------------------------------------------

  # Makes the functions defined in transcribe.py (including run_script)
  # callable from R.
  source_python('transcribe.py')

  # Reactive returning one data frame with the rows of every uploaded file.
  # `File` is the position of the file in the upload; `Time` is how long its
  # transcription took.
  transcript <- reactive({
    paths <- file()

    per_file <- lapply(seq_along(paths), function(j) {
      t0 <- Sys.time()
      result <- run_script(paths[j])  # Transcribe the file
      # Fix the unit to seconds so the column means the same for every file;
      # a plain time difference picks its unit automatically.
      elapsed <- difftime(Sys.time(), t0, units = "secs")

      result$File <- j
      result$Time <- elapsed
      result
    })

    do.call(rbind, per_file)  # Return the combined transcript
  })


  # Use a server-side download button ---------------------------------------
  # ...so that the download button only appears after transcription

  output$downloadData <- renderUI({
    req(transcript())
    downloadButton("handleDownload", "Download CSV")
  })

  # Writes the current transcript to a CSV named "Transcript <date>.csv".
  output$handleDownload <- downloadHandler(
    filename = function() {
      paste('Transcript ', Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      write.csv(transcript(), file, row.names = FALSE)
    }
  )


  # Transcript table --------------------------------------------------------

  output$transcript <- renderDataTable({
    as.datatable(
      formattable(
        transcript() %>%
          select(File,
                 Time,
                 transcript,
                 confidence,
                 Negative,
                 Positive),
        # Formatter names must match the selected column names exactly; the
        # column is `confidence`, so a formatter named `Confidence` would
        # never be applied.
        list(confidence = color_tile('#ffffff', '#a2b3c8'),
             Negative = color_tile('#ffffff', '#e74446'),
             Positive = color_tile('#ffffff', "#499650"))
      ),
      rownames = FALSE,
      options = list(paging = FALSE)
    )
  })
  # END ---------------------------------------------------------------------
}

# Return a Shiny app object
shinyApp(ui = ui, server = server)
请注意, Shiny 的下载仅适用于网络浏览器,因此您必须在网络浏览器中打开应用程序
(原文此处为一张图片)

关于python - 如何将我的 python 应用程序制作/转换为 R Shiny 应用程序?这是一个脑筋急转弯!无法在 R 中找到 UI 需要什么更改,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/66769294/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com