gpt4 book ai didi

python - 马尔可夫分析、格式化

转载 作者:行者123 更新时间:2023-12-01 09:21:09 33 4
gpt4 key购买 nike

我有一个程序,可以从文本文件中读取大量文本,然后根据文本内容随机化内容以显示为短篇故事。该程序可以运行,但最后一部分(我显示的 Material )非常笨重且效率不高,我想知道是否有人对如何更有效地获取文本然后显示它有任何想法作为一个字符串提供给用户,但允许它跨越多行(本质上是换行文本),这样它就不仅仅是继续到控制台右侧的巨大文本字符串。

from __future__ import print_function, division

import sys

import random

# global variables
suffix_map = {} # map from prefixes to a list of suffixes
prefix = () # current tuple of words
big_list = []

def process_file(filename, order=2):
"""Reads a file and performs Markov analysis.

filename: string
order: integer number of words in the prefix

returns: map from prefix to list of possible suffixes.
"""
fp = open(filename)

for line in fp:
for word in line.rstrip().split():
process_word(word, order)


def process_word(word, order=3):
"""Processes each word.

word: string
order: integer

During the first few iterations, all we do is store up the words;
after that we start adding entries to the dictionary.
"""
global prefix
if len(prefix) < order:
prefix += (word,)
return

try:
suffix_map[prefix].append(word)
except KeyError:
# if there is no entry for this prefix, make one
suffix_map[prefix] = [word]

prefix = shift(prefix, word)


def random_text(n=300):
"""Generates random wordsfrom the analyzed text.

Starts with a random prefix from the dictionary.

n: number of words to generate
"""
global big_list
# choose a random prefix (not weighted by frequency)
start = random.choice(list(suffix_map.keys()))

for i in range(n):
suffixes = suffix_map.get(start, None)
if suffixes == None:
random_text(n-i)
return
# choose a random suffix
word = random.choice(suffixes)
big_list.append(word + " ")
start = shift(start, word)


def shift(t, word):
"""Forms a new tuple by removing the head and adding word to the tail.

t : tuple of strings
word: string

Returns: tuple of strings
"""
return t[1:] + (word,)


def list_to_str_format():
global big_list
whole = " ".join(str(i) for i in big_list)

# 25 words per line
l1 = big_list[:25]
l2 = big_list[26:50]
l3 = big_list[51:75]
l4 = big_list[76:100]
l5 = big_list[101:125]
l6 = big_list[126:150]
l7 = big_list[151:175]
l8 = big_list[176:200]
l9 = big_list[201:225]
l10 = big_list[226:250]
l11 = big_list[256:275]
l12 = big_list[276:300]

str_1 = " ".join(str(i) for i in l1).capitalize()
str_2 = " ".join(str(i) for i in l2)
str_3 = " ".join(str(i) for i in l3)
str_4 = " ".join(str(i) for i in l4)
str_5 = " ".join(str(i) for i in l5)
str_6 = " ".join(str(i) for i in l6)
str_7 = " ".join(str(i) for i in l7)
str_8 = " ".join(str(i) for i in l8)
str_9 = " ".join(str(i) for i in l9)
str_10 = " ".join(str(i) for i in l10)
str_11 = " ".join(str(i) for i in l11)
str_12 = " ".join(str(i) for i in l12)

print(str_1)
print(str_2)
print(str_3)
print(str_4)
print(str_5)
print(str_6)
print(str_7)
print(str_8)
print(str_9)
print(str_10)
print(str_11)
print(str_12)


def main(filename, n=300, order=3):
try:
n = int(n)
order = int(order)
except ValueError as e:
print('Usage: %d filename [# of words] [prefix length]' % e)
else:
process_file(filename, order)
random_text(n)
list_to_str_format()
print()


main('C:\\Users\\Desktop\\TheBrothersKaramazov.txt')

最佳答案

我允许自己更改您的连接模式,从而形成了双倍空间。您必须导入模块re

def list_to_str_format(line_length=80):
global big_list
whole = "".join(str(i) for i in big_list)
regex = re.compile('(.*?(\s))*')
while whole != "":
break_pos = regex.match(whole[:line_length]).end()
print(whole[:break_pos])
whole = whole[break_pos:]

关于python - 马尔可夫分析、格式化,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50781490/

33 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com