gpt4 book ai didi

json - 在Hive中创建推文表时HDFS文件匹配错误

转载 作者:行者123 更新时间:2023-12-02 21:10:12 24 4
gpt4 key购买 nike

我正在使用 Flume、Hadoop 和 Hive 进行 Twitter 情感分析。在用 Hive 创建表时,我运行了以下命令:

hive -f tweets.sql 

我收到此错误:
FAILED: SemanticException Line 3:17 Invalid path ''data/dictionary/dictionary.tsv'': No files matching path hdfs://localhost:9000/user/root/data/dictionary/dictionary.tsv

tweets.sql
-- Raw tweets as received from Twitter, read in place from /user/flume/tweets
-- (presumably written there by Flume) and parsed by the HCatalog JSON SerDe.
-- SQL:2011 reserved-keyword checking is disabled first, presumably so that
-- the column named user is accepted - TODO confirm for the Hive version in use.
SET hive.support.sql11.reserved.keywords=false;

CREATE EXTERNAL TABLE Mytweets_raw (
    id                      BIGINT,
    created_at              STRING,
    source                  STRING,
    favorited               BOOLEAN,
    retweet_count           INT,
    retweeted_status        STRUCT<
                                text:STRING,
                                user:STRUCT<screen_name:STRING, name:STRING>
                            >,
    entities                STRUCT<
                                urls:ARRAY<STRUCT<expanded_url:STRING>>,
                                user_mentions:ARRAY<STRUCT<screen_name:STRING, name:STRING>>,
                                hashtags:ARRAY<STRUCT<text:STRING>>
                            >,
    text                    STRING,
    user                    STRUCT<
                                screen_name:STRING,
                                name:STRING,
                                friends_count:INT,
                                followers_count:INT,
                                statuses_count:INT,
                                verified:BOOLEAN,
                                utc_offset:INT,
                                time_zone:STRING
                            >,
    in_reply_to_screen_name STRING
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '/user/flume/tweets';

-- Sentiment dictionary: tab-separated text files stored under /data/dictionary.
-- The polarity column is what the sentiment views compare against the
-- literals negative and positive.
CREATE EXTERNAL TABLE dictionary (
    type     STRING,
    length   INT,
    word     STRING,
    pos      STRING,
    stemmed  STRING,
    polarity STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/data/dictionary';
-- Load the dictionary file into the dictionary table.
-- NOTE(review): the path has no leading slash, so it is relative and HDFS resolves it under the current user's home directory (/user/root/data/... in the reported error), not under /data.
LOAD DATA INPATH 'data/dictionary/dictionary.tsv' INTO TABLE dictionary;

-- Lookup table from a time zone name to a country.
-- Tab-separated text files stored under /data/time_zone_map.
CREATE EXTERNAL TABLE time_zone_map (
    time_zone STRING,
    country   STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/data/time_zone_map';

-- Load the time zone mapping file into the time_zone_map table.
-- NOTE(review): the path has no leading slash, so it is relative and is resolved under the current user's HDFS home directory rather than under /data.
LOAD DATA INPATH 'data/time_zone_map/time_zone_map.tsv' INTO TABLE time_zone_map;
-- Clean up tweets
-- tweets_simple keeps the id, the text and the author time zone of each raw
-- tweet and adds ts, the creation time parsed from created_at.
-- created_at is assumed to use the Twitter layout, for example
--   Wed Aug 27 13:08:45 +0000 2008
-- (TODO confirm against the ingested data): characters 5-19 hold the month,
-- day and time, and characters 27-30 hold the year.
-- The year is taken from created_at rather than hard-coded to 2014, and the
-- hour uses the 24-hour pattern HH, because the 12-hour pattern hh parses
-- 12:xx as 00:xx. The UTC offset in created_at is still ignored, as before.
CREATE VIEW tweets_simple AS
SELECT
    id,
    CAST(
        from_unixtime(
            unix_timestamp(
                concat(substring(created_at, 27, 4), ' ', substring(created_at, 5, 15)),
                'yyyy MMM dd HH:mm:ss'
            )
        ) AS TIMESTAMP
    ) AS ts,
    text,
    user.time_zone
FROM Mytweets_raw;
-- tweets_clean attaches a country to every tweet by looking up its time zone
-- in time_zone_map. Tweets whose time zone has no match keep a NULL country.
CREATE VIEW tweets_clean AS
SELECT
    t.id,
    t.ts,
    t.text,
    m.country
FROM tweets_simple AS t
LEFT JOIN time_zone_map AS m
    ON t.time_zone = m.time_zone;
-- Compute sentiment
-- l1: the lower-cased tweet text is split by sentences() and exploded, giving
-- one row per tweet and sentence, where words is the word array of a sentence.
CREATE VIEW l1 AS
SELECT
    id,
    words
FROM Mytweets_raw
LATERAL VIEW explode(sentences(lower(text))) dummy AS words;

-- l2: explodes each word array from l1, giving one row per tweet and word.
CREATE VIEW l2 AS
SELECT
    id,
    word
FROM l1
LATERAL VIEW explode(words) dummy AS word;

-- l3: every word of every tweet with a numeric polarity from the dictionary.
-- Negative words score -1, positive words score 1, and everything else,
-- including words missing from the dictionary, scores 0.
CREATE VIEW l3 AS
SELECT
    id,
    l2.word,
    CASE d.polarity
        WHEN 'negative' THEN -1
        WHEN 'positive' THEN 1
        ELSE 0
    END AS polarity
FROM l2
LEFT JOIN dictionary AS d
    ON l2.word = d.word;

-- tweets_sentiment: one row per tweet id with an overall label derived from
-- the sum of its word polarities in l3. A sum above zero is positive, a sum
-- below zero is negative, and anything else is neutral.
CREATE TABLE tweets_sentiment AS
SELECT
    id,
    CASE
        WHEN sum(polarity) > 0 THEN 'positive'
        WHEN sum(polarity) < 0 THEN 'negative'
        ELSE 'neutral'
    END AS sentiment
FROM l3
GROUP BY id;

-- Put everything back together: every cleaned tweet with its sentiment label.
-- Tweets that have no row in tweets_sentiment keep a NULL sentiment.
CREATE TABLE tweetsbi AS
SELECT
    t.*,
    s.sentiment
FROM tweets_clean AS t
LEFT JOIN tweets_sentiment AS s
    ON t.id = s.id;

-- Tweet counts per country and sentiment.
-- count(sentiment) skips NULL values, so a group whose sentiment is NULL
-- reports a tweet_count of 0.
CREATE TABLE tweetsbiaggr AS
SELECT
    country,
    sentiment,
    count(sentiment) AS tweet_count
FROM tweetsbiaggr_source_placeholder
GROUP BY
    country,
    sentiment;

-- Store data for analysis.
-- A, B and C each expose the per-country tweet count of one sentiment class.
CREATE VIEW A AS
SELECT
    country,
    tweet_count AS positive_response
FROM tweetsbiaggr
WHERE sentiment = 'positive';

CREATE VIEW B AS
SELECT
    country,
    tweet_count AS negative_response
FROM tweetsbiaggr
WHERE sentiment = 'negative';

CREATE VIEW C AS
SELECT
    country,
    tweet_count AS neutral_response
FROM tweetsbiaggr
WHERE sentiment = 'neutral';

-- tweetcompare puts the three counts side by side, one row per country.
-- The joins are inner joins, so only countries present in all three views
-- appear in the result.
CREATE TABLE tweetcompare AS
SELECT
    A.*,
    B.negative_response AS negative_response,
    C.neutral_response AS neutral_response
FROM A
INNER JOIN B
    ON A.country = B.country
INNER JOIN C
    ON B.country = C.country;

-- Give the hue and root users read access to tweetcompare so the data can be
-- shown in an Excel sheet for analysis.
GRANT SELECT ON TABLE tweetcompare TO USER hue;
GRANT SELECT ON TABLE tweetcompare TO USER root;
-- for Tableau or Excel
-- UDAF sentiscore = sum(sentiment)*50 / count(sentiment)
-- context n-gram made readable

hadoop fs -ls -R /data
drwxr-xr-x   - root supergroup          0 2016-11-03 17:10 /data/dictionary
-rw-r--r-- 1 root supergroup 308921 2016-11-03 17:10 /data/dictionary/dictionary.tsv
drwxr-xr-x - root supergroup 0 2016-11-03 17:12 /data/time_zone_map
-rw-r--r-- 1 root supergroup 3021 2016-11-03 17:12 /data/time_zone_map/time_zone_map.tsv
drwxr-xr-x - root supergroup 0 2016-11-03 16:51 /data/tweets_raw

我想知道,tweets.sql 文件中根本没有提到 /user/root/data 这个路径,为什么 Hive 会去使用它。

请帮助我解决此问题。

最佳答案

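您必须在路径的开头添加 /。没有前导 / 的路径是相对路径,HDFS 会把它解析到当前用户的主目录(此处为 /user/root)之下,所以 Hive 才会去查找 /user/root/data/dictionary/dictionary.tsv。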

因此,查询将是
-- Absolute HDFS path, note the leading slash.
LOAD DATA INPATH '/data/dictionary/dictionary.tsv' INTO TABLE dictionary;
这适用于要加载数据的所有查询。

让我知道这个是否奏效。

关于json - 在Hive中创建推文表时HDFS文件匹配错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/40514386/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com