濟(jì)南做html5網(wǎng)站建設(shè)泰安網(wǎng)站推廣優(yōu)化
目錄
- 1.刪除
- 2.更新:
- 3.行轉(zhuǎn)列:
- 4.列轉(zhuǎn)行:
- 5.分析函數(shù):
- 6.多維分析
- 7.數(shù)據(jù)傾斜
- groupby:
- join:
掌握下面的技巧,你的SQL水平將有一個(gè)質(zhì)的提升!
1.刪除
正常hive刪除操作基本都是覆蓋原數(shù)據(jù);
insert overwrite tmp
select * from tmp where id != '666';
2.更新:
更新也是覆蓋操作;
insert overwrite tmp
select id,label,if(id = '1' and label = 'grade','25',value) as value
from tmp where id != '666';
3.行轉(zhuǎn)列:
思路1:
先通過concat函數(shù)把多列數(shù)據(jù)拼接成一個(gè)長(zhǎng)的字符串,分割符為逗號(hào),再通過explode函數(shù)炸裂成多行,然后使用split函數(shù)根據(jù)分隔符進(jìn)行切割;
-- Step03:最后將info的內(nèi)容切分
select id,split(info,':')[0] as label,split(info,':')[1] as value
from
(
-- Step01:先將數(shù)據(jù)拼接成“heit:180,weit:60,age:26”select id,concat('heit',':',height,',','weit',':',weight,',','age',':',age) as value from tmp
) as tmp
-- Step02:然后在借用explode函數(shù)將數(shù)據(jù)膨脹至多行
lateral view explode(split(value,',')) mytable as info;
思路2:使用union all函數(shù),多段union
select id,'heit' as label,height as value
union all
select id,'weit' as label,weight as value
union all
select id,'age' as label,age as value
4.列轉(zhuǎn)行:
思路1:多表join,進(jìn)行關(guān)聯(lián)
select
tmp1.id as id,tmp1.value as height,tmp2.value as weight,tmp3.value as age
from
(select id,label,value from tmp2 where label = 'heit') as tmp1
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = 'weit') as tmp2
join
on tmp1.id = tmp2.id
(select id,label,value from tmp2 where label = 'age') as tmp3
on tmp1.id = tmp3.id;
思路2:使用max(if) 或max(case when ),可以根據(jù)實(shí)際情況換成sum函數(shù)
select
id,
max(case when label = 'heit' then value end) as height,
max(case when label = 'weit' then value end) as weight,
max(case when label = 'age' then value end) as age
from tmp2
group by
id;
思路3:map的思想,先拼接成map的形式,再取下標(biāo)
select
id,tmpmap['height'] as height,tmpmap['weight'] as weight,tmpmap['age'] as age
from
(select id,str_to_map(concat_ws(',',collect_set(concat(label,':',value))),',',':') as tmpmap from tmp2 group by id
) as tmp1;
5.分析函數(shù):
select id,label,value,lead(value,1,0)over(partition by id order by label) as lead,lag(value,1,999)over(partition by id order by label) as lag,first_value(value)over(partition by id order by label) as first_value,last_value(value)over(partition by id order by label) as last_value
from tmp;
select id,label,value,row_number()over(partition by id order by value) as row_number,rank()over(partition by id order by value) as rank,dense_rank()over(partition by id order by value) as dense_rank
from tmp;
6.多維分析
select col1,col2,col3,count(1),Grouping__ID
from tmp
group by col1,col2,col3
grouping sets(col1,col2,col3,(col1,col2),(col1,col3),(col2,col3),())
select col1,col2,col3,count(1),Grouping__ID
from tmp
group by col1,col2,col3
with cube;
7.數(shù)據(jù)傾斜
groupby:
select label,sum(cnt) as all from
(select rd,label,sum(1) as cnt from (select id,label,round(rand(),2) as rd,value from tmp1) as tmpgroup by rd,label
) as tmp
group by label;
join:
select label,sum(value) as all from
(select rd,label,sum(value) as cnt from(select tmp1.rd as rd,tmp1.label as label,tmp1.value*tmp2.value as value from (select id,round(rand(),1) as rd,label,value from tmp1) as tmp1join(select id,rd,label,value from tmp2lateral view explode(split('0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9',',')) mytable as rd) as tmp2on tmp1.rd = tmp2.rd and tmp1.label = tmp2.label) as tmp1group by rd,label
) as tmp1
group by label;