{"id":980,"date":"2021-07-19T18:03:07","date_gmt":"2021-07-19T10:03:07","guid":{"rendered":"https:\/\/blog.humh.cn\/?p=980"},"modified":"2021-07-19T19:30:58","modified_gmt":"2021-07-19T11:30:58","slug":"hive%e8%be%93%e5%87%ba%e7%ac%9b%e5%8d%a1%e5%b0%94%e7%a7%af%e6%8e%92%e9%94%99","status":"publish","type":"post","link":"https:\/\/blog.humh.cn\/?p=980","title":{"rendered":"hive\u8f93\u51fa\u7b1b\u5361\u5c14\u79ef\u6392\u9519"},"content":{"rendered":"\n<blockquote class=\"wp-block-quote\"><p>\u4eca\u5929\u5728\u6267\u884chive sql\u7684\u65f6\u5019\uff0c\u9700\u8981\u4e86\u5947\u602a\u7684join\u9519\u8bef\uff0c\u8fd9\u91cc\u603b\u7ed3\u4e0b<\/p><\/blockquote>\n\n\n\n<p>\u95ee\u9898sql\u5982\u4e0b<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"sql\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">-- 2019-05-03\n-- \u6c422019-05-03 \u4e4b\u524d\u53d1\u5e03\u7684\u89c6\u9891\u4f5c\u8005\u4e2d\uff0c\u5176\u73b0\u4f1a\u5458\u4eba\u6570\u53ca\u5360\u6bd4\uff0c\u5386\u53f2\u4f1a\u5458\u4eba\u6570\u53ca\u5360\u6bd4\n-- \u65e7\u89c6\u9891\uff08\u4f5c\u8005id\uff09\nwith\nold_video_user as (\n    select distinct user_id from `dw`.`dim_video` where published_at &lt;= '2019-05-03'\n),\n\n-- \u6628\u65e5\u7528\u6237vip\u7b49\u7ea7\u8868\u3002\u53ef\u4ee5\u5feb\u901f\u8fc7\u6ee4\u51fa\uff0c\u73b0vip\u7528\u6237\uff0c\u4e00\u5929vip\u65f6\u6548\u6027\u8bef\u5dee\u4e0d\u8003\u8651\nvip_user as (\n    select old_video_user.user_id from (select distinct user_id from parquet.`\/idl\/user_vip_level` where vip != 0) vip join old_video_user on vip.user_id = old_video_user.user_id\n),\n\n-- \u5f53\u524d\u4e0d\u662f\u4f1a\u5458\uff0c\u4f46\u4ee5\u5f80\u5f00\u8fc7\u4f1a\u5458\nold_vip_user as (\n    select no_vip_now.user_id from (select * from old_video_user where user_id not in (select user_id from vip_user)) no_vip_now join (select user_id from `dw`.`dim_user` where vip_expired_at &lt; now()) old_vip on no_vip_now.user_id = old_vip.user_id\n)\n\nselect all_count.count as `\u603b\u7528\u6237\u6570`, vip_count.count as `\u5f53\u524dvip\u7528\u6237\u6570`, round(vip_count.count \/ all_count.count, 3) as `\u5f53\u524dvip\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4`, old_vip_count.count as `\u5386\u53f2vip\u7528\u6237\u6570`, round(old_vip_count.count \/ all_count.count, 3) as `\u5386\u53f2vip\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4`, (all_count.count - vip_count.count - old_vip_count.count) as `\u666e\u901a\u7528\u6237\u6570`, round((all_count.count - vip_count.count - old_vip_count.count) \/ all_count.count, 3) as `\u666e\u901a\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4` \nfrom (select count(user_id) as count, 1 as id from old_video_user) all_count, (select count(user_id) as count, 1 as id from vip_user) vip_count, (select count(user_id) as count, 1 as id from old_vip_user) old_vip_count<\/pre>\n\n\n\n<p>\u6267\u884c\u7ed3\u679c<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">java.sql.SQLException: org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for INNER join between logical plans\nJoin Inner\n:- Aggregate [count(user_id#300) AS count#264L]\n:  +- Aggregate [user_id#300], [user_id#300]\n:     +- Project [user_id#300]\n:        +- Filter (isnotnull(published_at#323) &amp;&amp; (published_at#323 &lt;= 2019-05-03))\n:           +- Relation[photos_count#278,duration#279,name#280,channel#281,point#282,mp4_etag#283,catalog#284,logo_etag#285,scenes#286,digest#287,shared_at#288,state#289,shared#290,post_id#291,tags#292,published#293,audio#294,logo_path#295,udid#296,mold_id#297,create_type#298,last_comment_id#299,user_id#300,reference_id#301,... 26 more fields] parquet\n+- Aggregate [count(user_id#300) AS count#266L]\n   +- Project [user_id#300]\n      +- Join Inner, (user_id#378 = user_id#300)\n         :- Aggregate [user_id#378], [user_id#378]\n         :  +- Project [user_id#378]\n         :     +- Filter ((isnotnull(vip#379) &amp;&amp; NOT (cast(vip#379 as int) = 0)) &amp;&amp; isnotnull(user_id#378))\n         :        +- Relation[user_id#378,vip#379,date#380] parquet\n         +- Aggregate [user_id#300], [user_id#300]\n            +- Project [user_id#300]\n               +- Filter ((isnotnull(published_at#323) &amp;&amp; (published_at#323 &lt;= 2019-05-03)) &amp;&amp; isnotnull(user_id#300))\n                  +- Relation[photos_count#278,duration#279,name#280,channel#281,point#282,mp4_etag#283,catalog#284,logo_etag#285,scenes#286,digest#287,shared_at#288,state#289,shared#290,post_id#291,tags#292,published#293,audio#294,logo_path#295,udid#296,mold_id#297,create_type#298,last_comment_id#299,user_id#300,reference_id#301,... 26 more fields] parquet\nand\nAggregate [count(user_id#300) AS count#268L]\n+- Project [user_id#300]\n   +- Join Inner, (user_id#300 = user_id#381)\n      :- Aggregate [user_id#300], [user_id#300]\n      :  +- Project [user_id#300]\n      :     +- Filter ((isnotnull(published_at#323) &amp;&amp; ((published_at#323 &lt;= 2019-05-03) &amp;&amp; NOT user_id#300 IN (list#277 []))) &amp;&amp; isnotnull(user_id#300))\n      :        :  +- Project [user_id#300 AS user_id#300#467]\n      :        :     +- Join Inner, (user_id#378 = user_id#300)\n      :        :        :- Aggregate [user_id#378], [user_id#378]\n      :        :        :  +- Project [user_id#378]\n      :        :        :     +- Filter ((isnotnull(vip#379) &amp;&amp; NOT (cast(vip#379 as int) = 0)) &amp;&amp; isnotnull(user_id#378))\n      :        :        :        +- Relation[user_id#378,vip#379,date#380] parquet\n      :        :        +- Aggregate [user_id#300], [user_id#300]\n      :        :           +- Project [user_id#300]\n      :        :              +- Filter ((isnotnull(published_at#323) &amp;&amp; (published_at#323 &lt;= 2019-05-03)) &amp;&amp; isnotnull(user_id#300))\n      :        :                 +- Relation[photos_count#278,duration#279,name#280,channel#281,point#282,mp4_etag#283,catalog#284,logo_etag#285,scenes#286,digest#287,shared_at#288,state#289,shared#290,post_id#291,tags#292,published#293,audio#294,logo_path#295,udid#296,mold_id#297,create_type#298,last_comment_id#299,user_id#300,reference_id#301,... 26 more fields] parquet\n      :        +- Relation[photos_count#278,duration#279,name#280,channel#281,point#282,mp4_etag#283,catalog#284,logo_etag#285,scenes#286,digest#287,shared_at#288,state#289,shared#290,post_id#291,tags#292,published#293,audio#294,logo_path#295,udid#296,mold_id#297,create_type#298,last_comment_id#299,user_id#300,reference_id#301,... 26 more fields] parquet\n      +- Project [user_id#381]\n         +- Filter ((isnotnull(vip_expired_at#393) &amp;&amp; (vip_expired_at#393 &lt; 2021-07-19 16:56:54.135)) &amp;&amp; (NOT user_id#381 IN (list#277 []) &amp;&amp; isnotnull(user_id#381)))\n            :  +- Project [user_id#300 AS user_id#300#467]\n            :     +- Join Inner, (user_id#378 = user_id#300)\n            :        :- Aggregate [user_id#378], [user_id#378]\n            :        :  +- Project [user_id#378]\n            :        :     +- Filter ((isnotnull(vip#379) &amp;&amp; NOT (cast(vip#379 as int) = 0)) &amp;&amp; isnotnull(user_id#378))\n            :        :        +- Relation[user_id#378,vip#379,date#380] parquet\n            :        +- Aggregate [user_id#300], [user_id#300]\n            :           +- Project [user_id#300]\n            :              +- Filter ((isnotnull(published_at#323) &amp;&amp; (published_at#323 &lt;= 2019-05-03)) &amp;&amp; isnotnull(user_id#300))\n            :                 +- Relation[photos_count#278,duration#279,name#280,channel#281,point#282,mp4_etag#283,catalog#284,logo_etag#285,scenes#286,digest#287,shared_at#288,state#289,shared#290,post_id#291,tags#292,published#293,audio#294,logo_path#295,udid#296,mold_id#297,create_type#298,last_comment_id#299,user_id#300,reference_id#301,... 26 more fields] parquet\n            +- Relation[user_id#381,name#382,vip#383,icon#384,udid#385,age_zone#386,signature#387,created_at#388,level#389,platform#390,auth_token#391,updated_at#392,vip_expired_at#393,gender#394,first_udid#395,last_on#396,is_wechat#397,version#398,app_version#399,os#400,os_version#401,device_name#402,from_src#403,original_src#404,... 13 more fields] parquet\nJoin condition is missing or trivial.\nEither: use the CROSS JOIN syntax to allow cartesian products between these\nrelations, or: enable implicit cartesian products by setting the configuration\nvariable spark.sql.crossJoin.enabled=true;\n\tat org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:296)\n\tat org.apache.commons.dbcp2.DelegatingStatement.execute(DelegatingStatement.java:291)\n\tat org.apache.commons.dbcp2.DelegatingStatement.execute(DelegatingStatement.java:291)\n\tat org.apache.zeppelin.jdbc.JDBCInterpreter.executeSql(JDBCInterpreter.java:736)\n\tat org.apache.zeppelin.jdbc.JDBCInterpreter.interpret(JDBCInterpreter.java:819)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:103)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:632)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.ParallelScheduler$JobRunner.run(ParallelScheduler.java:162)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat java.lang.Thread.run(Thread.java:748)<\/pre>\n\n\n\n<p>\u6267\u884c\u7ed3\u679c\u4e2d\uff0c\u9519\u8bef\u4fe1\u606f\u63d0\u793a\uff0cjoin\u5b58\u5728\u95ee\u9898\uff0c\u5b58\u5728\u7b1b\u5361\u5c14\u79ef\u7684\u95ee\u9898\u3002\u4e00\u5f00\u59cb\u770b\u5230\u8fd9\u4e2a\u9519\u8bef\u4fe1\u606f\uff0c\u52a0\u4e0a\u201c<code>Join condition is missing or trivial<\/code>\u201d \uff0c\u6211\u5c31\u6392\u67e5sql\u4e2d\u7684<code>join on<\/code>\u5bf9\u7740\u6ca1\uff0c\u786e\u8ba4\u6ca1\u6709\u4efb\u4f55\u95ee\u9898\u554a\uff0c\u6478\u4e0d\u7740\u5934\u7eea\u3002\u6700\u540e\u624d\u7559\u610f\u5230\u6700\u540e\u603bsql\u4e2d<strong>from\u591a\u8868<\/strong>\u67e5\u8be2\u7684\u95ee\u9898\u3002<\/p>\n\n\n\n<p>\u539f\u672c\u6211\u662f\u60f3\uff0c\u6bcf\u4e2a\u8868\u90fdcount\u4e00\u4e0b\uff0c\u7136\u540e\u591a\u4e2acount\u540e\u7684\u8868\u8054\u5408\u67e5\u8be2\u4e00\u4e0b\uff0c\u5c31\u7b97\u7b1b\u5361\u5c14\u79ef\u4e5f\u662f\u201c1*1*1=1\u201d\uff0c\u4e5f\u4e0d\u5f71\u54cd\u3002<\/p>\n\n\n\n<p>\u4e8e\u662f\uff0c\u8c37\u6b4c\u4e86\u4e00\u4e0b\u9519\u8bef\u4fe1\u606f\u3002\u624d\u7406\u89e3\uff0c\u539f\u6765\u73af\u5883\u4e2d\u642d\u5efa\u7684<strong>ThriftServer<\/strong>\u5176\u5bf9\u5e94<strong>spark<\/strong>\u914d\u7f6e\u5e76\u6ca1\u6709\u5bf9\u7b1b\u5361\u5c14\u79ef\u7279\u6b8a\u5904\u7406\uff0c\u4e5f\u5c31\u662f\u4e0a\u9762\u6700\u540e\u4e00\u53e5sql\uff0cfrom\u591a\u8868\u6ca1\u6709where\u6761\u4ef6\uff0c\u4e5f\u5c31\u662f\u5c06\u4ea7\u751f\u7684\u7b1b\u5361\u5c14\u79ef\u76f4\u63a5\u8f93\u51fa\uff0c\u800chive where\u53ef\u80fd\u4f7f\u7528inner join\u65b9\u5f0f\uff08\u8ddfmysql\u539f\u7406\u4e0d\u592a\u4e00\u6837\uff1f\uff1f<strong><em>todo\u5f85\u67e5<\/em><\/strong>\uff09inner join\u7981\u6b62\u4e0d\u5e26\u6709on\u6761\u4ef6\uff0c\u76f4\u63a5\u5c06\u7b1b\u5361\u5c14\u79ef\u8f93\u51fa\uff01\u662f\u53ef\u80fd\u8003\u8651\u5230\uff0c\u5982\u679cjoin\u76f4\u63a5\u8f93\u51fa\u7b1b\u5361\u5c14\u79ef\u7ed3\u679c\uff0c\u80af\u5b9a\u662f\u6570\u636e\u7ed3\u679c\u9519\u8bef\uff0c\u4e0d\u662f\u7406\u60f3\u7684\u7ed3\u679c\u3002\u6240\u4ee5\u8fd9\u91cc\u9519\u8bef\u4fe1\u606f\u4e2d\u6307\u51fa\uff0c\u5efa\u8bae\u4f7f\u7528cross join sql\uff0c\u6216\u5f00\u542fcross join\u53ef\u7528\u7684\u914d\u7f6e\u3002<strong>cross join\u5c31\u662fhive\u4e2d\u53ef\u4ee5\u76f4\u63a5\u8f93\u51fa\u4e24\u4e2a\u8868\u7b1b\u5361\u5c14\u79ef\u7684\u4e00\u79cd\u8fde\u63a5\u67e5\u8be2\u65b9\u5f0f\u3002<\/strong>\u6240\u4ee5\u82e5\u91c7\u7528cross join\u7684\u65b9\u5f0f\u8fd0\u884c\uff0c\u4f60\u53ef\u4ee5\u5c06from\u591a\u8868\u6539\u4e3a\u4e00\u5c42\u5c42cross join\u3002\u4e5f\u53ef\u4ee5\u76f4\u63a5\u8bbe\u7f6ecross join\u914d\u7f6e\u53ef\u7528\uff08<code>spark.sql.crossJoin.enabled=true<\/code>\uff09\uff0c\u5982\u76f4\u63a5\u5728ThriftServer\u7684spark\u914d\u7f6e\u6587\u4ef6\u4e2d\uff0c\u52a0\u4e0a\u8be5\u914d\u7f6e\u9879\u3002\u8fd9\u6837\u518d\u6b21\u6267\u884c\u4e0a\u9762sql\uff0chive\u4f1a\u81ea\u52a8\u4f18\u5316\u4e3across join\u7684\u65b9\u5f0f\u3002\u5f53\u7136\uff0c\u9664\u4e86\u91c7\u7528\u9519\u8bef\u4fe1\u606f\u63d0\u9192\u7684\u65b9\u5f0f\uff0c\u4e5f\u53ef\u4ee5\u589e\u52a0where\u6761\u4ef6\uff08\u6bd5\u7adfwhere hive\u5e94\u8be5\u5bf9\u5e94inner join\uff0c\u9519\u8bef\u4fe1\u606f\u4e2d\uff0c\u4e0d\u5c31\u63d0\u793a\u6ca1\u6709join\u6761\u4ef6\u5417\uff09\uff0c\u6240\u4ee5\u8fd9\u91cc<strong>\u7ed9\u6bcf\u4e2acount\u8868\u589e\u52a0\u4e00\u4e2a\u7528\u4e8e\u6761\u4ef6\u8fc7\u6ee4\u7684\u76f8\u540c\u5217\uff0c\u7136\u540ewhere\u6307\u5b9a\u4e00\u4e0b<\/strong>\uff0c\u5177\u4f53\u6700\u540e\u4e00\u53e5sql\u4fee\u6539\u5982\u4e0b<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"sql\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">select all_count.count as `\u603b\u7528\u6237\u6570`, vip_count.count as `\u5f53\u524dvip\u7528\u6237\u6570`, round(vip_count.count \/ all_count.count, 3) as `\u5f53\u524dvip\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4`, old_vip_count.count as `\u5386\u53f2vip\u7528\u6237\u6570`, round(old_vip_count.count \/ all_count.count, 3) as `\u5386\u53f2vip\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4`, (all_count.count - vip_count.count - old_vip_count.count) as `\u666e\u901a\u7528\u6237\u6570`, round((all_count.count - vip_count.count - old_vip_count.count) \/ all_count.count, 3) as `\u666e\u901a\u7528\u6237\u5360\u603b\u7528\u6237\u5360\u6bd4` \nfrom (select count(user_id) as count, 1 as id from old_video_user) all_count, (select count(user_id) as count, 1 as id from vip_user) vip_count, (select count(user_id) as count, 1 as id from old_vip_user) old_vip_count where all_count.id = vip_count.id and all_count.id = old_vip_count.id<\/pre>\n\n\n\n<p>\u518d\u6b21\u5b8c\u6574\u6267\u884c\uff0c\u7ed3\u679c\u6210\u529f\uff01<\/p>\n\n\n\n<hr class=\"wp-block-separator\" \/>\n\n\n\n<p>\u4e00\u822c\u4e0d\u5efa\u8bae\u76f4\u63a5\u8f93\u51fa\u7b1b\u5361\u5c14\u79ef\uff0c\u6211\u9047\u5230\u7684\u8fd9\u4e2a\u60c5\u51b5\uff0c\u521a\u597d\u662f\u5df2\u77e5\u6bcf\u4e2a\u8868\u53ea\u6709\u4e00\u6761\u8bb0\u5f55\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4eca\u5929\u5728\u6267\u884chive sql\u7684\u65f6\u5019\uff0c\u9700\u8981\u4e86\u5947\u602a\u7684join\u9519\u8bef\uff0c\u8fd9\u91cc\u603b\u7ed3\u4e0b \u95ee\u9898sql\u5982\u4e0b \u6267\u884c\u7ed3\u679c \u6267\u884c\u7ed3\u679c\u4e2d\uff0c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":981,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[11],"tags":[120,46],"_links":{"self":[{"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/posts\/980"}],"collection":[{"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=980"}],"version-history":[{"count":2,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/posts\/980\/revisions"}],"predecessor-version":[{"id":984,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/posts\/980\/revisions\/984"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=\/wp\/v2\/media\/981"}],"wp:attachment":[{"href":"https:\/\/blog.humh.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=980"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=980"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.humh.cn\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=980"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}