×÷Õߣºoliverdding£¬ÌÚѶ CSIG ²âÊÔ¿ª·¢¹¤³Ìʦ
ÄãÏëÒªµÄ ClickHouse ÓÅ»¯£¬¶¼ÔÚÕâÀï¡£
ClickHouse ÊÇ OLAP£¨Online analytical processing£©Êý¾Ý¿â£¬ÒÔËٶȼû³¤[1]¡£ClickHouse ΪʲôÄÜÕâô¿ì£¿ÓÐÁ½µãÔÒò[2]£º
µ«ÊÇ£¬Êý¾Ý¿âÉè¼ÆÔÙÓÅÔ½Ò²Õü¾È²»ÁË´íÎóµÄʹÓ÷½Ê½£¬±¾ÎÄÒÔ MergeTree ÒýÇæ¼Ò×åΪÀý½²½âÈçºÎ¶Ô²éѯÓÅ»¯¡£
±¾½Ú»ùÓÚ ClickHouse 22.3 °æ±¾·ÖÎö
clickhouser-serverÆô¶¯ºó»áÔÚ while Ñ»·ÖеȴýÇëÇ󣬽ÓÊÕµ½²éѯºó»áµ÷ÓÃexecuteQueryImpl()ÐÐÊý¹¹½¨ AST¡¢ÓÅ»¯²¢Éú³ÉÖ´Ðмƻ® pipeline£¬×îºóÔÚexecuteImpl()ÖжàÏß³ÌÖ´ÐÐ DAG »ñÈ¡½á¹û£¬ÕâÆªÎÄÕÂÖ»¹ØÐÄ SQL Ö´ÐУ¬Ê¡ÂÔµôÍøÂç½»»¥²¿·Ö£¬²éѯִÐÐÁ÷³ÌÈçÏÂͼËùʾ£º
SQL µÄ½âÎöÓÅ»¯ºÍ±àÒëÔÀíϢϢÏà¹Ø£¬±¾½Ú½«°üº¬´óÁ¿±àÒëÔÀíºÍ´úÂëϸ½Ú£¬ÊôÀ©Õ¹ÖªÊ¶¡£
ClickHouse Äõ½ÐèÒªÖ´ÐÐµÄ SQL£¬Ê×ÏÈÐèÒª½« String ¸ñʽµÄ×Ö·û´®½âÎöΪËüÄÜÀí½âµÄÊý¾Ý½á¹¹£¬Ò²¾ÍÊÇ AST ºÍÖ´Ðмƻ®¡£¹¹Ôì AST ²¿·Ö´úÂëÈçÏÂËùʾ£º
//src/Interpreters/executeQuery.cpp
staticstd::tuple<ASTPtr,BlockIO>executeQueryImpl()
{
//¹¹ÔìParser
ParserQueryparser(end,settings.allow_settings_after_format_in_insert);
//½«SQLתΪ³éÏóÓï·¨Ê÷
ast=parseQuery(parser,begin,end,"",max_query_size,settings.max_parser_depth);
//ÉèÖÃqueryµÄÉÏÏÂÎÄ£¬±ÈÈçSETTINGS
...
if(async_insert)
{
...
}else{
//Éú³ÉinterpreterʵÀý
interpreter=InterpreterFactory::get(ast,context,SelectQueryOptions(stage).setInternal(internal));
//interpreterÓÅ»¯AST²¢·µ»ØÖ´Ðмƻ®
res=interpreter->execute();
}
//·µ»Ø³éÏóÓï·¨Ê÷ºÍÖ´Ðмƻ®
returnstd::make_tuple(ast,std::move(res));
}
ÖµµÃÒ»ÌáµÄÊÇ£¬½âÎö SQL Éú³ÉÓï·¨Ê÷ÕâÊDZàÒëÔÀíÖдʷ¨·ÖÎöºÍÓï·¨·ÖÎö²¿·Ö¸²¸ÇµÄÊÂÇé¡£´Ê·¨·ÖÎöÖ»ÊǼòµ¥²ð½âÊý¾ÝÁ÷Ϊһ¸ö¸ö token£¬¶øÓï·¨·ÖÎö·ÖΪ×Ô¶¥ÏòϺÍ×Ôµ×ÏòÉÏÁ½ÖÖ·½Ê½£¬³£¼ûµÄÓï·¨·ÖÎö·½Ê½Ò²ÎªÊÖдÓï·¨·ÖÎö£¨ÍùÍùÊÇ×Ô¶¥ÏòϵÄÓÐÏÞ״̬»ú£¬µÝ¹éϽµ·ÖÎö£©ºÍÓï·¨·ÖÎö¹¤¾ß£¨ÍùÍùÊÇ×Ôµ×ÏòÉÏ£¬Èç Flex¡¢Yacc/Bison µÈ£©¡£
ÊÖдÓï·¨·ÖÎö±ÈÆðÓï·¨·ÖÎö¹¤¾ßÓм¸¸öÓÅÊÆ£¨µ±È»ÒªÐ´µÃºÃµÄÇé¿ö£©£º
ClickHouse ½âÎö SQL µÄº¯ÊýÈçÏÂËùʾ£º
//src/Parsers/parseQuery.cpp
ASTPtrtryParseQuery()
{
//½«SQL²ð·ÖΪtokenÁ÷
Tokenstokens(query_begin,all_queries_end,max_query_size);
IParser::Postoken_iterator(tokens,max_parser_depth);
//½«tokenÁ÷½âÎöΪÓï·¨Ê÷
ASTPtrres;
constboolparse_res=parser.parse(token_iterator,res,expected);
returnres;
}
¿ÉÒÔ¿´µ½ÏȽ« SQL ×Ö·û´®²ð½âΪ token Á÷£¨´Ê·¨·ÖÎö£©£¬ÔÙµ÷ÓÃperser.parse()º¯Êý½øÐÐÓï·¨·ÖÎö£¬ËüµÄʵÏÖÈçÏ£º
//src/Parsers/ParserQuery.cpp
boolParserQuery::parseImpl(Pos&pos,ASTPtr&node,Expected&expected)
{
ParserQueryWithOutputquery_with_output_p(end,allow_settings_after_format_in_insert);
ParserInsertQueryinsert_p(end,allow_settings_after_format_in_insert);
ParserUseQueryuse_p;
ParserSetQueryset_p;
ParserSystemQuerysystem_p;
ParserCreateUserQuerycreate_user_p;
ParserCreateRoleQuerycreate_role_p;
ParserCreateQuotaQuerycreate_quota_p;
ParserCreateRowPolicyQuerycreate_row_policy_p;
ParserCreateSettingsProfileQuerycreate_settings_profile_p;
ParserCreateFunctionQuerycreate_function_p;
ParserDropFunctionQuerydrop_function_p;
ParserDropAccessEntityQuerydrop_access_entity_p;
ParserGrantQuerygrant_p;
ParserSetRoleQueryset_role_p;
ParserExternalDDLQueryexternal_ddl_p;
ParserTransactionControltransaction_control_p;
ParserBackupQuerybackup_p;
boolres=query_with_output_p.parse(pos,node,expected)
||insert_p.parse(pos,node,expected)
||use_p.parse(pos,node,expected)
||set_role_p.parse(pos,node,expected)
||set_p.parse(pos,node,expected)
||system_p.parse(pos,node,expected)
||create_user_p.parse(pos,node,expected)
||create_role_p.parse(pos,node,expected)
||create_quota_p.parse(pos,node,expected)
||create_row_policy_p.parse(pos,node,expected)
||create_settings_profile_p.parse(pos,node,expected)
||create_function_p.parse(pos,node,expected)
||drop_function_p.parse(pos,node,expected)
||drop_access_entity_p.parse(pos,node,expected)
||grant_p.parse(pos,node,expected)
||external_ddl_p.parse(pos,node,expected)
||transaction_control_p.parse(pos,node,expected)
||backup_p.parse(pos,node,expected);
returnres;
}
¿ÉÒÔ·¢ÏÖ ClickHouse ½« Query ·ÖΪÁË 18 ÖÖÀàÐÍ£¨½ØÖ¹ 2022-11-12 ÈÕ£©£¬Ã¿ÖÖ Query ¶¼ÓÐ×Ô¼ºµÄ Parser£¬Í¨¹ý¹Ø¼ü´ÊÆ¥Åä¹¹Ôì AST ÉϵĽڵ㣬×îÖÕÉú³ÉÓï·¨Ê÷¡£µÝ¹éϽµ²¿·Ö³¬¸ÙÁË£¬ÕâÀï¾Í²»ÆÌ¿ª½²¡£
¾¹ýÓï·¨·ÖÎöºóÉú³ÉµÄ AST ²¢²»ÊÇÖ´ÐÐ×îÓŽ⣬ClickHouse °üº¬´óÁ¿»ùÓÚ¹æÔòµÄÓÅ»¯£¨rule based optimization£©£¬Ã¿¸ö Query »á±éÀúÒ»±éÓÅ»¯¹æÔò£¬½«Âú×ãµÄÇé¿ö½øÐв»¸Ä±ä²éѯÓïÒåµØÖØÐ´¡£
ÿһÖÖ Query ÀàÐͶ¼ÓжÔÓ¦µÄ Interpreter£¬ºóÎͼÒÔ Select ²éѯ¾ÙÀý£¬´úÂëÈçÏ£º
//src/Interpreters/InterpreterFactory.cpp
std::unique_ptr<IInterpreter>InterpreterFactory::get()
{
...
if(query->as<ASTSelectQuery>())
{
returnstd::make_unique<InterpreterSelectQuery>(query,context,options);
}
...
ÔÚInterpreterSelectQueryÀàµÄ¹¹Ô캯ÊýÖн« AST ÓÅ»¯¡¢ÖØÐ´£¬´úÂëÏê¼ûsrc/Interpreters/InterpreterSelectQuery.cpp£¬ÕâÀïÖ»»Á÷³Ìͼ£º
ÊÇ·ñ³õʼ»¯ settings ÓÅ»¯ with ÓÅ»¯ joins ν´ÊÏÂÍÆ½« where ÏÂÍÆµ½ prewhere ÊÇ·ñÒªÔÙ´ÎÓÅ»¯¼ì²é storage ȨÏÞÉú³É analysis_result ºÍ result_header
src/Interpreters/InterpreterSelectQuery.cppÎļþInterpreterSelectQuery::executeImpl()·½·¨½«ÓÅ»¯·ÖÎöµÃµ½µÄÖмäÊý¾Ý¸¨ÖúÉú³É×îÖÕµÄÖ´Ðмƻ®£¬´úÂëÈçÏ£º
//src/Interpreters/InterpreterSelectQuery.cpp
voidInterpreterSelectQuery::executeImpl()
{
...
//¸öÈËÀí½âÕë¶ÔEXPLAINPLAN£¬Ö»¹¹½¨Ö´Ðмƻ®²»Ö´ÐÐ
if(options.only_analyze)
{
...
}
else
{
//´Ó´ÅÅ̶ÁÈ¡ËùÐèÁУ¬×¢ÒâÕâÒ»ÐУ¬ºóÎÄÌø×ª½øÈ¥·ÖÎö
executeFetchColumns(from_stage,query_plan);
}
if(options.to_stage>QueryProcessingStage::FetchColumns)
{
//ÔÚ·Ö²¼Ê½Ö´ÐÐQueryʱֻÔÚÔ¶³Ì½ÚµãÖ´ÐÐ
if(expressions.first_stage)
{
//µ±storage²»Ö§³ÖprewhereʱÌí¼ÓFilterStep
if(!query_info.projection&&expressions.filter_info)
{
...
}
if(expressions.before_array_join)
{
...
}
if(expressions.array_join)
{
...
}
if(expressions.before_join)
{
...
}
//¿ÉÑ¡²½Ö裺½«joinkeyתΪһÖµÄsupertype
if(expressions.converting_join_columns)
{
...
}
//Ìí¼ÓJoin
if(expressions.hasJoin())
{
...
}
//Ìí¼Ówhere
if(!query_info.projection&&expressions.hasWhere())
executeWhere(query_plan,expressions.before_where,expressions.remove_where_filter);
//Ìí¼Óaggregation
if(expressions.need_aggregate)
{
executeAggregation(
query_plan,expressions.before_aggregation,aggregate_overflow_row,aggregate_final,query_info.input_order_info);
///Weneedtoresetinputorderinfo,sothatexecuteOrdercan'tuseit
query_info.input_order_info.reset();
if(query_info.projection)
query_info.projection->input_order_info.reset();
}
//×¼±¸Ö´ÐУº
//1.beforewindowsº¯Êý
//2.windowsº¯Êý
//3.afterwindowsº¯Êý
//4.×¼±¸DISTINCT
if(expressions.need_aggregate)
{
//´æÔھۺϺ¯Êý£¬ÔÚwindowsº¯Êý/ORDERBY֮ǰ²»Ö´ÐÐ
}
else
{
//²»´æÔھۺϺ¯Êý
//´æÔÚwindowsº¯Êý£¬Ó¦¸ÃÔÚ³õʼ½ÚµãÔËÐÐ
//²¢ÇÒ£¬ORDERBYºÍDISTINCTÒÀÀµÓÚwindowsº¯Êý£¬ÕâÀïÒ²²»ÄÜÔËÐÐ
if(query_analyzer->hasWindow())
{
executeExpression(query_plan,expressions.before_window,"Beforewindowfunctions");
}
else
{
//ûÓÐwindowsº¯Êý£¬Ö´ÐÐbeforeORDERBY¡¢×¼±¸DISTINCT
assert(!expressions.before_window);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
}
//Èç¹û²éѯûÓÐGROUP¡¢HAVING£¬ÓÐORDER»òLIMIT£¬»áÔÚÔ¶³ÌÅÅÐò¡¢LIMIT
preliminary_sort();
}
//ÔÚ·Ö²¼Ê½Ö´ÐÐQueryʱֻÔÚ³õʼ½ÚµãÖ´Ðлòoptimize_distributed_group_by_sharding_key¿ªÆôʱ
if(expressions.second_stage||from_aggregation_stage)
{
if(from_aggregation_stage)
{
//Ô¶³Ì½Úµã¾ÛºÏ¹ý£¬ÕâÀïɶҲ²»¸É
}
elseif(expressions.need_aggregate)
{
//´Ó²»Í¬½ÚµãÀÈ¡Êý¾ÝºÏ²¢
if(!expressions.first_stage)
executeMergeAggregated(query_plan,aggregate_overflow_row,aggregate_final);
if(!aggregate_final)
{
//Ö´ÐÐgroupbywithtotals/rollup/cube
...
}
//Ìí¼ÓHaving
elseif(expressions.hasHaving())
executeHaving(query_plan,expressions.before_having,expressions.remove_having_filter);
}
//±¨¸ö´í
elseif(query.group_by_with_totals||query.group_by_with_rollup||query.group_by_with_cube)
throwException("WITHTOTALS,ROLLUPorCUBEarenotsupportedwithoutaggregation",ErrorCodes::NOT_IMPLEMENTED);
//×¼±¸Ö´ÐУº
//1.beforewindowsº¯Êý
//2.windowsº¯Êý
//3.afterwindowsº¯Êý
//4.×¼±¸DISTINCT
if(from_aggregation_stage)
{
if(query_analyzer->hasWindow())
throwException(
"WindowfunctionsdoesnotsupportprocessingfromWithMergeableStateAfterAggregation",
ErrorCodes::NOT_IMPLEMENTED);
}
elseif(expressions.need_aggregate)
{
executeExpression(query_plan,expressions.before_window,
"Beforewindowfunctions");
executeWindow(query_plan);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
else
{
if(query_analyzer->hasWindow())
{
executeWindow(query_plan);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
else
{
//Neitheraggregationnorwindows,allexpressionsbefore
//ORDERBYexecutedonshards.
}
}
//Ìí¼Óorderby
if(expressions.has_order_by)
{
//ÔÚ·Ö²¼Ê½²éѯÖУ¬Ã»ÓоۺϺ¯ÊýÈ´ÓÐorderby£¬½«»áÔÚÔ¶¶Ë½Úµãorderby
...
}
//¶àsourceorderbyÓÅ»¯
...
//¶àÌõÁ÷ʱÔÙ´ÎÖ´ÐÐdistinct
if(!from_aggregation_stage&&query.distinct)
executeDistinct(query_plan,false,expressions.selected_columns,false);
//´¦Àílimit
...
//´¦Àíprojection
...
//´¦Àíoffset
...
}
//ÐèÒª×Ó²éѯ½á¹û¹¹½¨set
if(!subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan,subqueries_for_sets);
}
}
ÆäÖÐInterpreterSelectQuery::executeFetchColumns()º¯ÊýÊǶÁÈ¡ËùÐèÁеĽ׶Ρ£´Ó´úÂëÖпÉÒÔ¿´µ½ËüÒ²×öÁ˺ܶàµÄÓÅ»¯£º
count()ÓÅ»¯quotaÏÞÖÆ¿ÉÒÔ¿´µ½£º
count()¡¢count(1)ºÍcount(*)£¬ClickHouse ¶¼ÓÐÓÅ»¯£¬µ«²»Òªcount(any_field)Ë÷ÒýÊÇ ClickHouse ¿ìËÙ²éѯ×îÖØÒªµÄÒ»»·£¬·ÖΪÖ÷¼üË÷Òý£¨sparse indexes£©ºÍÌø±íË÷Òý£¨data skipping indexes£©¡£ÔÚÖ´Ðвéѯʱ£¬Ë÷ÒýÃüÖÐ˳ÐòÈçÏÂͼËùʾ£º
Partition Key MinMax IndexPartitionPrimary Key Sparse IndexData Skipping Indexes
Ïê¼û´úÂ룺
//src/Processors/QueryPlan/ReadFromMergeTree.cpp
MergeTreeDataSelectAnalysisResultPtrReadFromMergeTree::selectRangesToRead()
{
...
try
{
//ʹÓÃpartitionbyѡȡÐèÒªparts
MergeTreeDataSelectExecutor::filterPartsByPartition(...);
//´¦Àí³éÑù
...
//ʹÓÃÖ÷¼üË÷ÒýºÍÌø±íË÷Òý
result.parts_with_ranges=MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(...);
}
catch(...)
{
...
}
...
}
ÖµµÃ×¢ÒâµÄÊÇ£¬Ö÷¼üµÄ sparse index ʹÓöþ·Ö²éÕÒÖ±½ÓËõС·¶Î§µ½ËùÐèÒªµÄ parts£¬¶øÌø±íË÷Òý¾ÍÐèÒªÔÚÑ¡³öÀ´µÄ parts Àÿ n ¸ö£¨Óû§×Ô¶¨Ò壩granules ¾ÍÐèÒª±È½Ï n ´Î¡£
×î¼Ñʵ¼ù£º
partition by ÐèÒªÒ»¸ö¿ÉÒÔתΪʱ¼äµÄÁУ¬±ÈÈç Datatime¡¢Date »òÕßʱ¼ä´Á£¬¶øÈç¹û primary key ÖÐÒ²ÓÐʱ¼ä×ֶΣ¬¿ÉÒÔʹÓÃͬһ¸ö×ֶαÜÃâ²éѯʱÐèҪͬʱָ¶¨Á½¸öʱ¼ä×ֶΡ£±ÈÈ磺ָ¶¨ÎªÊý¾Ý´¦Àíʱ¼ä¡£
Ê×ÏÈÒª±æÎö part ºÍ partition µÄÇø±ð£¬ClickHouse Ó¦ÓòãÃæ¶¨ÒåÁË partition£¬Óû§Ö¸¶¨ partition by ¹Ø¼ü´ÊÉèÖò»Í¬µÄ partition£¬µ«ÊÇ partition Ö»ÊÇÂß¼·ÖÇø¡£ÕæÕý´æ´¢µ½´ÅÅÌʱ°´ part À´´æ´¢£¬Ã¿Ò»¸ö part Ò»¸öÎļþ¼Ð£¬ÀïÃæ´æ´¢²»Í¬×ֶεÄ.mrkºÍ.binÎļþ£¬ÒÔ¼°Ò»¸öminmax_{PARTITION_KEY_COLUMN}.idxÎļþ£¬²»Í¬ part µÄ minmax ×÷Ϊһ¸öË÷Òý´æ´¢ÓÚÄÚ´æ¡£
µ±²éѯµÄ WHERE ´øÓÐ partition key ʱ£¬Ê×ÏÈ»á±È½Ïÿһ¸ö part µÄ minmax Ë÷Òý¹ýÂ˲»Ïà¹Ø parts¡£Ö®ºóÔÙ¸ù¾Ý PARTITION BY ¶¨ÒåµÄ¹æÔò¹ýÂ˲»Ïà¹Ø partition¡£
¿ÉÊÇ partition ²»ÊÇԽСԽºÃ¡£
partitioning ²¢²»»á¼ÓËÙ²éѯ£¨ÓÐÖ÷¼ü´æÔÚ£©£¬¹ýСµÄ partition ·´¶ø»áµ¼Ö´óÁ¿µÄ parts ÎÞ·¨ºÏ²¢£¨MergeTree ÒýÇæ¼Ò×å»áÔÚºǫ́²»¶ÏºÏ²¢ parts£©£¬ÒòΪÊôÓÚ²»Í¬ partition µÄ parts ÎÞ·¨ºÏ²¢¡£[5]
×î¼Ñʵ¼ù[6]£º
Ö÷¼üÊÇ ClickHouse ×îÖØÒªµÄË÷Òý£¬Ã»ÓÐÖ®Ò»¡£ºÃµÄÖ÷¼üÓ¦¸ÃÄÜÓÐЧÅųý´óÁ¿Î޹صÄÊý¾Ý granules£¬¼õÉÙ´ÅÅ̶ÁÈ¡µÄ×Ö½ÚÊý¡£
ÏȽ²¼¸¸öÖ÷¼üµÄ±³¾°ÖªÊ¶£º
index_granularityÉèÖõĴóС£¨Ä¬ÈÏ 8192£©½«Êý¾Ý·ÖΪһ¸ö¸ö granules[7].binÎļþ´æ´¢ÁË n ¸ö granules£¬ÓÃ.mrkÎļþ¼Ç¼ÿһ¸ö granules ÔÚ.binÎļþµÄµØÖ·Æ«ÒÆ[11]Ö÷¼üµÄÑ¡ÔñÓ¦¸Ã¾¡¿ÉÄÜ¿¼ÂÇÖÜÈ«£¬ÒòΪÖ÷¼üÊÇÎÞ·¨Ð޸ĵģ¬Ö»Äܽ¨Ð±íºóÊý¾ÝÇ¨ÒÆ¡£
×î¼Ñʵ¼ù[12]£¨Õë¶Ô(Replicated)MergeTree ÒýÇæ£©£º
×îºóÒ»²½ÊÇÌø±íË÷Òý£¬Õâ¸öûÓÐÌ«¶à¿ÉÒÔ½²µÄµØ·½£¬ºÍÆäËûÊý¾Ý¿âÏàͬ£¬Ìø±íË÷ÒýÓÃÓÚ¾¡Á¿¼õÉÙ¶ÁÈ¡µÄÐÐÊý¡£¾ßÌå²Î¿´¹Ù·½Îĵµ¡£
ÅäÖÃÓÅ»¯·ÖΪÁ½²¿·Ö£¬È«¾ÖÅäÖÃÓÅ»¯ºÍ MergeTree ±íÅäÖÃÓÅ»¯¡£
²Î¿´AltinityÑ¡ÔñÐÔÅäÖÃÓÅ»¯Ïî¡£
ÕâÀïдÈý¸öÍÆ¼öµÄÅäÖãº
force_index_by_dateºÍforce_primary_key±ÜÃâÈ«Å̶ÁÈ¡ttl_only_drop_parts±íÅäÖóýÁËÈ«¾ÖÅäÖã¬MergeTree ÒýÇæ¼Ò×åÿÕűíÒ²ÓÐ×Ô¼ºµÄÅäÖÃÏî¡£[13]
ÍÆ¼öÉèÖÃÈçÏÂÅäÖãº
ttl_only_drop_parts=1¡£Ö»ÓÐ parts ÖÐËùÓÐÊý¾Ý¶¼¹ýÆÚÁ˲Żá DROP£¬¿ÉÒÔÓÐЧ¼õÉÙTTL_MERGE·¢ÉúµÄƵÂÊ£¬½µµÍ´ÅÅ̸ºÔØ¡£merge_with_ttl_timeout=86400¡£ÅäºÏÉÏÒ»ÏîÅäÖ㬽« TTL ¼ì²éµ÷ÕûΪ 1 ÌìÒ»´Î£¨Ä¬ÈÏ 4 Сʱһ´Î£©¡£use_minimalistic_part_header_in_zookeeper=1¡£¿ÉÒÔÓÐЧ½µµÍ Zookeeper ¸ºÔØ£¬±ÜÃâ Zookeeeper ³ÉΪÐÔÄÜÆ¿¾±£¨²åÈ룩¡£³ýÁËË÷Òý¡¢·ÖÇøºÍÅäÖÃÍ⣬»¹Óбí×ֶοÉÒÔÓÅ»¯¡£½ÓÏÂÀ´½«½²Êö Schema ÀàÐÍ¡¢CODEC ºÍ»º´æÈý¸ö·½Ãæ¡£
×¢Ò⣬¾¡Á¿±ÜÃâʹÓà Null£¬ÔÚ ClickHouse ÖÐ Null »áÓÃÒ»¸öµ¥¶À Null masks Îļþ´æ´¢ÄÄЩÐÐΪ Null[14]£¬Òò´Ë¶Áȡij¸öÆÕͨ×Ö¶ÎÖ»ÐèÒª.binºÍ.mrkÁ½¸öÎļþ£¬¶ø¶ÁÈ¡ Nullable ×Ö¶ÎʱÐèÒª.bin¡¢.mrkºÍ masks Îļþ¡£ÉçÇø²éѯÑéÖ¤£¬×î¸ß»áÓÐ 2 ±¶ÐÔÄÜËðʧ¡£[15]
ʹÓà ClickHouse ´æ´¢Ê±£¬Ò»°ãÓû§¶¼»á´´½¨´ó¿í±í£¬°üº¬´óÁ¿ÊýÖµ¡¢×Ö·û´®ÀàÐ͵Ä×ֶΡ£ÕâÀïÌá¼°Á½ÖÖ Schema ÀàÐÍ[16]£¬Ã»ÓÐÄĸö¸üÓÅÔ½£¬ÓɶÁÕßÖ´ÐÐÆÀ¹ÀÒµÎñÊʺÏÄÄÒ»ÖÖ¡£
ÕâÊÇÎÒÃÇÖ÷±íÕýÔÚʹÓõÄÀàÐÍ£¬½«¿ÉÄÜÓõ½µÄ×Ö¶ÎÔ¤ÁôƽÆÌ£¬³ýÁËһϵÁлù´¡×Ö¶ÎÍ⣬Ôö¼Ó´óÁ¿metric1, metric2...metricNºÍtag1, tag2...tagNµÈµÈ×ֶΡ£
Óŵ㣺
ȱµã£º
ÕâÊÇÎÒÃÇ ctree ¹¦ÄÜÕýÔÚʹÓõÄÀàÐÍ¡£½«ÒµÎñ×Ö¶ÎÈûÈëǶÌ×Êý¾ÝÀàÐÍÖУ¬±ÈÈç array¡¢nested struct ºÍ map¡£ºóÎÄÒÔ array ¾ÙÀý£ºmetric_array¡¢tag_array¡£
Óŵ㣺
ȱµã£º
¹ØÓÚ Schema Éè¼ÆÕâÀ¶ÁÕß¿ÉÒÔ¿¼ÂÇ 28 ÔÔò£¬ÀíÂÛÉÏ 80%²éѯֻ»áÓõ½ 20%µÄÒµÎñ×ֶΣ¬Òò´Ë¿ÉÒÔ½«Ê¹ÓÃÆµÂʸߵÄÒµÎñ×Ö¶ÎÆ½ÆÌ£¬½«Ê¹ÓÃÆµÂʵ͵Ä×ֶηÅÈëǶÌ׽ṹÖС£
CODEC ·ÖΪѹËõËã·¨ CODEC¡¢´æ´¢¸ñʽ CODEC ºÍ¼ÓÃÜ CODEC£¬Ò»°ã¿ÉÒÔ×éºÏÒ»ÆðʹÓá£ÔÚ ClickHouse ÖУ¬Î´ÏÔʾָ¶¨ CODEC µÄ×ֶζ¼»á±»·ÖÅäÒ»¸ö DEFAULT ĬÈÏ CODEC LZ4£¨³ý·ÇÓû§ÐÞ¸Ä clickhouse ÅäÖà compression ²¿·Ö[17]£©¡£
ѹËõËã·¨ CODEC µÄÑ¡ÔñÊÇÒ»¸öƽºâ°åÎÊÌ⣬¸ü¸ßµÄѹËõ¶È¿ÉÒÔÓиüÉÙµÄ IO µ«ÊǸü¸ßµÄ CPU£¬¸üµÍµÄѹËõ¶ÈÓиü¶àµÄ IO µ«ÊǸüÉÙµÄ CPU¡£ÕâÐèÒª¶ÁÕ߸ù¾Ý²¿Êð»úÆ÷ÅäÖÃ×ÔÐÐÑ¡ÔñºÏÊʵÄѹËõËã·¨ºÍѹËõµÈ¼¶¡£
ÕâÀïÌṩÁ½¸öÅжϲßÂÔ£º
´æ´¢¸ñʽ CODEC Ö÷ÒªÊÇDelta¡¢DoubleDelta¡¢Gorilla¡¢FPCºÍT64¼¸ÖÖ¡£
Delta´æ´¢ÐÐÖ®¼äµÄ±ä»¯Öµ£¬Êʺϱ仯½ÏСÇұȽϹ̶¨µÄÁУ¬±ÈÈçʱ¼ä´Á¡£ÐèÒªÅäºÏ ZSTD ʹÓÃDoubleDelta´æ´¢DeltaµÄDelta¡£Êʺϱ仯ºÜÂýµÄÐòÁÐGorillaÊʺϲ»Ôõô±ä¶¯µÄ integer¡¢float ÀàÐÍ[18]FPCÊʺÏÓÚ float ÀàÐÍ£¬ÓÉÓÚÎÒÃÇδʹÓà float ×Ö¶ÎÕâÀïÂÔ¹ýT64´æ´¢±àÂ뷶ΧÄÚ×î´ó¡¢×îСֵ£¬ÒÔתΪ 64bit ´æ´¢£¬ÊʺϽÏСµÄ integer ÀàÐÍÀ©Õ¹ÔĶÁ£º
mark_cache_size¿ÉÒÔµ÷Õû.mrkÎļþµÄ»º´æ´óС£¬Ä¬ÈÏΪ 5GB¡£Êʵ±µ÷´ó¿ÉÒÔ¼õÉÙ²éѯʱ IO ´ÎÊý£¬ÓÐЧ½µµÍ´ÅÅÌѹÁ¦¡£[19]
.mrkÎļþÔ½´óindex_granularityÓë.mrkÎļþ´óС³É¸ºÏà¹Ø¿ÉÒÔͨ¹ýÈçÏ SQL ²éѯµ±Ç°ËùÓбíµÄ parts ÐÅÏ¢£º
SELECT
database,
table,
count()ASparts,
uniqExact(partition_id)ASpartition_cnt,
sum(rows),
formatReadableSize(sum(data_compressed_bytes)AScomp_bytes)AScomp,
formatReadableSize(sum(data_uncompressed_bytes)ASuncomp_bytes)ASuncomp,
uncomp_bytes/comp_bytesASratio,
formatReadableSize(sum(marks_bytes)ASmark_sum)ASmarks,
mark_sum/uncomp_bytesASmark_ratio
FROMcluster(default_cluster,system.parts)
WHEREactive
GROUPBY
database,
table
ORDERBYcomp_bytesDESC
¿ÉÒÔͨ¹ýÈçϲéѯ»ñÈ¡µ±Ìì mrk »º´æÃüÖÐÇé¿ö£º
WITH(ProfileEvents.Values[indexOf(ProfileEvents.Names,'MarkCacheHits')])ASMARK_CACHE_HITS
SELECT
toHour(event_time)AStime,
countIf(MARK_CACHE_HITS!=0)AShit_query_count,
count()AStotal_query_count,
hit_query_count/total_query_countAShit_percent,
avg(MARK_CACHE_HITS)ASaverage_hit_files,
min(MARK_CACHE_HITS)ASminimal_hit_files,
max(MARK_CACHE_HITS)ASmaximal_hit_files,
quantile(0.5)(MARK_CACHE_HITS)AS"50",
quantile(0.9)(MARK_CACHE_HITS)AS"90",
quantile(0.99)(MARK_CACHE_HITS)AS"99"
FROMclusterAllReplicas('default_cluster',system.query_log)
WHEREevent_date=toDate(now())
AND(type=2ORtype=4)
ANDquery_kind='Select'
GROUPBYtime
ORDERBYtimeASC
ÒÔ¼°Èçϲéѯ»ñÈ¡µ±Ç° mrk »º´æÄÚ´æÕ¼ÓÃÇé¿ö£º
SELECTformatReadableSize(value)
FROMasynchronous_metrics
WHEREmetric='MarkCacheBytes'
ÒÔ¼° mrk »º´æ¾ßÌ建´æ¶àÉÙÎļþ£º
SELECTvalue
FROMasynchronous_metrics
WHEREmetric='MarkCacheFiles'
³ý´ËÖ®Í⣬ClickHouse »¹¿ÉÒÔµ÷Õûuncompressed_cache»º´æÒ»¶¨Á¿ÔʼÊý¾ÝÓÚÄÚ´æÖС£[20]µ«ÊÇÕâ¸ö»º´æÖ»¶Ô´óÁ¿¶Ì²éѯÓÐЧ£¬¶ÔÓÚ OLAP À´Ëµ£¬²éÑ¯Ç§Ææ°Ù¹Ö£¬²»Ì«½¨Òéµ÷ÕûÕâ¸öÅäÖá£
µ½ÁË×îÄѵIJ¿·Ö£¬ÓÉÓÚ½ÓÏÂÀ´µÄ²¿·ÖºÍ²»Í¬ÒµÎñϢϢÏà¹Ø£¬ÎªÁ˽²½âÎÒÃÇÒµÎñÉϵÄÓÅ»¯£¬ÎÒÏȽéÉÜÏÂÎÒÃÇÒµÎñÇé¿ö£º
QAPM Ö÷´òÓ¦ÓÃÐÔÄÜ¼à¿Ø£¬Ö÷Òª·ÖΪָ±ê¡¢¸öÀýÁ½ÕÅ±í¡£¸öÀý±í°üº¬¸ü¶à»ù´¡×ֶΣ¬Ò»°ãÓû§Õ¹Ê¾£»Ö¸±ê±íÖ÷ÒªÓÃÓھۺϼÆËã¡£
Ê×ÏÈÈ·¶¨Ö÷¼ü£¬ÎãÓ¹ÖÃÒɵÄǰÁ½¸öÒ»¶¨ÊÇ
Ö¸±êûÓÐÌØÕ÷¼üÖµ£¬Òò´ËÖ»Ìí¼Ó´¦Àíʱ¼ä×÷ΪµÚÈý¸öÖ÷¼ü¡£
¶ÔÓÚÖ¸±ê±í£¬ÉèÖõÄÖ÷¼üΪ£ºapp_id, category, entrance_time
¸öÀý´æÔÚÌØÕ÷ feature£¬ÓÉÓÚ£º
½«ÌØÕ÷µÄ md5 Ôö¼Óµ½Ö÷¼üÖУ¬ÓÃÓÚ¼ÓËÙ²éѯ¡¢Ìá¸ßѹËõÂÊ¡£µ«ÊÇÕâÀïÓÐÁ½¸ö·½Ïò£º
app_id, category, intDiv(entrance_time, 3600000), feature_md5app_id, category, feature_md5, entrance_time·ÖÇø¼üÉèÖÃΪ`PARTITIONBYintDiv(entrance_time,2592000000)
¼øÓÚSAMPLE BYÐèÒª½« xxHash ×ֶηÅÔÚÖ÷¼üÖУ¬Ö÷¼ü¶¼°üº¬¸ß»ùÊý×ֶΣ¬¾Í²»ÉèÖóéÑù¼ü£¬¶øÊÇÔÚÐèÒªµÄʱºòÈí³éÑù[21]£º
SELECTcount()FROMtableWHERE...ANDcityHash64(some_high_card_key)%10=0;--Deterministic
SELECTcount()FROMtableWHERE...ANDrand()%10=0;--Non-deterministic
Êý¾Ý²åÈë¿´ÆðÀ´ºÍ²éѯÐÔÄÜûʲôÁªÏµ£¬µ«ÊÇÓмä½ÓÓ°Ïì¡£²»ºÏÀíµÄ²åÈë»áµ¼Ö¸ü¶àµÄдÅÌ¡¢¸ü¶àµÄÊý¾Ý merge ÉõÖÁÓпÉÄܲåÈëʧ°Ü£¬Ó°Ïì¶ÁÅÌÐÔÄÜ¡£
ClickHouse ×÷Ϊ OLAP ²¢²»ÊʺÏСÅúÁ¿¡¢´ó²¢·¢Ð´È룬Ïà·´¶øÊʺϴóÅúÁ¿¡¢Ð¡²¢·¢Ð´È룬¹Ù·½½¨Òé²åÈëÊý¾ÝÿÅú´ÎÖÁÉÙ 1000 ÐУ¬»òÕßÿÃëÖÓ×î¶à 1 ´Î²åÈë¡£[22]
ÕâһС½ÚÎÒÏëÇ¿µ÷Ô×Ó£¨Atomic Insert£©Ð´ÈëµÄ¸ÅÄһ´Î²åÈë´´½¨Ò»¸öÊý¾Ý part¡£
ǰÎÄÌá¼°£¬ClickHouse Ò»¸ö part ÊÇÒ»¸öÎļþ¼Ð£¬ºǫ́Óиö merge Ï̳߳ز»¶Ï merge ²»Í¬µÄ part¡£Ô×Ó²åÈë¿ÉÒÔ¼õÉÙ merge ´ÎÊý£¬Èà ClickHouse ¸ºÔظüµÍ£¬ÐÔÄܸüºÃ¡£
Ô×ÓдÈëµÄ³ä·ÖÌõ¼þ[23]£º
MergeTree±í£¨²»ÄÜÓÐ Buffer ±í£©max_insert_block_size£¨Ä¬ÈÏ 1048545£©input_format_parallel_parsing=0max_block_sizemin_insert_block_size_rows and min_insert_block_size_bytesMergeTree±í²»°üº¬ÎﻯÊÓͼÕâÀïÌùÒ»ÏÂÎÒÃÇÉú²úµÄÅäÖã¨users.xml£©¡£
¾¹ýͳ¼Æ£¬¸öÀý±íÿÐдóÔ¼ 2KB£¬Ö¸±ê±íÿÐдóÔ¼ 100B£¨Î´Ñ¹Ëõ£©¡£
ÉèÖÃmin_insert_block_size_rowsΪ 10000000£¬Ö¸±ê»áÏÈÂú×ãÕâ¸öÌõ¼þ£¬´ó¸ÅÒ»¸ö block Ôʼ´óС 1GB¡£ÉèÖÃmin_insert_block_size_bytesΪ 4096000000£¬¸öÀý»áÏÈÂú×ãÕâ¸öÌõ¼þ£¬´ó¸ÅÒ»¸ö block Ôʼ´óС 1G£¬Ô¼ 1024000 ÐС£
ÕâÈý¸öÅäÖÃÏîÊǿͻ§¶ËÅäÖã¬ÐèÒªÔÚ²åÈëµÄ session ÖÐÉèÖ㬶ø²»ÊÇÔÚÄǼ¸¸ö.xmlÖÐÅäÖá£
max_insert_block_size:16777216
input_format_parallel_parsing:0
min_insert_block_size_rows:10000000
min_insert_block_size_bytes:1024000000
×¢Ò⣬min_insert_block_size_rowsºÍmin_insert_block_size_bytesÊÇ¡°»ò¡±µÄ¹ØÏµ£º
//src/Interpreters/SquashingTransform.cpp
boolSquashingTransform::isEnoughSize(size_trows,size_tbytes)const
{
return(!min_block_size_rows&&!min_block_size_bytes)
||(min_block_size_rows&&rows>=min_block_size_rows)
||(min_block_size_bytes&&bytes>=min_block_size_bytes);
}
£º±¾·½°¸²¢Ã»Óо¹ýÉú²úÑéÖ¤£¬×ÃÇ鿼ÂÇ
ClickHouse ÓÐ Shard ºÍ Replica ¿ÉÒÔÅäÖã¬×÷ÓÃÈçÏÂͼËùʾ£º
Ëùν¶Áд·ÖÀëÒ²¾ÍÊǽ« Shard ·ÖΪÁ½°ë£¬Ò»°ëÖ»ÓÃÓÚ²éѯ£¬Ö»ÒªÈ÷ֲ¼Ê½±í²éѯ¶¼µ¼Èëµ½ Shard1 ¼´¿É£¨ÔÚusers.xmlÖÐÅäÖÃload_balancingΪfirst_or_random£©£»Ò»°ëÓÃÓÚдÈ룬²åÈëµÄ³ÌÐòÊÖ¶¯¿ØÖƲåÈë Shard2 µÄ½Úµã£¬ÓÉ ClickHouse µÄ ReplicatedMergeTree ²»Í¬ Shard Êý¾ÝÒÀ¿¿ zookeeper ×Ô¶¯Í¬²½µÄ²ßÂÔ½«Êý¾Ýͬ²½µ½ Shard1¡£[24]
ÕâÖÖ²ßÂÔÓÐÌìÈ»µÄȱÏÝ£º
£º»òÐí¿ÉÒÔÅäÖÃÁ½±ß Shard ×ÊÔ´²»Ò»ÖÂÀ´½â¾öÎÊÌ⣬±ÈÈçдÈëµÄ Shard ×ÊÔ´ÀµÍ£¬×¨ÓÃÓÚ´¦ÀíÊý¾Ý²åÈ룻¶ÁµÄ Shard ×ÊÔ´¸ü¸ß£¬×¨ÃÅÓÃÓÚ´¦ÀíÍ»Ôö²¢·¢Á÷Á¿¡£
Buffer ²¢²»ÍƼö³£¹æÒµÎñʹÓã¬Ö»ÓÐÔÚÆÈÇÐÐèÒª²éѯʵʱÐÔ+²åÈëÎÞ·¨´óÅúÁ¿Ô¤¾ÛºÏʱʹÓãº
¸Ã²¿·Ö´ý²¹³ä£¬Ïë¿´µÄͬѧ¿ÉÒÔÔÚÆÀÂÛÇøÌßÌß
Ô¤¾ÛºÏÓÐÈýÖÖ·½·¨£¬ETL¡¢ÎﻯÊÓͼºÍͶӰ£¬ËûÃǵÄÇø±ðÈçÏÂ[26]£º
| ETL | MV | Projections | |
|---|---|---|---|
| Realtime | no | yes | yes |
| How complex queries can be used to build the preaggregaton | any | complex | very simple |
| Impacts the insert speed | no | yes | yes |
| Are inconsistancies possible | Depends on ETL. If it process the errors properly - no. | yes (no transactions / atomicity) | no |
| Lifetime of aggregation | any | any | Same as the raw data |
| Requirements | need external tools/scripting | is a part of database schema | is a part of table schema |
| How complex to use in queries | Depends on aggregation, usually simple, quering a separate table | Depends on aggregation, sometimes quite complex, quering a separate table | Very simple, quering the main table |
| Can work correctly with ReplacingMergeTree as a source | Yes | No | No |
| Can work correctly with CollapsingMergeTree as a source | Yes | For simple aggregations | For simple aggregations |
| Can be chained | Yes (Usually with DAGs / special scripts) | Yes (but may be not straightforward, and often is a bad idea) | No |
| Resources needed to calculate the increment | May be signigicant | Usually tiny | Usually tiny |
ÔÚÎÒÃÇÒµÎñÖУ¬¸öÀýÊDz»Ó¦¸ÃÔ¤¾ÛºÏµÄ£¬ÒòΪÊý¾ÝÐèÒª±»Àȡչʾ¶ø²»ÓüÆËã¡£Ö¸±êÐèÒª¾ÛºÏ£¬Êý¾ÝÁ¿½Ï´ó£¬Ã¿´Îʵʱ¼ÆËã¶Ô ClickHouse ¸ºÔØÌ«´ó¡£
Æäʵ»¹ÓÐÒ»Ö־ۺϷ½Ê½£¬¹ýÆÚÊý¾Ý¾ÛºÏ¡£¿ÉÒԲο¼£¬Í¬ÑùÏÞÖÆÒªÇó group by µÄ¼üֵΪÖ÷¼üǰ׺¡£
ÔÚÎÒÃÇÒµÎñʹÓÃʱ£¬Ê²Ã´Ê±ºòÓÃÄÄÒ»¸öÄØ£¿
×îºó£¬ÎªÁ˱ÜÃ⼯Ⱥ±»Ä³¸ö²éѯ¡¢²åÈëŪ¿å£¬ÐèÒªºÏÀí°²ÅÅÄÚ´æÊ¹Óã¬ÐèÒª¸ø·ÃÎÊÕË»§·ÖȨÏÞ£¬ÔÚÎÒÃÇÒµÎñ·ÖΪ£º
default£º×î¸ß¼¶Õ˺ţ¬²»Ê¹ÓÃroot£ºÊý¾Ý²åÈ룬ÅäÖþۺÏдÈ벿·ÖµÄ¼¸¸öÅäÖÃÏîmonitor£ºÄÚ²¿¿ª·¢Ê¹Óã¬È¨Ï޽ϸßviewer£ºweb ʹÓã¬Ìí¼Ó´óÁ¿ÏÞÖÆviewerÕË»§ÅäÖÃÈçÏÂËùʾ£º
<yandex>
<profiles>
<query>
<max_memory_usage>10000000000</max_memory_usage>
<max_memory_usage_for_all_queries>100000000000</max_memory_usage_for_all_queries>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
<max_rows_to_group_by>1000000</max_rows_to_group_by>
<group_by_overflow_mode>any</group_by_overflow_mode>
<max_rows_to_sort>1000000</max_rows_to_sort>
<max_bytes_to_sort>1000000000</max_bytes_to_sort>
<max_result_rows>100000</max_result_rows>
<max_result_bytes>100000000</max_result_bytes>
<result_overflow_mode>break</result_overflow_mode>
<max_execution_time>60</max_eecution_time>
<min_execution_speed>1000000</min_execution_speed>
<timeout_before_checking_execution_speed>15</timeout_before_checking_execution_speed>
<max_columns_to_read>25</max_columns_to_read>
<max_temporary_columns>100</max_temporary_columns>
<max_temporary_non_const_columns>50</max_temporary_non_const_columns>
<max_subquery_depth>2</max_subquery_depth>
<max_pipeline_depth>25</max_pipeline_depth>
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<readonly>1</readonly>
</query>
</profiles>
</yandex>
ͬʱ½¨ÒéÉèÖà quota£¬¼õÉÙ´óÁ¿¶ÁÅ̼ÆËã¡¢LIMIT ÉÙÁ¿Êý¾Ý·µ»ØµÄÇé¿ö·¢Éú¡£
ÎÒÃÇÊÇ CSIG ÐÔÄܹ¤³Ì¶þ×é QAPM ÍŶӣ¬QAPM ʱһ¿îÓ¦ÓÃÐÔÄÜ¼à¿Ø¹¤¾ß£¬¸²¸Ç android¡¢ios¡¢Ð¡³ÌÐò¡¢mac ºÍ win ¶à¶Ë£¬ÒÑÓÐÌÚѶ»áÒé¡¢ÓÅÒ¿âµÈ´óÓû§½ÓÈ룬ֵµÃÐÅÀµ£¬»¶ÓͬÊÂÊÔÓÃÎÒÃÇ QAPM ²úÆ·¡«Ìø×ªÁ´½Ó
ÔÚ ClickHouse ÓÅ»¯¹ý³ÌÓöµ½ÎÞÊýµÄÎÊÌ⣬¿¨ÔÚ ClickHouse ×ÔÉí¼à¿ØÎÞ·¨¸²¸ÇµÄ½ÇÂäʱ£¬È«¿¿ÐÔÄܹ¤³ÌÈý×éÔ±¹¤µÄ Drop£¨ÓêµÎ£©¹¤¾ßµÄ¶¦Á¦ÏàÖú£¬¸ßЧֱ¹Û¼à¿Ø CVM ¸÷ÏîÖ¸±ê£¬½