• ÔªÓîÖæ£º±¾Õ¾·ÖÏíÔªÓîÖæÏà¹Ø×ÊѶ£¬×ÊѶ½ö´ú±í×÷Õß¹ÛµãÓëÆ½Ì¨Á¢³¡ÎÞ¹Ø,½ö¹©²Î¿¼.

ClickHouse ²éѯÓÅ»¯Ïêϸ½éÉÜ

  • ÌÚѶ¼¼Êõ¹¤³Ì
  • 2023Äê1ÔÂ06ÈÕ06ʱ


×÷Õߣºoliverdding£¬ÌÚѶ CSIG ²âÊÔ¿ª·¢¹¤³Ìʦ

ÄãÏëÒªµÄ ClickHouse ÓÅ»¯£¬¶¼ÔÚÕâÀï¡£

ClickHouse ÊÇ OLAP£¨Online analytical processing£©Êý¾Ý¿â£¬ÒÔËٶȼû³¤[1]¡£ClickHouse ΪʲôÄÜÕâô¿ì£¿ÓÐÁ½µãÔ­Òò[2]£º

  • ¼Ü¹¹ÓÅÔ½
    • ÁÐʽ´æ´¢
    • Ë÷Òý
    • Êý¾ÝѹËõ
    • ÏòÁ¿»¯Ö´ÐÐ
    • ×ÊÔ´ÀûÓÃ
  • ¹Ø×¢µ×²ãϸ½Ú

µ«ÊÇ£¬Êý¾Ý¿âÉè¼ÆÔÙÓÅÔ½Ò²Õü¾È²»ÁË´íÎóµÄʹÓ÷½Ê½£¬±¾ÎÄÒÔ MergeTree ÒýÇæ¼Ò×åΪÀý½²½âÈçºÎ¶Ô²éѯÓÅ»¯¡£

ClickHouse ²éѯִÐйý³Ì

±¾½Ú»ùÓÚ ClickHouse 22.3 °æ±¾·ÖÎö

clickhouser-serverÆô¶¯ºó»áÔÚ while Ñ­»·ÖеȴýÇëÇ󣬽ÓÊÕµ½²éѯºó»áµ÷ÓÃexecuteQueryImpl()ÐÐÊý¹¹½¨ AST¡¢ÓÅ»¯²¢Éú³ÉÖ´Ðмƻ® pipeline£¬×îºóÔÚexecuteImpl()ÖжàÏß³ÌÖ´ÐÐ DAG »ñÈ¡½á¹û£¬ÕâÆªÎÄÕÂÖ»¹ØÐÄ SQL Ö´ÐУ¬Ê¡ÂÔµôÍøÂç½»»¥²¿·Ö£¬²éѯִÐÐÁ÷³ÌÈçÏÂͼËùʾ£º

SQL µÄ½âÎöÓÅ»¯ºÍ±àÒëÔ­ÀíϢϢÏà¹Ø£¬±¾½Ú½«°üº¬´óÁ¿±àÒëÔ­ÀíºÍ´úÂëϸ½Ú£¬ÊôÀ©Õ¹ÖªÊ¶¡£

´Ê·¨½âÎöºÍÓï·¨½âÎö

ClickHouse Äõ½ÐèÒªÖ´ÐÐµÄ SQL£¬Ê×ÏÈÐèÒª½« String ¸ñʽµÄ×Ö·û´®½âÎöΪËüÄÜÀí½âµÄÊý¾Ý½á¹¹£¬Ò²¾ÍÊÇ AST ºÍÖ´Ðмƻ®¡£¹¹Ôì AST ²¿·Ö´úÂëÈçÏÂËùʾ£º

//src/Interpreters/executeQuery.cpp

staticstd::tuple<ASTPtr,BlockIO>executeQueryImpl()
{
//¹¹ÔìParser
ParserQueryparser(end,settings.allow_settings_after_format_in_insert);

//½«SQLתΪ³éÏóÓï·¨Ê÷
ast=parseQuery(parser,begin,end,"",max_query_size,settings.max_parser_depth);

//ÉèÖÃqueryµÄÉÏÏÂÎÄ£¬±ÈÈçSETTINGS
...

if(async_insert)
{
...
}else{
//Éú³ÉinterpreterʵÀý
interpreter=InterpreterFactory::get(ast,context,SelectQueryOptions(stage).setInternal(internal));

//interpreterÓÅ»¯AST²¢·µ»ØÖ´Ðмƻ®
res=interpreter->execute();
}

//·µ»Ø³éÏóÓï·¨Ê÷ºÍÖ´Ðмƻ®
returnstd::make_tuple(ast,std::move(res));
}

ÖµµÃÒ»ÌáµÄÊÇ£¬½âÎö SQL Éú³ÉÓï·¨Ê÷ÕâÊDZàÒëÔ­ÀíÖдʷ¨·ÖÎöºÍÓï·¨·ÖÎö²¿·Ö¸²¸ÇµÄÊÂÇé¡£´Ê·¨·ÖÎöÖ»ÊǼòµ¥²ð½âÊý¾ÝÁ÷Ϊһ¸ö¸ö token£¬¶øÓï·¨·ÖÎö·ÖΪ×Ô¶¥ÏòϺÍ×Ôµ×ÏòÉÏÁ½ÖÖ·½Ê½£¬³£¼ûµÄÓï·¨·ÖÎö·½Ê½Ò²ÎªÊÖдÓï·¨·ÖÎö£¨ÍùÍùÊÇ×Ô¶¥ÏòϵÄÓÐÏÞ״̬»ú£¬µÝ¹éϽµ·ÖÎö£©ºÍÓï·¨·ÖÎö¹¤¾ß£¨ÍùÍùÊÇ×Ôµ×ÏòÉÏ£¬Èç Flex¡¢Yacc/Bison µÈ£©¡£

  • Ôø¾­ GCC ʹÓà yacc/bison ×÷ΪÓï·¨½âÎöÆ÷£¬ÔÚ 3.x ij¸ö°æ±¾Ö®ºó¸ÄΪÊÖдµÝ¹éϽµÓï·¨·ÖÎö[3]
  • clang Ò»Ö±ÊÇÊÖдµÝ¹éϽµÓï·¨·ÖÎö[4]

ÊÖдÓï·¨·ÖÎö±ÈÆðÓï·¨·ÖÎö¹¤¾ßÓм¸¸öÓÅÊÆ£¨µ±È»ÒªÐ´µÃºÃµÄÇé¿ö£©£º

  • ÐÔÄܸüºÃ¡£¿ÉÒÔÓÅ»¯Èȵã·¾¶µÈ
  • Õï¶ÏºÍ´íÎó»Ö¸´¸üÇåÎúÃ÷ÁË¡£ÊÖд״̬»ú¿ÉÒÔÍêÈ«ÕÆ¿ØÏµÍ³×´Ì¬£¬´íÎó´¦Àí¸üÈÝÒ×
  • ¼òµ¥¡£²»ÐèÒªÕÆÎÕÐÂÓï·¨

ClickHouse ½âÎö SQL µÄº¯ÊýÈçÏÂËùʾ£º

//src/Parsers/parseQuery.cpp

ASTPtrtryParseQuery()
{
//½«SQL²ð·ÖΪtokenÁ÷
Tokenstokens(query_begin,all_queries_end,max_query_size);
IParser::Postoken_iterator(tokens,max_parser_depth);

//½«tokenÁ÷½âÎöΪÓï·¨Ê÷
ASTPtrres;
constboolparse_res=parser.parse(token_iterator,res,expected);

returnres;
}

¿ÉÒÔ¿´µ½ÏȽ« SQL ×Ö·û´®²ð½âΪ token Á÷£¨´Ê·¨·ÖÎö£©£¬ÔÙµ÷ÓÃperser.parse()º¯Êý½øÐÐÓï·¨·ÖÎö£¬ËüµÄʵÏÖÈçÏ£º

//src/Parsers/ParserQuery.cpp

boolParserQuery::parseImpl(Pos&pos,ASTPtr&node,Expected&expected)
{
ParserQueryWithOutputquery_with_output_p(end,allow_settings_after_format_in_insert);
ParserInsertQueryinsert_p(end,allow_settings_after_format_in_insert);
ParserUseQueryuse_p;
ParserSetQueryset_p;
ParserSystemQuerysystem_p;
ParserCreateUserQuerycreate_user_p;
ParserCreateRoleQuerycreate_role_p;
ParserCreateQuotaQuerycreate_quota_p;
ParserCreateRowPolicyQuerycreate_row_policy_p;
ParserCreateSettingsProfileQuerycreate_settings_profile_p;
ParserCreateFunctionQuerycreate_function_p;
ParserDropFunctionQuerydrop_function_p;
ParserDropAccessEntityQuerydrop_access_entity_p;
ParserGrantQuerygrant_p;
ParserSetRoleQueryset_role_p;
ParserExternalDDLQueryexternal_ddl_p;
ParserTransactionControltransaction_control_p;
ParserBackupQuerybackup_p;

boolres=query_with_output_p.parse(pos,node,expected)
||insert_p.parse(pos,node,expected)
||use_p.parse(pos,node,expected)
||set_role_p.parse(pos,node,expected)
||set_p.parse(pos,node,expected)
||system_p.parse(pos,node,expected)
||create_user_p.parse(pos,node,expected)
||create_role_p.parse(pos,node,expected)
||create_quota_p.parse(pos,node,expected)
||create_row_policy_p.parse(pos,node,expected)
||create_settings_profile_p.parse(pos,node,expected)
||create_function_p.parse(pos,node,expected)
||drop_function_p.parse(pos,node,expected)
||drop_access_entity_p.parse(pos,node,expected)
||grant_p.parse(pos,node,expected)
||external_ddl_p.parse(pos,node,expected)
||transaction_control_p.parse(pos,node,expected)
||backup_p.parse(pos,node,expected);

returnres;
}

¿ÉÒÔ·¢ÏÖ ClickHouse ½« Query ·ÖΪÁË 18 ÖÖÀàÐÍ£¨½ØÖ¹ 2022-11-12 ÈÕ£©£¬Ã¿ÖÖ Query ¶¼ÓÐ×Ô¼ºµÄ Parser£¬Í¨¹ý¹Ø¼ü´ÊÆ¥Åä¹¹Ôì AST ÉϵĽڵ㣬×îÖÕÉú³ÉÓï·¨Ê÷¡£µÝ¹éϽµ²¿·Ö³¬¸ÙÁË£¬ÕâÀï¾Í²»ÆÌ¿ª½²¡£

ÓÅ»¯Æ÷

¾­¹ýÓï·¨·ÖÎöºóÉú³ÉµÄ AST ²¢²»ÊÇÖ´ÐÐ×îÓŽ⣬ClickHouse °üº¬´óÁ¿»ùÓÚ¹æÔòµÄÓÅ»¯£¨rule based optimization£©£¬Ã¿¸ö Query »á±éÀúÒ»±éÓÅ»¯¹æÔò£¬½«Âú×ãµÄÇé¿ö½øÐв»¸Ä±ä²éѯÓïÒåµØÖØÐ´¡£

ÿһÖÖ Query ÀàÐͶ¼ÓжÔÓ¦µÄ Interpreter£¬ºóÎͼÒÔ Select ²éѯ¾ÙÀý£¬´úÂëÈçÏ£º

//src/Interpreters/InterpreterFactory.cpp

std::unique_ptr<IInterpreter>InterpreterFactory::get()
{
...
if(query->as<ASTSelectQuery>())
{
returnstd::make_unique<InterpreterSelectQuery>(query,context,options);
}
...

ÔÚInterpreterSelectQueryÀàµÄ¹¹Ô캯ÊýÖн« AST ÓÅ»¯¡¢ÖØÐ´£¬´úÂëÏê¼ûsrc/Interpreters/InterpreterSelectQuery.cpp£¬ÕâÀïÖ»»­Á÷³Ìͼ£º

ÊÇ·ñ³õʼ»¯ settings ÓÅ»¯ with ÓÅ»¯ joins ν´ÊÏÂÍÆ½« where ÏÂÍÆµ½ prewhere ÊÇ·ñÒªÔÙ´ÎÓÅ»¯¼ì²é storage ȨÏÞÉú³É analysis_result ºÍ result_header

¹¹ÔìÖ´Ðмƻ®

src/Interpreters/InterpreterSelectQuery.cppÎļþInterpreterSelectQuery::executeImpl()·½·¨½«ÓÅ»¯·ÖÎöµÃµ½µÄÖмäÊý¾Ý¸¨ÖúÉú³É×îÖÕµÄÖ´Ðмƻ®£¬´úÂëÈçÏ£º

//src/Interpreters/InterpreterSelectQuery.cpp

voidInterpreterSelectQuery::executeImpl()
{
...
//¸öÈËÀí½âÕë¶ÔEXPLAINPLAN£¬Ö»¹¹½¨Ö´Ðмƻ®²»Ö´ÐÐ
if(options.only_analyze)
{
...
}
else
{
//´Ó´ÅÅ̶ÁÈ¡ËùÐèÁУ¬×¢ÒâÕâÒ»ÐУ¬ºóÎÄÌø×ª½øÈ¥·ÖÎö
executeFetchColumns(from_stage,query_plan);
}
if(options.to_stage>QueryProcessingStage::FetchColumns)
{
//ÔÚ·Ö²¼Ê½Ö´ÐÐQueryʱֻÔÚÔ¶³Ì½ÚµãÖ´ÐÐ
if(expressions.first_stage)
{
//µ±storage²»Ö§³ÖprewhereʱÌí¼ÓFilterStep
if(!query_info.projection&&expressions.filter_info)
{
...
}
if(expressions.before_array_join)
{
...
}
if(expressions.array_join)
{
...
}
if(expressions.before_join)
{
...
}
//¿ÉÑ¡²½Ö裺½«joinkeyתΪһÖµÄsupertype
if(expressions.converting_join_columns)
{
...
}
//Ìí¼ÓJoin
if(expressions.hasJoin())
{
...
}
//Ìí¼Ówhere
if(!query_info.projection&&expressions.hasWhere())
executeWhere(query_plan,expressions.before_where,expressions.remove_where_filter);
//Ìí¼Óaggregation
if(expressions.need_aggregate)
{
executeAggregation(
query_plan,expressions.before_aggregation,aggregate_overflow_row,aggregate_final,query_info.input_order_info);
///Weneedtoresetinputorderinfo,sothatexecuteOrdercan'tuseit
query_info.input_order_info.reset();
if(query_info.projection)
query_info.projection->input_order_info.reset();
}
//×¼±¸Ö´ÐУº
//1.beforewindowsº¯Êý
//2.windowsº¯Êý
//3.afterwindowsº¯Êý
//4.×¼±¸DISTINCT
if(expressions.need_aggregate)
{
//´æÔھۺϺ¯Êý£¬ÔÚwindowsº¯Êý/ORDERBY֮ǰ²»Ö´ÐÐ
}
else
{
//²»´æÔھۺϺ¯Êý
//´æÔÚwindowsº¯Êý£¬Ó¦¸ÃÔÚ³õʼ½ÚµãÔËÐÐ
//²¢ÇÒ£¬ORDERBYºÍDISTINCTÒÀÀµÓÚwindowsº¯Êý£¬ÕâÀïÒ²²»ÄÜÔËÐÐ
if(query_analyzer->hasWindow())
{
executeExpression(query_plan,expressions.before_window,"Beforewindowfunctions");
}
else
{
//ûÓÐwindowsº¯Êý£¬Ö´ÐÐbeforeORDERBY¡¢×¼±¸DISTINCT
assert(!expressions.before_window);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
}
//Èç¹û²éѯûÓÐGROUP¡¢HAVING£¬ÓÐORDER»òLIMIT£¬»áÔÚÔ¶³ÌÅÅÐò¡¢LIMIT
preliminary_sort();
}
//ÔÚ·Ö²¼Ê½Ö´ÐÐQueryʱֻÔÚ³õʼ½ÚµãÖ´Ðлòoptimize_distributed_group_by_sharding_key¿ªÆôʱ
if(expressions.second_stage||from_aggregation_stage)
{
if(from_aggregation_stage)
{
//Ô¶³Ì½Úµã¾ÛºÏ¹ý£¬ÕâÀïɶҲ²»¸É
}
elseif(expressions.need_aggregate)
{
//´Ó²»Í¬½ÚµãÀ­È¡Êý¾ÝºÏ²¢
if(!expressions.first_stage)
executeMergeAggregated(query_plan,aggregate_overflow_row,aggregate_final);

if(!aggregate_final)
{
//Ö´ÐÐgroupbywithtotals/rollup/cube
...
}
//Ìí¼ÓHaving
elseif(expressions.hasHaving())
executeHaving(query_plan,expressions.before_having,expressions.remove_having_filter);
}
//±¨¸ö´í
elseif(query.group_by_with_totals||query.group_by_with_rollup||query.group_by_with_cube)
throwException("WITHTOTALS,ROLLUPorCUBEarenotsupportedwithoutaggregation",ErrorCodes::NOT_IMPLEMENTED);
//×¼±¸Ö´ÐУº
//1.beforewindowsº¯Êý
//2.windowsº¯Êý
//3.afterwindowsº¯Êý
//4.×¼±¸DISTINCT
if(from_aggregation_stage)
{
if(query_analyzer->hasWindow())
throwException(
"WindowfunctionsdoesnotsupportprocessingfromWithMergeableStateAfterAggregation",
ErrorCodes::NOT_IMPLEMENTED);
}
elseif(expressions.need_aggregate)
{
executeExpression(query_plan,expressions.before_window,
"Beforewindowfunctions");
executeWindow(query_plan);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
else
{
if(query_analyzer->hasWindow())
{
executeWindow(query_plan);
executeExpression(query_plan,expressions.before_order_by,"BeforeORDERBY");
executeDistinct(query_plan,true,expressions.selected_columns,true);
}
else
{
//Neitheraggregationnorwindows,allexpressionsbefore
//ORDERBYexecutedonshards.
}
}
//Ìí¼Óorderby
if(expressions.has_order_by)
{
//ÔÚ·Ö²¼Ê½²éѯÖУ¬Ã»ÓоۺϺ¯ÊýÈ´ÓÐorderby£¬½«»áÔÚÔ¶¶Ë½Úµãorderby
...
}
//¶àsourceorderbyÓÅ»¯
...

//¶àÌõÁ÷ʱÔÙ´ÎÖ´ÐÐdistinct
if(!from_aggregation_stage&&query.distinct)
executeDistinct(query_plan,false,expressions.selected_columns,false);

//´¦Àílimit
...
//´¦Àíprojection
...
//´¦Àíoffset
...
}

//ÐèÒª×Ó²éѯ½á¹û¹¹½¨set
if(!subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan,subqueries_for_sets);
}
}

ÆäÖÐInterpreterSelectQuery::executeFetchColumns()º¯ÊýÊǶÁÈ¡ËùÐèÁеĽ׶Ρ£´Ó´úÂëÖпÉÒÔ¿´µ½ËüÒ²×öÁ˺ܶàµÄÓÅ»¯£º

  • count()ÓÅ»¯
  • Ö»ÓÐ LIMIT Çé¿öµÄÓÅ»¯
  • quotaÏÞÖÆ

¿ÉÒÔ¿´µ½£º

  1. limit ´ó²¿·ÖÇé¿öÏÂÊǼÆËãÍê³ÉºóÔÙÖ´ÐУ¬¶ø quota ÊÇÔÚ¶ÁÈ¡Êý¾ÝʱִÐеÄ
  2. ¼ÓËٵĹؼüÊǼõÉÙ¶ÁÈëµÄÊý¾ÝÁ¿£¬Ò²¾ÍÊÇËµÉÆÓÃË÷Òý
  3. ÓÃcount()¡¢count(1)ºÍcount(*)£¬ClickHouse ¶¼ÓÐÓÅ»¯£¬µ«²»Òªcount(any_field)

Ë÷ÒýÉè¼Æ

Ë÷ÒýÊÇ ClickHouse ¿ìËÙ²éѯ×îÖØÒªµÄÒ»»·£¬·ÖΪÖ÷¼üË÷Òý£¨sparse indexes£©ºÍÌø±íË÷Òý£¨data skipping indexes£©¡£ÔÚÖ´Ðвéѯʱ£¬Ë÷ÒýÃüÖÐ˳ÐòÈçÏÂͼËùʾ£º

Partition Key MinMax IndexPartitionPrimary Key Sparse IndexData Skipping Indexes

Ïê¼û´úÂ룺

//src/Processors/QueryPlan/ReadFromMergeTree.cpp

MergeTreeDataSelectAnalysisResultPtrReadFromMergeTree::selectRangesToRead()
{
...
try
{
//ʹÓÃpartitionbyѡȡÐèÒªparts
MergeTreeDataSelectExecutor::filterPartsByPartition(...);
//´¦Àí³éÑù
...
//ʹÓÃÖ÷¼üË÷ÒýºÍÌø±íË÷Òý
result.parts_with_ranges=MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(...);
}
catch(...)
{
...
}
...
}

ÖµµÃ×¢ÒâµÄÊÇ£¬Ö÷¼üµÄ sparse index ʹÓöþ·Ö²éÕÒÖ±½ÓËõС·¶Î§µ½ËùÐèÒªµÄ parts£¬¶øÌø±íË÷Òý¾ÍÐèÒªÔÚÑ¡³öÀ´µÄ parts Àÿ n ¸ö£¨Óû§×Ô¶¨Ò壩granules ¾ÍÐèÒª±È½Ï n ´Î¡£

×î¼Ñʵ¼ù£º

partition by ÐèÒªÒ»¸ö¿ÉÒÔתΪʱ¼äµÄÁУ¬±ÈÈç Datatime¡¢Date »òÕßʱ¼ä´Á£¬¶øÈç¹û primary key ÖÐÒ²ÓÐʱ¼ä×ֶΣ¬¿ÉÒÔʹÓÃͬһ¸ö×ֶαÜÃâ²éѯʱÐèҪͬʱָ¶¨Á½¸öʱ¼ä×ֶΡ£±ÈÈ磺ָ¶¨ÎªÊý¾Ý´¦Àíʱ¼ä¡£

Partition

Ê×ÏÈÒª±æÎö part ºÍ partition µÄÇø±ð£¬ClickHouse Ó¦ÓòãÃæ¶¨ÒåÁË partition£¬Óû§Ö¸¶¨ partition by ¹Ø¼ü´ÊÉèÖò»Í¬µÄ partition£¬µ«ÊÇ partition Ö»ÊÇÂß¼­·ÖÇø¡£ÕæÕý´æ´¢µ½´ÅÅÌʱ°´ part À´´æ´¢£¬Ã¿Ò»¸ö part Ò»¸öÎļþ¼Ð£¬ÀïÃæ´æ´¢²»Í¬×ֶεÄ.mrkºÍ.binÎļþ£¬ÒÔ¼°Ò»¸öminmax_{PARTITION_KEY_COLUMN}.idxÎļþ£¬²»Í¬ part µÄ minmax ×÷Ϊһ¸öË÷Òý´æ´¢ÓÚÄÚ´æ¡£

µ±²éѯµÄ WHERE ´øÓÐ partition key ʱ£¬Ê×ÏÈ»á±È½Ïÿһ¸ö part µÄ minmax Ë÷Òý¹ýÂ˲»Ïà¹Ø parts¡£Ö®ºóÔÙ¸ù¾Ý PARTITION BY ¶¨ÒåµÄ¹æÔò¹ýÂ˲»Ïà¹Ø partition¡£

¿ÉÊÇ partition ²»ÊÇԽСԽºÃ¡£

partitioning ²¢²»»á¼ÓËÙ²éѯ£¨ÓÐÖ÷¼ü´æÔÚ£©£¬¹ýСµÄ partition ·´¶ø»áµ¼Ö´óÁ¿µÄ parts ÎÞ·¨ºÏ²¢£¨MergeTree ÒýÇæ¼Ò×å»áÔÚºǫ́²»¶ÏºÏ²¢ parts£©£¬ÒòΪÊôÓÚ²»Í¬ partition µÄ parts ÎÞ·¨ºÏ²¢¡£[5]

×î¼Ñʵ¼ù[6]£º

  • Ò»¸ö(Replicated)MergeTree µÄ partition ´ó¸Å 1 ¡« 300GB
    • Summing/ReplacingMergeTree µÄ partition ´ó¸Å 400MB ¡« 40GB
  • ²éÑ¯Ê±Éæ¼°¾¡Á¿ÉÙ partition
  • ²åÈëʱ×îºÃÖ»ÓÐ 1 ¡« 2 ¸ö·ÖÇø
  • Ò»Õűíά³Ö 100 ¸ö·ÖÇøÒÔÄÚ

Primary key index

Ö÷¼üÊÇ ClickHouse ×îÖØÒªµÄË÷Òý£¬Ã»ÓÐÖ®Ò»¡£ºÃµÄÖ÷¼üÓ¦¸ÃÄÜÓÐЧÅųý´óÁ¿Î޹صÄÊý¾Ý granules£¬¼õÉÙ´ÅÅ̶ÁÈ¡µÄ×Ö½ÚÊý¡£

ÏȽ²¼¸¸öÖ÷¼üµÄ±³¾°ÖªÊ¶£º

  • Ö÷¼üÓÃÓÚÊý¾ÝÅÅÐò
    • ClickHouse ½²Êý¾Ý°´Ö÷¼üÅÅÐò£¬ÔÙ°´index_granularityÉèÖõĴóС£¨Ä¬ÈÏ 8192£©½«Êý¾Ý·ÖΪһ¸ö¸ö granules[7]
    • ÿ¸ö granules µÄµÚÒ»ÐÐ×÷ΪÖ÷¼üË÷ÒýÖеÄÒ»¸öÔªËØ[8]
  • ²éѯʱÔÚÖ÷¼üÉÏʹÓöþ·Ö²éÕÒÌø¹ýÎÞ¹Ø granules[9]
  • Ö÷¼üÖ»ÄÜͨ¹ýǰ׺ÃüÖÐË÷Òý[10]
  • ÿһ¸ö part ÄÚµÄ.binÎļþ´æ´¢ÁË n ¸ö granules£¬ÓÃ.mrkÎļþ¼Ç¼ÿһ¸ö granules ÔÚ.binÎļþµÄµØÖ·Æ«ÒÆ[11]
  • ClickHouse »áÔÚºǫ́²»¶ÏºÏ²¢Í¬Ò»¸ö partition µÄ²»Í¬ parts£¬Ö±µ½´óС/·Ö²¼´ïµ½¡°Ô¤ÆÚ¡±

Ö÷¼üµÄÑ¡ÔñÓ¦¸Ã¾¡¿ÉÄÜ¿¼ÂÇÖÜÈ«£¬ÒòΪÖ÷¼üÊÇÎÞ·¨Ð޸ĵģ¬Ö»Äܽ¨Ð±íºóÊý¾ÝÇ¨ÒÆ¡£

×î¼Ñʵ¼ù[12]£¨Õë¶Ô(Replicated)MergeTree ÒýÇæ£©£º

  1. Ñ¡ÔñÓÀÔ¶»áÓÃÓÚ¹ýÂËÌõ¼þµÄÁÐ
  2. Ô½ÖØÒªµÄ¡¢»ùÊýÔ½µÍµÄ·Å×ó±ß
  3. Ö÷¼üÖв»Òª³öÏÖÁ½¸ö¸ß»ùÊý×ֶΣ¬Ò»°ã×îºóÒ»ÁпÉÒÔΪ×ÜÌåÔö³¤µÄʱ¼ä×Ö¶Î
  4. ½«ÐеÄÌØÕ÷×ֶμÓÈ룬½«ÏàËÆµÄÐзÅÒ»Æð£¬Ìá¸ßѹËõÂÊ
  5. ÈôÖ÷¼ü°üº¬Ö÷´Ó¹ØÏµ£¬Ö÷·Å×ó±ß£¬´Ó·ÅÓÒ±ß

Data skipping indexes

×îºóÒ»²½ÊÇÌø±íË÷Òý£¬Õâ¸öûÓÐÌ«¶à¿ÉÒÔ½²µÄµØ·½£¬ºÍÆäËûÊý¾Ý¿âÏàͬ£¬Ìø±íË÷ÒýÓÃÓÚ¾¡Á¿¼õÉÙ¶ÁÈ¡µÄÐÐÊý¡£¾ßÌå²Î¿´¹Ù·½Îĵµ¡£

ÅäÖÃÓÅ»¯

ÅäÖÃÓÅ»¯·ÖΪÁ½²¿·Ö£¬È«¾ÖÅäÖÃÓÅ»¯ºÍ MergeTree ±íÅäÖÃÓÅ»¯¡£

È«¾ÖÅäÖÃÓÅ»¯

²Î¿´AltinityÑ¡ÔñÐÔÅäÖÃÓÅ»¯Ïî¡£

ÕâÀïдÈý¸öÍÆ¼öµÄÅäÖãº

  1. Ìí¼Óforce_index_by_dateºÍforce_primary_key±ÜÃâÈ«Å̶ÁÈ¡
  2. µ÷ÕûÄÚ´æÅäÖ㬲ο¼Altinity
  3. ϵͳ±íÌí¼Ó TTL ºÍttl_only_drop_parts±íÅäÖÃ

±íÅäÖÃÓÅ»¯

³ýÁËÈ«¾ÖÅäÖã¬MergeTree ÒýÇæ¼Ò×åÿÕűíÒ²ÓÐ×Ô¼ºµÄÅäÖÃÏî¡£[13]

ÍÆ¼öÉèÖÃÈçÏÂÅäÖãº

  1. ttl_only_drop_parts=1¡£Ö»ÓÐ parts ÖÐËùÓÐÊý¾Ý¶¼¹ýÆÚÁ˲Żá DROP£¬¿ÉÒÔÓÐЧ¼õÉÙTTL_MERGE·¢ÉúµÄƵÂÊ£¬½µµÍ´ÅÅ̸ºÔØ¡£
  2. merge_with_ttl_timeout=86400¡£ÅäºÏÉÏÒ»ÏîÅäÖ㬽« TTL ¼ì²éµ÷ÕûΪ 1 ÌìÒ»´Î£¨Ä¬ÈÏ 4 Сʱһ´Î£©¡£
  3. use_minimalistic_part_header_in_zookeeper=1¡£¿ÉÒÔÓÐЧ½µµÍ Zookeeper ¸ºÔØ£¬±ÜÃâ Zookeeeper ³ÉΪÐÔÄÜÆ¿¾±£¨²åÈ룩¡£

×Ö¶ÎÓÅ»¯

³ýÁËË÷Òý¡¢·ÖÇøºÍÅäÖÃÍ⣬»¹Óбí×ֶοÉÒÔÓÅ»¯¡£½ÓÏÂÀ´½«½²Êö Schema ÀàÐÍ¡¢CODEC ºÍ»º´æÈý¸ö·½Ãæ¡£

×¢Ò⣬¾¡Á¿±ÜÃâʹÓà Null£¬ÔÚ ClickHouse ÖÐ Null »áÓÃÒ»¸öµ¥¶À Null masks Îļþ´æ´¢ÄÄЩÐÐΪ Null[14]£¬Òò´Ë¶Áȡij¸öÆÕͨ×Ö¶ÎÖ»ÐèÒª.binºÍ.mrkÁ½¸öÎļþ£¬¶ø¶ÁÈ¡ Nullable ×Ö¶ÎʱÐèÒª.bin¡¢.mrkºÍ masks Îļþ¡£ÉçÇø²éѯÑéÖ¤£¬×î¸ß»áÓÐ 2 ±¶ÐÔÄÜËðʧ¡£[15]

Schema ÀàÐÍ

ʹÓà ClickHouse ´æ´¢Ê±£¬Ò»°ãÓû§¶¼»á´´½¨´ó¿í±í£¬°üº¬´óÁ¿ÊýÖµ¡¢×Ö·û´®ÀàÐ͵Ä×ֶΡ£ÕâÀïÌá¼°Á½ÖÖ Schema ÀàÐÍ[16]£¬Ã»ÓÐÄĸö¸üÓÅÔ½£¬ÓɶÁÕßÖ´ÐÐÆÀ¹ÀÒµÎñÊʺÏÄÄÒ»ÖÖ¡£

ƽÆÌ×Ö¶Î

ÕâÊÇÎÒÃÇÖ÷±íÕýÔÚʹÓõÄÀàÐÍ£¬½«¿ÉÄÜÓõ½µÄ×Ö¶ÎÔ¤ÁôƽÆÌ£¬³ýÁËһϵÁлù´¡×Ö¶ÎÍ⣬Ôö¼Ó´óÁ¿metric1, metric2...metricNºÍtag1, tag2...tagNµÈµÈ×ֶΡ£

Óŵ㣺

  • ¼òµ¥
  • Ö»¶ÁÈ¡ËùÐèÒªµÄÁУ¬·Ç³£¸ßЧ
  • ÿ¸öÖ¸±ê¡¢±ê¼Ç¶¼¿ÉÒÔÓÐÌØÊâÀàÐÍ
  • ÊʺÏÃܼ¯¼Ç¼£¨ËùÓÐÔ¤Áô×ֶμ¸ºõÈ«ÓÃÉÏ£©

ȱµã£º

  • Ìí¼Ó×Ö¶ÎÐèÒª¸Ä±ä schema
  • Ô¤Áô×ֶβ»Äܹý¶à£¬×î¶à 100 ¡« 200 ¸ö
  • Èç¹ûʹÓúÜÏ¡Ê裬»á´´½¨´óÁ¿ sparse file ×Ö¶Î
  • ÐèÒª±êʶ¡°Êý¾Ýȱʧ¡±µÄÇé¿ö£¨Null »òÕßĬÈÏÖµ£©
  • ¶ÁÈ¡µÄÁÐÔ½¶à£¬ÐèÒª¶ÁÈ¡ÎļþÔ½¶à£¬IO ´ÎÊýÔ½¶à

arrays/nested/map ×Ö¶Î

ÕâÊÇÎÒÃÇ ctree ¹¦ÄÜÕýÔÚʹÓõÄÀàÐÍ¡£½«ÒµÎñ×Ö¶ÎÈûÈëǶÌ×Êý¾ÝÀàÐÍÖУ¬±ÈÈç array¡¢nested struct ºÍ map¡£ºóÎÄÒÔ array ¾ÙÀý£ºmetric_array¡¢tag_array¡£

Óŵ㣺

  • ¶¯Ì¬À©Õ¹
  • ClickHouse ÓдóÁ¿¸ßЧµÄÏà¹Ø´¦Àíº¯Êý£¬ÉõÖÁ¿ÉÒÔÕë¶Ô Array¡¢Map ÉèÖÃË÷Òý
  • ÊʺÏÏ¡Êè¼Ç¼£¨Ã¿Ðд洢ÉÙÁ¿Öµ£¬¾¡¹Ü×Ü»ùÊýºÜ¸ß£©

ȱµã£º

  • Ö»ÐèÒªÆäÖÐÒ»¸ö metric/tag ʱ£¬ÐèÒª½«Õû¸ö array È«²¿¶ÁÈëÄÚ´æ
  • ²»Í¨Óã¬ÓëÆäËûϵͳ½»»¥Ê±±È½ÏÂé·³¡£±ÈÈç spark ʹÓà jdbc ʱ£¬Ç¶Ì×ÀàÐÍÎÞ·¨Ö§³Ö±ÈÈç array(array(string))
  • ²»Í¨ÒâÒåµÄÖµ´æ´¢ÔÚÏàͬ×ֶΣ¬Ñ¹ËõÂʱäµÍ
  • ÐèÒª²»Í¬ÀàÐ͵ÄÔ¤Áô×Ö¶ÎʱÐèÒª´´½¨²»Í¬ÀàÐÍ

×ܽá

¹ØÓÚ Schema Éè¼ÆÕâÀ¶ÁÕß¿ÉÒÔ¿¼ÂÇ 28 Ô­Ôò£¬ÀíÂÛÉÏ 80%²éѯֻ»áÓõ½ 20%µÄÒµÎñ×ֶΣ¬Òò´Ë¿ÉÒÔ½«Ê¹ÓÃÆµÂʸߵÄÒµÎñ×Ö¶ÎÆ½ÆÌ£¬½«Ê¹ÓÃÆµÂʵ͵Ä×ֶηÅÈëǶÌ׽ṹÖС£

CODEC

CODEC ·ÖΪѹËõËã·¨ CODEC¡¢´æ´¢¸ñʽ CODEC ºÍ¼ÓÃÜ CODEC£¬Ò»°ã¿ÉÒÔ×éºÏÒ»ÆðʹÓá£ÔÚ ClickHouse ÖУ¬Î´ÏÔʾָ¶¨ CODEC µÄ×ֶζ¼»á±»·ÖÅäÒ»¸ö DEFAULT ĬÈÏ CODEC LZ4£¨³ý·ÇÓû§ÐÞ¸Ä clickhouse ÅäÖà compression ²¿·Ö[17]£©¡£

ѹËõËã·¨ CODEC µÄÑ¡ÔñÊÇÒ»¸öƽºâ°åÎÊÌ⣬¸ü¸ßµÄѹËõ¶È¿ÉÒÔÓиüÉÙµÄ IO µ«ÊǸü¸ßµÄ CPU£¬¸üµÍµÄѹËõ¶ÈÓиü¶àµÄ IO µ«ÊǸüÉÙµÄ CPU¡£ÕâÐèÒª¶ÁÕ߸ù¾Ý²¿Êð»úÆ÷ÅäÖÃ×ÔÐÐÑ¡ÔñºÏÊʵÄѹËõËã·¨ºÍѹËõµÈ¼¶¡£

ÕâÀïÌṩÁ½¸öÅжϲßÂÔ£º

  • ´æÔÚË÷ÒýµÄ×ֶοÉÒÔÉèÖøü¸ßµÄѹËõµÈ¼¶
  • ÓÃÓÚ where Ìõ¼þµÄ×Ö¶ÎÓ¦¸ÃÉèÖøüµÍѹËõµÈ¼¶

´æ´¢¸ñʽ CODEC Ö÷ÒªÊÇDelta¡¢DoubleDelta¡¢Gorilla¡¢FPCºÍT64¼¸ÖÖ¡£

  • Delta´æ´¢ÐÐÖ®¼äµÄ±ä»¯Öµ£¬Êʺϱ仯½ÏСÇұȽϹ̶¨µÄÁУ¬±ÈÈçʱ¼ä´Á¡£ÐèÒªÅäºÏ ZSTD ʹÓÃ
  • DoubleDelta´æ´¢DeltaµÄDelta¡£Êʺϱ仯ºÜÂýµÄÐòÁÐ
  • GorillaÊʺϲ»Ôõô±ä¶¯µÄ integer¡¢float ÀàÐÍ[18]
  • FPCÊʺÏÓÚ float ÀàÐÍ£¬ÓÉÓÚÎÒÃÇδʹÓà float ×Ö¶ÎÕâÀïÂÔ¹ý
  • T64´æ´¢±àÂ뷶ΧÄÚ×î´ó¡¢×îСֵ£¬ÒÔתΪ 64bit ´æ´¢£¬ÊʺϽÏСµÄ integer ÀàÐÍ

À©Õ¹ÔĶÁ£º

»º´æ

mark_cache_size¿ÉÒÔµ÷Õû.mrkÎļþµÄ»º´æ´óС£¬Ä¬ÈÏΪ 5GB¡£Êʵ±µ÷´ó¿ÉÒÔ¼õÉÙ²éѯʱ IO ´ÎÊý£¬ÓÐЧ½µµÍ´ÅÅÌѹÁ¦¡£[19]

  • ×Ö¶ÎÔ½¶à£¬.mrkÎļþÔ½´ó
  • index_granularityÓë.mrkÎļþ´óС³É¸ºÏà¹Ø

¿ÉÒÔͨ¹ýÈçÏ SQL ²éѯµ±Ç°ËùÓбíµÄ parts ÐÅÏ¢£º

SELECT
database,
table,
count()ASparts,
uniqExact(partition_id)ASpartition_cnt,
sum(rows),
formatReadableSize(sum(data_compressed_bytes)AScomp_bytes)AScomp,
formatReadableSize(sum(data_uncompressed_bytes)ASuncomp_bytes)ASuncomp,
uncomp_bytes/comp_bytesASratio,
formatReadableSize(sum(marks_bytes)ASmark_sum)ASmarks,
mark_sum/uncomp_bytesASmark_ratio
FROMcluster(default_cluster,system.parts)
WHEREactive
GROUPBY
database,
table
ORDERBYcomp_bytesDESC

¿ÉÒÔͨ¹ýÈçϲéѯ»ñÈ¡µ±Ìì mrk »º´æÃüÖÐÇé¿ö£º

WITH(ProfileEvents.Values[indexOf(ProfileEvents.Names,'MarkCacheHits')])ASMARK_CACHE_HITS
SELECT
toHour(event_time)AStime,
countIf(MARK_CACHE_HITS!=0)AShit_query_count,
count()AStotal_query_count,
hit_query_count/total_query_countAShit_percent,
avg(MARK_CACHE_HITS)ASaverage_hit_files,
min(MARK_CACHE_HITS)ASminimal_hit_files,
max(MARK_CACHE_HITS)ASmaximal_hit_files,
quantile(0.5)(MARK_CACHE_HITS)AS"50",
quantile(0.9)(MARK_CACHE_HITS)AS"90",
quantile(0.99)(MARK_CACHE_HITS)AS"99"
FROMclusterAllReplicas('default_cluster',system.query_log)
WHEREevent_date=toDate(now())
AND(type=2ORtype=4)
ANDquery_kind='Select'
GROUPBYtime
ORDERBYtimeASC

ÒÔ¼°Èçϲéѯ»ñÈ¡µ±Ç° mrk »º´æÄÚ´æÕ¼ÓÃÇé¿ö£º

SELECTformatReadableSize(value)
FROMasynchronous_metrics
WHEREmetric='MarkCacheBytes'

ÒÔ¼° mrk »º´æ¾ßÌ建´æ¶àÉÙÎļþ£º

SELECTvalue
FROMasynchronous_metrics
WHEREmetric='MarkCacheFiles'

³ý´ËÖ®Í⣬ClickHouse »¹¿ÉÒÔµ÷Õûuncompressed_cache»º´æÒ»¶¨Á¿Ô­Ê¼Êý¾ÝÓÚÄÚ´æÖС£[20]µ«ÊÇÕâ¸ö»º´æÖ»¶Ô´óÁ¿¶Ì²éѯÓÐЧ£¬¶ÔÓÚ OLAP À´Ëµ£¬²éÑ¯Ç§Ææ°Ù¹Ö£¬²»Ì«½¨Òéµ÷ÕûÕâ¸öÅäÖá£

ÒµÎñÓÅ»¯

µ½ÁË×îÄѵIJ¿·Ö£¬ÓÉÓÚ½ÓÏÂÀ´µÄ²¿·ÖºÍ²»Í¬ÒµÎñϢϢÏà¹Ø£¬ÎªÁ˽²½âÎÒÃÇÒµÎñÉϵÄÓÅ»¯£¬ÎÒÏȽéÉÜÏÂÎÒÃÇÒµÎñÇé¿ö£º

QAPM Ö÷´òÓ¦ÓÃÐÔÄÜ¼à¿Ø£¬Ö÷Òª·ÖΪָ±ê¡¢¸öÀýÁ½ÕÅ±í¡£¸öÀý±í°üº¬¸ü¶à»ù´¡×ֶΣ¬Ò»°ãÓû§Õ¹Ê¾£»Ö¸±ê±íÖ÷ÒªÓÃÓھۺϼÆËã¡£

Ê×ÏÈÈ·¶¨Ö÷¼ü£¬ÎãÓ¹ÖÃÒɵÄǰÁ½¸öÒ»¶¨ÊÇ

  • app_id¡£·ÅÊ×룬ÒòΪ¿ÉÄÜ´æÔÚͬһ¸ö²úÆ·²»Í¬¹¦ÄÜÁª¶¯µÄÇé¿ö£¬±ÈÈç»á»°·ÖÎö
  • category¡£·ÅµÚ¶þ룬ÒòΪ¹¦ÄÜÖ®¼ä¶ÀÁ¢£¬´óÁ¿²éÑ¯Ö»Éæ¼°µ¥¹¦ÄÜ

Ö¸±êûÓÐÌØÕ÷¼üÖµ£¬Òò´ËÖ»Ìí¼Ó´¦Àíʱ¼ä×÷ΪµÚÈý¸öÖ÷¼ü¡£

¶ÔÓÚÖ¸±ê±í£¬ÉèÖõÄÖ÷¼üΪ£ºapp_id, category, entrance_time

¸öÀý´æÔÚÌØÕ÷ feature£¬ÓÉÓÚ£º

  1. ´óÁ¿²éѯ¶¼°üº¬ feature_md5
  2. feature ÊÇÐеÄÌØÕ÷£¬ÏàͬµÄÌØÕ÷±íÃ÷Á½ÐÐÏàËÆ£¬

½«ÌØÕ÷µÄ md5 Ôö¼Óµ½Ö÷¼üÖУ¬ÓÃÓÚ¼ÓËÙ²éѯ¡¢Ìá¸ßѹËõÂÊ¡£µ«ÊÇÕâÀïÓÐÁ½¸ö·½Ïò£º

  • Èô feature_md5 ÊǸ߻ùÊý¡¢´óÁ¿³¤Î²µÄ×Ö¶Î
    • ÉèÖõÄÖ÷¼üΪ£ºapp_id, category, intDiv(entrance_time, 3600000), feature_md5
  • Èô feature_md5 »ùÊý¿ÉÒÔ½µµÍµ½Ç§¡¢ÍòÁ¿¼¶
    • ÉèÖõÄÖ÷¼üΪ£ºapp_id, category, feature_md5, entrance_time
·ÖÇø¼üÉèÖÃΪ`PARTITIONBYintDiv(entrance_time,2592000000)

¼øÓÚSAMPLE BYÐèÒª½« xxHash ×ֶηÅÔÚÖ÷¼üÖУ¬Ö÷¼ü¶¼°üº¬¸ß»ùÊý×ֶΣ¬¾Í²»ÉèÖóéÑù¼ü£¬¶øÊÇÔÚÐèÒªµÄʱºòÈí³éÑù[21]£º

SELECTcount()FROMtableWHERE...ANDcityHash64(some_high_card_key)%10=0;--Deterministic
SELECTcount()FROMtableWHERE...ANDrand()%10=0;--Non-deterministic

²åÈëÓÅ»¯

Êý¾Ý²åÈë¿´ÆðÀ´ºÍ²éѯÐÔÄÜûʲôÁªÏµ£¬µ«ÊÇÓмä½ÓÓ°Ïì¡£²»ºÏÀíµÄ²åÈë»áµ¼Ö¸ü¶àµÄдÅÌ¡¢¸ü¶àµÄÊý¾Ý merge ÉõÖÁÓпÉÄܲåÈëʧ°Ü£¬Ó°Ïì¶ÁÅÌÐÔÄÜ¡£

¾ÛºÏдÈë

ClickHouse ×÷Ϊ OLAP ²¢²»ÊʺÏСÅúÁ¿¡¢´ó²¢·¢Ð´È룬Ïà·´¶øÊʺϴóÅúÁ¿¡¢Ð¡²¢·¢Ð´È룬¹Ù·½½¨Òé²åÈëÊý¾ÝÿÅú´ÎÖÁÉÙ 1000 ÐУ¬»òÕßÿÃëÖÓ×î¶à 1 ´Î²åÈë¡£[22]

ÕâһС½ÚÎÒÏëÇ¿µ÷Ô­×Ó£¨Atomic Insert£©Ð´ÈëµÄ¸ÅÄһ´Î²åÈë´´½¨Ò»¸öÊý¾Ý part¡£

ǰÎÄÌá¼°£¬ClickHouse Ò»¸ö part ÊÇÒ»¸öÎļþ¼Ð£¬ºǫ́Óиö merge Ï̳߳ز»¶Ï merge ²»Í¬µÄ part¡£Ô­×Ó²åÈë¿ÉÒÔ¼õÉÙ merge ´ÎÊý£¬Èà ClickHouse ¸ºÔظüµÍ£¬ÐÔÄܸüºÃ¡£

Ô­×ÓдÈëµÄ³ä·ÖÌõ¼þ[23]£º

  • Êý¾ÝÖ±½Ó²åÈëMergeTree±í£¨²»ÄÜÓÐ Buffer ±í£©
  • Êý¾ÝÖ»²åÈëÒ»¸ö partition£¨×¢ÒâǰÎÄÌáµ½µÄ partition ºÍ part µÄÇø±ð£©
  • ¶ÔÓÚ INSERT FORMAT
    • ²åÈëÐÐÊýÉÙÓÚmax_insert_block_size£¨Ä¬ÈÏ 1048545£©
    • ¹Ø±Õ²¢Ðиñʽ»¯input_format_parallel_parsing=0
  • ¶ÔÓÚ INSERT SELECT
    • ²åÈëÐÐÊýÉÙÓÚmax_block_size
  • С block ±»ºÏ²¢µ½ºÏÊ浀 block ´óСmin_insert_block_size_rows and min_insert_block_size_bytes
  • MergeTree±í²»°üº¬ÎﻯÊÓͼ

ÕâÀïÌùÒ»ÏÂÎÒÃÇÉú²úµÄÅäÖã¨users.xml£©¡£

¾­¹ýͳ¼Æ£¬¸öÀý±íÿÐдóÔ¼ 2KB£¬Ö¸±ê±íÿÐдóÔ¼ 100B£¨Î´Ñ¹Ëõ£©¡£

ÉèÖÃmin_insert_block_size_rowsΪ 10000000£¬Ö¸±ê»áÏÈÂú×ãÕâ¸öÌõ¼þ£¬´ó¸ÅÒ»¸ö block ԭʼ´óС 1GB¡£ÉèÖÃmin_insert_block_size_bytesΪ 4096000000£¬¸öÀý»áÏÈÂú×ãÕâ¸öÌõ¼þ£¬´ó¸ÅÒ»¸ö block ԭʼ´óС 1G£¬Ô¼ 1024000 ÐС£

ÕâÈý¸öÅäÖÃÏîÊǿͻ§¶ËÅäÖã¬ÐèÒªÔÚ²åÈëµÄ session ÖÐÉèÖ㬶ø²»ÊÇÔÚÄǼ¸¸ö.xmlÖÐÅäÖá£

max_insert_block_size:16777216
input_format_parallel_parsing:0
min_insert_block_size_rows:10000000
min_insert_block_size_bytes:1024000000

×¢Ò⣬min_insert_block_size_rowsºÍmin_insert_block_size_bytesÊÇ¡°»ò¡±µÄ¹ØÏµ£º

//src/Interpreters/SquashingTransform.cpp

boolSquashingTransform::isEnoughSize(size_trows,size_tbytes)const
{
return(!min_block_size_rows&&!min_block_size_bytes)
||(min_block_size_rows&&rows>=min_block_size_rows)
||(min_block_size_bytes&&bytes>=min_block_size_bytes);
}

¶Áд·ÖÀë

£º±¾·½°¸²¢Ã»Óо­¹ýÉú²úÑéÖ¤£¬×ÃÇ鿼ÂÇ

ClickHouse ÓÐ Shard ºÍ Replica ¿ÉÒÔÅäÖã¬×÷ÓÃÈçÏÂͼËùʾ£º

Ëùν¶Áд·ÖÀëÒ²¾ÍÊǽ« Shard ·ÖΪÁ½°ë£¬Ò»°ëÖ»ÓÃÓÚ²éѯ£¬Ö»ÒªÈ÷ֲ¼Ê½±í²éѯ¶¼µ¼Èëµ½ Shard1 ¼´¿É£¨ÔÚusers.xmlÖÐÅäÖÃload_balancingΪfirst_or_random£©£»Ò»°ëÓÃÓÚдÈ룬²åÈëµÄ³ÌÐòÊÖ¶¯¿ØÖƲåÈë Shard2 µÄ½Úµã£¬ÓÉ ClickHouse µÄ ReplicatedMergeTree ²»Í¬ Shard Êý¾ÝÒÀ¿¿ zookeeper ×Ô¶¯Í¬²½µÄ²ßÂÔ½«Êý¾Ýͬ²½µ½ Shard1¡£[24]

ÕâÖÖ²ßÂÔÓÐÌìÈ»µÄȱÏÝ£º

  • дµÄÄǰë Shard ³ÖÐøÓÐÒ»¶¨Á¿£¨²»»áºÜ¸ß£©µÄ×ÊÔ´ÏûºÄÓÃÓÚдÈë
  • ¶ÁµÄÄǰë Shard »áÓÐ×ÊÔ´ÏûºÄÓÃÓÚͬ²½Ð´È루ÓÉÓÚ²»Óô¦Àí£¬»á±ÈÖ±½ÓдÈëµÄÇé¿ö×ÊÔ´ÏûºÄ¸üµÍ£©£¬µ«ÊǶÁÇëÇó»áµ¼ÖÂ×ÊÔ´ÏûºÄÍ»Ôö
  • ²¢·¢Ôö¼ÓʱÐÔÄܲ»Èç»ìºÏÇé¿ö£¬ÒòΪ¶Áд·ÖÀëÏ൱ÓÚ½«¶Á×ÊÔ´¿³°ë

£º»òÐí¿ÉÒÔÅäÖÃÁ½±ß Shard ×ÊÔ´²»Ò»ÖÂÀ´½â¾öÎÊÌ⣬±ÈÈçдÈëµÄ Shard ×ÊÔ´À­µÍ£¬×¨ÓÃÓÚ´¦ÀíÊý¾Ý²åÈ룻¶ÁµÄ Shard ×ÊÔ´¸ü¸ß£¬×¨ÃÅÓÃÓÚ´¦ÀíÍ»Ôö²¢·¢Á÷Á¿¡£

BufferEngine

Buffer ²¢²»ÍƼö³£¹æÒµÎñʹÓã¬Ö»ÓÐÔÚÆÈÇÐÐèÒª²éѯʵʱÐÔ+²åÈëÎÞ·¨´óÅúÁ¿Ô¤¾ÛºÏʱʹÓãº

  • ÎÞ·¨ atomic insert
  • ¼´Ê¹Ê¹Óà BufferEngine£¬Êý¾Ý²åÈëÒ²ÖÁÉÙ 1000 ÐÐÿ´Î£¬»òÕßÿÃëÖÓ×î¶à 1 ´Î²åÈë[25]

KafkaEngine+MV

¸Ã²¿·Ö´ý²¹³ä£¬Ïë¿´µÄͬѧ¿ÉÒÔÔÚÆÀÂÛÇøÌßÌß

Ô¤¾ÛºÏ

Ô¤¾ÛºÏÓÐÈýÖÖ·½·¨£¬ETL¡¢ÎﻯÊÓͼºÍͶӰ£¬ËûÃǵÄÇø±ðÈçÏÂ[26]£º


ETL MV Projections
Realtime no yes yes
How complex queries can be used to build the preaggregaton any complex very simple
Impacts the insert speed no yes yes
Are inconsistancies possible Depends on ETL. If it process the errors properly - no. yes (no transactions / atomicity) no
Lifetime of aggregation any any Same as the raw data
Requirements need external tools/scripting is a part of database schema is a part of table schema
How complex to use in queries Depends on aggregation, usually simple, quering a separate table Depends on aggregation, sometimes quite complex, quering a separate table Very simple, quering the main table
Can work correctly with ReplacingMergeTree as a source Yes No No
Can work correctly with CollapsingMergeTree as a source Yes For simple aggregations For simple aggregations
Can be chained Yes (Usually with DAGs / special scripts) Yes (but may be not straightforward, and often is a bad idea) No
Resources needed to calculate the increment May be signigicant Usually tiny Usually tiny

ÔÚÎÒÃÇÒµÎñÖУ¬¸öÀýÊDz»Ó¦¸ÃÔ¤¾ÛºÏµÄ£¬ÒòΪÊý¾ÝÐèÒª±»À­È¡Õ¹Ê¾¶ø²»ÓüÆËã¡£Ö¸±êÐèÒª¾ÛºÏ£¬Êý¾ÝÁ¿½Ï´ó£¬Ã¿´Îʵʱ¼ÆËã¶Ô ClickHouse ¸ºÔØÌ«´ó¡£

Æäʵ»¹ÓÐÒ»Ö־ۺϷ½Ê½£¬¹ýÆÚÊý¾Ý¾ÛºÏ¡£¿ÉÒԲο¼£¬Í¬ÑùÏÞÖÆÒªÇó group by µÄ¼üֵΪÖ÷¼üǰ׺¡£

ÔÚÎÒÃÇÒµÎñʹÓÃʱ£¬Ê²Ã´Ê±ºòÓÃÄÄÒ»¸öÄØ£¿

  1. ÐèÒªÕë¶Ôij¸ö¹¦ÄܼÓËÙʱ£¬¿ÉÒÔ¿¼ÂÇÎﻯÊÓͼ/ͶӰ
  2. È«±íÔ¤¾ÛºÏ¼ÓËÙ²éѯ£¬ÐèҪʹÓà ETL

×ÊÔ´¿ØÖÆ

×îºó£¬ÎªÁ˱ÜÃ⼯Ⱥ±»Ä³¸ö²éѯ¡¢²åÈëŪ¿å£¬ÐèÒªºÏÀí°²ÅÅÄÚ´æÊ¹Óã¬ÐèÒª¸ø·ÃÎÊÕË»§·ÖȨÏÞ£¬ÔÚÎÒÃÇÒµÎñ·ÖΪ£º

  • default£º×î¸ß¼¶Õ˺ţ¬²»Ê¹ÓÃ
  • root£ºÊý¾Ý²åÈ룬ÅäÖþۺÏдÈ벿·ÖµÄ¼¸¸öÅäÖÃÏî
  • monitor£ºÄÚ²¿¿ª·¢Ê¹Óã¬È¨Ï޽ϸß
  • viewer£ºweb ʹÓã¬Ìí¼Ó´óÁ¿ÏÞÖÆ

viewerÕË»§ÅäÖÃÈçÏÂËùʾ£º

<yandex>
<profiles>
<query>
<max_memory_usage>10000000000</max_memory_usage>
<max_memory_usage_for_all_queries>100000000000</max_memory_usage_for_all_queries>
<max_rows_to_read>1000000000</max_rows_to_read>
<max_bytes_to_read>100000000000</max_bytes_to_read>
<max_rows_to_group_by>1000000</max_rows_to_group_by>
<group_by_overflow_mode>any</group_by_overflow_mode>
<max_rows_to_sort>1000000</max_rows_to_sort>
<max_bytes_to_sort>1000000000</max_bytes_to_sort>
<max_result_rows>100000</max_result_rows>
<max_result_bytes>100000000</max_result_bytes>
<result_overflow_mode>break</result_overflow_mode>
<max_execution_time>60</max_eecution_time>
<min_execution_speed>1000000</min_execution_speed>
<timeout_before_checking_execution_speed>15</timeout_before_checking_execution_speed>
<max_columns_to_read>25</max_columns_to_read>
<max_temporary_columns>100</max_temporary_columns>
<max_temporary_non_const_columns>50</max_temporary_non_const_columns>
<max_subquery_depth>2</max_subquery_depth>
<max_pipeline_depth>25</max_pipeline_depth>
<max_ast_depth>50</max_ast_depth>
<max_ast_elements>100</max_ast_elements>
<readonly>1</readonly>
</query>
</profiles>
</yandex>

ͬʱ½¨ÒéÉèÖà quota£¬¼õÉÙ´óÁ¿¶ÁÅ̼ÆËã¡¢LIMIT ÉÙÁ¿Êý¾Ý·µ»ØµÄÇé¿ö·¢Éú¡£


ÎÒÃÇÊÇ CSIG ÐÔÄܹ¤³Ì¶þ×é QAPM ÍŶӣ¬QAPM ʱһ¿îÓ¦ÓÃÐÔÄÜ¼à¿Ø¹¤¾ß£¬¸²¸Ç android¡¢ios¡¢Ð¡³ÌÐò¡¢mac ºÍ win ¶à¶Ë£¬ÒÑÓÐÌÚѶ»áÒé¡¢ÓÅÒ¿âµÈ´óÓû§½ÓÈ룬ֵµÃÐÅÀµ£¬»¶Ó­Í¬ÊÂÊÔÓÃÎÒÃÇ QAPM ²úÆ·¡«Ìø×ªÁ´½Ó

ÔÚ ClickHouse ÓÅ»¯¹ý³ÌÓöµ½ÎÞÊýµÄÎÊÌ⣬¿¨ÔÚ ClickHouse ×ÔÉí¼à¿ØÎÞ·¨¸²¸ÇµÄ½ÇÂäʱ£¬È«¿¿ÐÔÄܹ¤³ÌÈý×éÔ±¹¤µÄ Drop£¨ÓêµÎ£©¹¤¾ßµÄ¶¦Á¦ÏàÖú£¬¸ßЧֱ¹Û¼à¿Ø CVM ¸÷ÏîÖ¸±ê£¬½