## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsimportcollections.abcimportcontextlibimporthashlibimportloggingimportmathimportoperatorimportosimportsignalimportwarningsfromcollectionsimportdefaultdictfromdatetimeimportdatetime,timedeltafromfunctoolsimportpartialfromtypesimportTracebackTypefromtypingimport(TYPE_CHECKING,Any,Callable,Collection,ContextManager,Generator,Iterable,NamedTuple,Tuple,)fromurllib.parseimportquoteimportattrimportdillimportjinja2importlazy_object_proxyimportpendulumfromjinja2importTemplateAssertionError,UndefinedErrorfromsqlalchemyimport(Column,DateTime,Float,ForeignKeyConstraint,Index,Integer,PrimaryKeyConstraint,String,and_,false,func,inspect,or_,text,)fromsqlalchemy.ext.associationproxyimportassociation_proxyfromsqlalchemy.ext.mutableimportMutableDictfromsqlalchemy.ormimportreconstructor,relationshipfromsqlalchemy.orm.attributesimportNO_VALUE,set_committed_valuefromsqlalchemy.orm.excimportNoResultFoundfromsqlalchemy.orm.queryimportQueryfromsqlalchemy.orm.sessionimportSessionfromsqlalchemy.sql.elementsimportBooleanClauseListfromsqlalchemy.sql.expressionimportColumnOperatorsfromairflowimportsettingsfromairflow.compat.functoolsimportcachefromairflow.configurationimportconffromairflow.datasetsimportDatasetfroma
irflow.datasets.managerimportdataset_managerfromairflow.exceptionsimport(AirflowException,AirflowFailException,AirflowRescheduleException,AirflowSensorTimeout,AirflowSkipException,AirflowTaskTimeout,DagRunNotFound,RemovedInAirflow3Warning,TaskDeferralError,TaskDeferred,UnmappableXComLengthPushed,UnmappableXComTypePushed,XComForMappingNotPushed,)fromairflow.models.baseimportBase,StringIDfromairflow.models.logimportLogfromairflow.models.paramimportParamsDictfromairflow.models.taskfailimportTaskFailfromairflow.models.taskmapimportTaskMapfromairflow.models.taskrescheduleimportTaskReschedulefromairflow.models.xcomimportXCOM_RETURN_KEY,XComfromairflow.plugins_managerimportintegrate_macros_pluginsfromairflow.sentryimportSentryfromairflow.statsimportStatsfromairflow.templatesimportSandboxedEnvironmentfromairflow.ti_deps.dep_contextimportDepContextfromairflow.ti_deps.dependencies_depsimportREQUEUEABLE_DEPS,RUNNING_DEPSfromairflow.timetables.baseimportDataIntervalfromairflow.typing_compatimportLiteralfromairflow.utilsimporttimezonefromairflow.utils.contextimportConnectionAccessor,Context,VariableAccessor,context_mergefromairflow.utils.emailimportsend_emailfromairflow.utils.helpersimportrender_template_to_stringfromairflow.utils.log.logging_mixinimportLoggingMixinfromairflow.utils.netimportget_hostnamefromairflow.utils.operator_helpersimportcontext_to_airflow_varsfromairflow.utils.platformimportgetuserfromairflow.utils.retriesimportrun_with_db_retriesfromairflow.utils.sessionimportNEW_SESSION,create_session,provide_sessionfromairflow.utils.sqlalchemyimport(ExecutorConfigType,ExtendedJSON,UtcDateTime,tuple_in_condition,with_row_locks,)fromairflow.utils.stateimportDagRunState,State,TaskInstanceStatefromairflow.utils.timeoutimporttimeout
def set_current_context(context: Context) -> Generator[Context, None, None]:
    """
    Make ``context`` the current execution context while the caller's block runs.

    The context is pushed onto the module-level context stack, yielded, and
    popped again afterwards, even if the caller raises. Meant to be entered
    once per task execution, before ``operator.execute`` is called.

    :param context: the execution context to make current
    """
    # NOTE(review): no ``@contextlib.contextmanager`` decorator is visible on this
    # generator in this view -- confirm it is applied where the function is defined.
    _CURRENT_CONTEXT.append(context)
    try:
        yield context
    finally:
        popped = _CURRENT_CONTEXT.pop()
        if popped != context:
            # The stack was pushed/popped by someone else without being unwound.
            log.warning(
                "Current context is not equal to the state at context stack. Expected=%s, got=%s",
                context,
                popped,
            )
def clear_task_instances(
    tis: list[TaskInstance],
    session: Session,
    activate_dag_runs: None = None,
    dag: DAG | None = None,
    dag_run_state: DagRunState | Literal[False] = DagRunState.QUEUED,
) -> None:
    """
    Clear a set of task instances, making sure the running ones get killed.

    Running task instances that have a job are flagged RESTARTING; all others
    are reset to no state with ``max_tries`` recomputed. Task reschedule rows for
    the cleared instances are deleted, and (unless disabled) the owning DagRuns
    are moved to ``dag_run_state``.

    :param tis: a list of task instances
    :param session: current session
    :param activate_dag_runs: Deprecated parameter, do not pass
    :param dag: DAG object
    :param dag_run_state: state to set DagRun to. If set to False, dagrun state
        will not be changed.
    """
    job_ids = []
    # Keys: dag_id -> run_id -> map_indexes -> try_numbers -> task_id
    task_id_by_key: dict[str, dict[str, dict[int, dict[int, set[str]]]]] = defaultdict(
        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(set)))
    )

    for ti in tis:
        if ti.state == TaskInstanceState.RUNNING:
            if ti.job_id:
                # If a task is cleared when running, set its state to RESTARTING so that
                # the task is terminated and becomes eligible for retry.
                ti.state = TaskInstanceState.RESTARTING
                job_ids.append(ti.job_id)
        else:
            cleared_task_id = ti.task_id
            if dag and dag.has_task(cleared_task_id):
                task = dag.get_task(cleared_task_id)
                ti.refresh_from_task(task)
                ti.max_tries = ti.try_number + task.retries - 1
            else:
                # Ignore errors when updating max_tries if dag is None or
                # task not found in dag since database records could be
                # outdated. We make max_tries the maximum value of its
                # original max_tries or the last attempted try number.
                ti.max_tries = max(ti.max_tries, ti.prev_attempted_tries)
            ti.state = None
            ti.external_executor_id = None
            ti.clear_next_method_args()
            session.merge(ti)

        task_id_by_key[ti.dag_id][ti.run_id][ti.map_index][ti.try_number].add(ti.task_id)

    if task_id_by_key:
        # Clear all reschedules related to the ti to clear.
        # This is an optimization for the common case where all tis are for a small number
        # of dag_id, run_id, try_number, and map_index. The nested dict is walked level by
        # level so the where clause is built hierarchically, which speeds up the delete
        # statement by more than 40x for large number of tis (50k+).
        conditions = or_(
            and_(
                TR.dag_id == dag_id,
                or_(
                    and_(
                        TR.run_id == run_id,
                        or_(
                            and_(
                                TR.map_index == map_index,
                                or_(
                                    and_(TR.try_number == try_number, TR.task_id.in_(task_ids))
                                    for try_number, task_ids in task_tries.items()
                                ),
                            )
                            for map_index, task_tries in map_indexes.items()
                        ),
                    )
                    for run_id, map_indexes in run_ids.items()
                ),
            )
            for dag_id, run_ids in task_id_by_key.items()
        )
        session.execute(TR.__table__.delete().where(conditions))

    if job_ids:
        from airflow.jobs.base_job import BaseJob

        restarting_jobs = session.query(BaseJob).filter(BaseJob.id.in_(job_ids)).all()
        for job in restarting_jobs:
            job.state = TaskInstanceState.RESTARTING

    if activate_dag_runs is not None:
        warnings.warn(
            "`activate_dag_runs` parameter to clear_task_instances function is deprecated. "
            "Please use `dag_run_state`",
            RemovedInAirflow3Warning,
            stacklevel=2,
        )
        if not activate_dag_runs:
            dag_run_state = False

    if dag_run_state is not False and tis:
        from airflow.models.dagrun import DagRun  # Avoid circular import

        run_ids_by_dag_id = defaultdict(set)
        for instance in tis:
            run_ids_by_dag_id[instance.dag_id].add(instance.run_id)

        dag_runs = (
            session.query(DagRun)
            .filter(
                or_(
                    and_(DagRun.dag_id == dag_id, DagRun.run_id.in_(run_ids))
                    for dag_id, run_ids in run_ids_by_dag_id.items()
                )
            )
            .all()
        )
        dag_run_state = DagRunState(dag_run_state)  # Validate the state value.
        for dr in dag_runs:
            dr.state = dag_run_state
            dr.start_date = timezone.utcnow()
            if dag_run_state == DagRunState.QUEUED:
                # A re-queued run has not started yet and must be scheduled afresh.
                dr.last_scheduling_decision = None
                dr.start_date = None
    session.flush()
class _LazyXComAccessIterator(collections.abc.Iterator):
    """Iterator that enters the wrapped query context manager on first use.

    Each item produced by the underlying query is passed through
    ``XCom.deserialize_value`` before being returned.
    """

    __slots__ = ['_cm', '_it']

    def __init__(self, cm: ContextManager[Query]):
        self._cm = cm
        self._it = None

    def __del__(self):
        # Nothing to release unless iteration actually entered the context manager.
        if not self._it:
            return
        self._cm.__exit__(None, None, None)

    def __iter__(self):
        return self

    def __next__(self):
        if not self._it:
            # First call: open the query context and start iterating over it.
            self._it = iter(self._cm.__enter__())
        row = next(self._it)
        return XCom.deserialize_value(row)


@attr.define
class _LazyXComAccess(collections.abc.Sequence):
    """Wrapper to lazily pull XCom with a sequence-like interface.

    Note that since the session bound to the parent query may have died when we
    actually access the sequence's content, we must create a new session
    for every function call with ``with_session()``.
    """

    dag_id: str
    run_id: str
    task_id: str
    _query: Query = attr.ib(repr=False)
    _len: int | None = attr.ib(init=False, repr=False, default=None)

    @classmethod
    def build_from_single_xcom(cls, first: XCom, query: Query) -> _LazyXComAccess:
        """Build an accessor over the mapped XCom values sharing ``first``'s dag/run/task."""
        narrowed = query.with_entities(XCom.value).filter(
            XCom.run_id == first.run_id,
            XCom.task_id == first.task_id,
            XCom.dag_id == first.dag_id,
            XCom.map_index >= 0,
        )
        # Drop any inherited ordering, then order by map index.
        ordered = narrowed.order_by(None).order_by(XCom.map_index.asc())
        return cls(
            dag_id=first.dag_id,
            run_id=first.run_id,
            task_id=first.task_id,
            query=ordered,
        )

    def __len__(self):
        # The count is computed once and cached on the instance.
        if self._len is None:
            with self._get_bound_query() as query:
                self._len = query.count()
        return self._len

    def __iter__(self):
        return _LazyXComAccessIterator(self._get_bound_query())

    def __getitem__(self, key):
        if not isinstance(key, int):
            raise ValueError("only support index access for now")
        try:
            with self._get_bound_query() as query:
                row = query.offset(key).limit(1).one()
        except NoResultFound:
            raise IndexError(key) from None
        return XCom.deserialize_value(row)

    @contextlib.contextmanager
    def _get_bound_query(self) -> Generator[Query, None, None]:
        """Yield the query bound to a usable session, opening a temporary one if needed."""
        # Do we have a valid session already?
        bound_session = self._query.session
        if bound_session and bound_session.is_active:
            yield self._query
            return

        session = settings.Session()
        try:
            yield self._query.with_session(session)
        finally:
            session.close()
class TaskInstanceKey(NamedTuple):
    """Key used to identify task instance."""

    # Field order follows the positional construction used by the methods below:
    # (dag_id, task_id, run_id, try_number, map_index).
    dag_id: str
    task_id: str
    run_id: str
    # NOTE(review): the two defaults below are assumed (first try, unmapped task);
    # confirm against the rest of the project before relying on them.
    try_number: int = 1
    map_index: int = -1

    # NOTE(review): the sibling accessors ``reduced`` and ``key`` are properties;
    # confirm whether ``primary`` is meant to be one as well.
    def primary(self) -> tuple[str, str, str, int]:
        """Return task instance primary key part of the key"""
        return self.dag_id, self.task_id, self.run_id, self.map_index

    @property
    def reduced(self) -> TaskInstanceKey:
        """Remake the key by subtracting 1 from try number to match in memory information"""
        # The try number never drops below 1.
        return TaskInstanceKey(
            self.dag_id, self.task_id, self.run_id, max(1, self.try_number - 1), self.map_index
        )

    def with_try_number(self, try_number: int) -> TaskInstanceKey:
        """Returns TaskInstanceKey with provided ``try_number``"""
        return TaskInstanceKey(self.dag_id, self.task_id, self.run_id, try_number, self.map_index)

    @property
    def key(self) -> TaskInstanceKey:
        """For API-compatibility with TaskInstance.

        Returns self
        """
        return self
[docs]classTaskInstance(Base,LoggingMixin):""" Task instances store the state of a task instance. This table is the authority and single source of truth around what tasks have run and the state they are in. The SqlAlchemy model doesn't have a SqlAlchemy foreign key to the task or dag model deliberately to have more control over transactions. Database transactions on this table should insure double triggers and any confusion around what task instances are or aren't ready to run even while multiple schedulers may be firing task instances. A value of -1 in map_index represents any of: a TI without mapped tasks; a TI with mapped tasks that has yet to be expanded (state=pending); a TI with mapped tasks that expanded to an empty list (state=skipped). """
# The trigger_timeout should be TIMESTAMP(using UtcDateTime) but for ease of# migration, we are keeping it as DateTime pending a change where expensive# migration is inevitable.# The method to call next, and any extra arguments to pass to it.# Usually used when resuming from DEFERRED.
def __init__(
    self,
    task: Operator,
    execution_date: datetime | None = None,
    run_id: str | None = None,
    state: str | None = None,
    map_index: int = -1,
):
    """
    Create a task instance for ``task``.

    :param task: the operator this instance belongs to; dag/task ids and the
        attributes copied by ``refresh_from_task`` are taken from it
    :param execution_date: deprecated; only used to look up the DagRun's
        ``run_id`` when ``run_id`` is not given
    :param run_id: the run_id of the DagRun this instance belongs to
    :param state: initial state, applied only when truthy
    :param map_index: index of the mapped task instance
    :raises DagRunNotFound: if ``run_id`` has to be looked up from
        ``execution_date`` and no matching DagRun exists
    """
    super().__init__()
    self.dag_id = task.dag_id
    self.task_id = task.task_id
    self.map_index = map_index
    self.refresh_from_task(task)
    # init_on_load will config the log
    self.init_on_load()

    if run_id is None and execution_date is not None:
        from airflow.models.dagrun import DagRun  # Avoid circular import

        warnings.warn(
            "Passing an execution_date to `TaskInstance()` is deprecated in favour of passing a run_id",
            RemovedInAirflow3Warning,
            # Stack level is 4 because SQLA adds some wrappers around the constructor
            stacklevel=4,
        )
        # make sure we have a localized execution_date stored in UTC
        if execution_date and not timezone.is_localized(execution_date):
            self.log.warning(
                "execution date %s has no timezone information. Using default from dag or system",
                execution_date,
            )
            if self.task.has_dag():
                if TYPE_CHECKING:
                    assert self.task.dag
                execution_date = timezone.make_aware(execution_date, self.task.dag.timezone)
            else:
                execution_date = timezone.make_aware(execution_date)

            execution_date = timezone.convert_to_utc(execution_date)
        with create_session() as session:
            run_id = (
                session.query(DagRun.run_id)
                .filter_by(dag_id=self.dag_id, execution_date=execution_date)
                .scalar()
            )
            if run_id is None:
                raise DagRunNotFound(
                    f"DagRun for {self.dag_id!r} with date {execution_date} not found"
                ) from None

    self.run_id = run_id

    self.try_number = 0
    self.max_tries = self.task.retries
    self.unixname = getuser()
    if state:
        self.state = state
    self.hostname = ''
    # Is this TaskInstance being currently running within `airflow tasks run --raw`.
    # Not persisted to the database so only valid for the current process
    self.raw = False
    # can be changed when calling 'run'
    self.test_mode = False


# This hook takes ``self`` and is invoked as ``self.init_on_load()`` from
# ``__init__``, so it must be a regular instance method, not a staticmethod.
# ``reconstructor`` asks SQLAlchemy to also call it for instances loaded from the DB.
@reconstructor
def init_on_load(self) -> None:
    """Initialize the attributes that aren't stored in the DB"""
    # correctly config the ti log
    self._log = logging.getLogger("airflow.task")
    self.test_mode = False  # can be changed when calling 'run'
@property
def try_number(self):
    """
    Return the try number that this task number will be when it is actually run.

    If the TaskInstance is currently running, this will match the stored
    counter; in all other cases the stored counter plus one is returned.
    """
    # This is designed so that task logs end up in the right file.
    if self.state in State.running:
        return self._try_number
    return self._try_number + 1


@try_number.setter
def try_number(self, value: int) -> None:
    """Store ``value`` as the raw counter that the ``try_number`` getter reads."""
    # ``__init__`` and ``refresh_from_db`` assign ``self.try_number``; without a
    # setter those assignments would fail on the read-only property.
    self._try_number = value
@property
def prev_attempted_tries(self) -> int:
    """
    Return the raw stored try counter, i.e. the number of tries already attempted.

    Exposed as a property because callers read it as a plain attribute, e.g.
    ``max(ti.max_tries, ti.prev_attempted_tries)`` in ``clear_task_instances``.
    """
    # Expose this for the Task Tries and Gantt graph views.
    # Using `try_number` throws off the counts for non-running tasks.
    # Also useful in error logging contexts to get
    # the try number for the last try that was attempted.
    # https://issues.apache.org/jira/browse/AIRFLOW-2143
    return self._try_number
def command_as_list(
    self,
    mark_success=False,
    ignore_all_deps=False,
    ignore_task_deps=False,
    ignore_depends_on_past=False,
    ignore_ti_state=False,
    local=False,
    pickle_id=None,
    raw=False,
    job_id=None,
    pool=None,
    cfg_path=None,
):
    """
    Returns a command that can be executed anywhere where airflow is installed.

    This command is part of the message sent to executors by the orchestrator.
    The arguments are forwarded to :meth:`generate_command` together with this
    instance's dag/task/run ids, map index and (when applicable) DAG file path.
    """
    # Use the dag if we have it, else fallback to the ORM dag_model, which might not be loaded
    has_live_dag = hasattr(self, 'task') and hasattr(self.task, 'dag')
    dag = self.task.dag if has_live_dag else self.dag_model

    # The DAG file location is only passed when the DAG is not pickled and a dag is known.
    path = None
    if not pickle_id and dag:
        location_owner = dag.parent_dag if dag.is_subdag else dag
        path = location_owner.relative_fileloc

        if path:
            if not path.is_absolute():
                path = 'DAGS_FOLDER' / path
            path = str(path)

    return TaskInstance.generate_command(
        self.dag_id,
        self.task_id,
        run_id=self.run_id,
        mark_success=mark_success,
        ignore_all_deps=ignore_all_deps,
        ignore_task_deps=ignore_task_deps,
        ignore_depends_on_past=ignore_depends_on_past,
        ignore_ti_state=ignore_ti_state,
        local=local,
        pickle_id=pickle_id,
        file_path=path,
        raw=raw,
        job_id=job_id,
        pool=pool,
        cfg_path=cfg_path,
        map_index=self.map_index,
    )


@staticmethod
def generate_command(
    dag_id: str,
    task_id: str,
    run_id: str,
    mark_success: bool = False,
    ignore_all_deps: bool = False,
    ignore_depends_on_past: bool = False,
    ignore_task_deps: bool = False,
    ignore_ti_state: bool = False,
    local: bool = False,
    pickle_id: int | None = None,
    file_path: str | None = None,
    raw: bool = False,
    job_id: str | None = None,
    pool: str | None = None,
    cfg_path: str | None = None,
    map_index: int = -1,
) -> list[str]:
    """
    Generates the shell command required to execute this task instance.

    :param dag_id: DAG ID
    :param task_id: Task ID
    :param run_id: The run_id of this task's DagRun
    :param mark_success: Whether to mark the task as successful
    :param ignore_all_deps: Ignore all ignorable dependencies.
        Overrides the other ignore_* parameters.
    :param ignore_depends_on_past: Ignore depends_on_past parameter of DAGs
        (e.g. for Backfills)
    :param ignore_task_deps: Ignore task-specific dependencies such as depends_on_past
        and trigger rule
    :param ignore_ti_state: Ignore the task instance's previous failure/success
    :param local: Whether to run the task locally
    :param pickle_id: If the DAG was serialized to the DB, the ID
        associated with the pickled DAG
    :param file_path: path to the file containing the DAG definition
    :param raw: adds the ``--raw`` flag when true
    :param job_id: value passed with ``--job-id`` when set
    :param pool: the Airflow pool that the task should run in
    :param cfg_path: the Path to the configuration file
    :param map_index: value passed with ``--map-index``; the default ``-1``
        adds no flag
    :return: shell command that can be used to run the task instance
    :rtype: list[str]
    """
    cmd = ["airflow", "tasks", "run", dag_id, task_id, run_id]
    # Flags are appended in a fixed order; value-carrying options are stringified.
    if mark_success:
        cmd.append("--mark-success")
    if pickle_id:
        cmd += ["--pickle", str(pickle_id)]
    if job_id:
        cmd += ["--job-id", str(job_id)]
    if ignore_all_deps:
        cmd.append("--ignore-all-dependencies")
    if ignore_task_deps:
        cmd.append("--ignore-dependencies")
    if ignore_depends_on_past:
        cmd.append("--ignore-depends-on-past")
    if ignore_ti_state:
        cmd.append("--force")
    if local:
        cmd.append("--local")
    if pool:
        cmd += ["--pool", pool]
    if raw:
        cmd.append("--raw")
    if file_path:
        cmd += ["--subdir", file_path]
    if cfg_path:
        cmd += ["--cfg-path", cfg_path]
    if map_index != -1:
        cmd += ['--map-index', str(map_index)]
    return cmd
@property
[docs]deflog_url(self)->str:"""Log URL for TaskInstance"""iso=quote(self.execution_date.isoformat())base_url=conf.get_mandatory_value('webserver','BASE_URL')return(f"{base_url}/log"
# NOTE(review): no ``@provide_session`` decorator is visible above this method in
# this view, although the ``NEW_SESSION`` default suggests one -- confirm.
def current_state(self, session: Session = NEW_SESSION) -> str:
    """
    Get the very latest state from the database.

    If a session is passed, the lookup becomes part of that session.

    :param session: SQLAlchemy ORM Session
    """
    return (
        session.query(TaskInstance.state)
        .filter(
            TaskInstance.dag_id == self.dag_id,
            TaskInstance.task_id == self.task_id,
            TaskInstance.run_id == self.run_id,
            # Mapped task instances share dag/task/run ids; without the map index
            # several rows match and ``scalar()`` raises. ``refresh_from_db``
            # filters on the same four columns.
            TaskInstance.map_index == self.map_index,
        )
        .scalar()
    )


@provide_session
def error(self, session: Session = NEW_SESSION) -> None:
    """
    Forces the task instance's state to FAILED in the database.

    :param session: SQLAlchemy ORM Session
    """
    self.log.error("Recording the task instance as FAILED")
    self.state = State.FAILED
    session.merge(self)
    session.commit()
@provide_session
def refresh_from_db(self, session: Session = NEW_SESSION, lock_for_update: bool = False) -> None:
    """
    Refreshes the task instance from the database based on the primary key

    :param session: SQLAlchemy ORM Session
    :param lock_for_update: if True, indicates that the database should
        lock the TaskInstance (issuing a FOR UPDATE clause) until the
        session is committed.
    """
    self.log.debug("Refreshing TaskInstance %s from DB", self)

    if self in session:
        session.refresh(self, TaskInstance.__mapper__.column_attrs.keys())

    # To avoid joining any relationships, select all columns rather than the
    # object. This also means we get (effectively) a namedtuple back, not a TI.
    qry = session.query(*TaskInstance.__table__.columns).filter(
        TaskInstance.dag_id == self.dag_id,
        TaskInstance.task_id == self.task_id,
        TaskInstance.run_id == self.run_id,
        TaskInstance.map_index == self.map_index,
    )

    if lock_for_update:
        for attempt in run_with_db_retries(logger=self.log):
            with attempt:
                ti: TaskInstance | None = qry.with_for_update().one_or_none()
    else:
        ti = qry.one_or_none()

    if not ti:
        # No matching row: the instance has no state.
        self.state = None
        return

    # Fields ordered per model definition. ``try_number`` is copied as the raw
    # column value because columns, not the object, were selected.
    leading_fields = (
        "start_date",
        "end_date",
        "duration",
        "state",
        "try_number",
        "max_tries",
        "hostname",
        "unixname",
        "job_id",
        "pool",
    )
    trailing_fields = (
        "queue",
        "priority_weight",
        "operator",
        "queued_dttm",
        "queued_by_job_id",
        "pid",
        "executor_config",
        "external_executor_id",
        "trigger_id",
        "next_method",
        "next_kwargs",
    )
    for field in leading_fields:
        setattr(self, field, getattr(ti, field))
    # A falsy stored pool_slots falls back to 1.
    self.pool_slots = ti.pool_slots or 1
    for field in trailing_fields:
        setattr(self, field, getattr(ti, field))
def refresh_from_task(self, task: Operator, pool_override: str | None = None) -> None:
    """
    Copy common attributes from the given task.

    :param task: The task object to copy from
    :param pool_override: Use the pool_override instead of task's pool
    """
    self.task = task
    self.queue = task.queue
    # A truthy override wins over the pool configured on the task.
    self.pool = pool_override if pool_override else task.pool
    self.pool_slots = task.pool_slots
    self.priority_weight = task.priority_weight_total
    self.run_as_user = task.run_as_user
    # max_tries is deliberately not set to task.retries here: it is a cumulative
    # value that needs to be stored in the db.
    self.executor_config = task.executor_config
    self.operator = task.task_type
@provide_session
def clear_xcom_data(self, session: Session = NEW_SESSION) -> None:
    """Clear all XCom data from the database for the task instance.

    If the task is unmapped, all XComs matching this task ID in the same DAG
    run are removed. If the task is mapped, only the one with matching map
    index is removed.

    :param session: SQLAlchemy ORM Session
    """
    self.log.debug("Clearing XCom data")
    # A negative map index means "unmapped": pass None instead of an index.
    map_index: int | None = None if self.map_index < 0 else self.map_index
    XCom.clear(
        dag_id=self.dag_id,
        task_id=self.task_id,
        run_id=self.run_id,
        map_index=map_index,
        session=session,
    )


@property
def key(self) -> TaskInstanceKey:
    """Returns a tuple that identifies the task instance uniquely"""
    return TaskInstanceKey(
        self.dag_id,
        self.task_id,
        self.run_id,
        self.try_number,
        self.map_index,
    )
@provide_session
def set_state(self, state: str | None, session: Session = NEW_SESSION) -> bool:
    """
    Set TaskInstance state.

    The start date is filled in if missing; for finished or up-for-retry states
    the end date is filled in if missing and the duration recomputed.

    :param state: State to set for the TI
    :param session: SQLAlchemy ORM Session
    :return: Was the state changed
    """
    if self.state == state:
        return False

    now = timezone.utcnow()
    self.log.debug("Setting task state for %s to %s", self, state)
    self.state = state
    self.start_date = self.start_date or now

    reached_end = self.state in State.finished or self.state == State.UP_FOR_RETRY
    if reached_end:
        self.end_date = self.end_date or now
        elapsed = self.end_date - self.start_date
        self.duration = elapsed.total_seconds()

    session.merge(self)
    return True
@property
def is_premature(self) -> bool:
    """
    Returns whether a task is in UP_FOR_RETRY state and is not yet ready
    to be retried, i.e. it is still in the retry waiting period.
    """
    if self.state != State.UP_FOR_RETRY:
        return False
    # is the task still in the retry waiting period?
    return not self.ready_for_retry()
@provide_session
def are_dependents_done(self, session: Session = NEW_SESSION) -> bool:
    """
    Checks whether the immediate dependents of this task instance have succeeded or have been skipped.

    This is meant to be used by wait_for_downstream. It is useful when you do
    not want to start processing the next schedule of a task until the
    dependents are done. For instance, if the task DROPs and recreates a table.

    :param session: SQLAlchemy ORM Session
    """
    downstream_ids = self.task.downstream_task_ids
    if not downstream_ids:
        # Nothing downstream, so nothing to wait for.
        return True

    done_query = session.query(func.count(TaskInstance.task_id)).filter(
        TaskInstance.dag_id == self.dag_id,
        TaskInstance.task_id.in_(downstream_ids),
        TaskInstance.run_id == self.run_id,
        TaskInstance.state.in_([State.SKIPPED, State.SUCCESS]),
    )
    # First column of the first row is the count.
    done_count = done_query[0][0]
    return done_count == len(downstream_ids)
@provide_session
def get_previous_dagrun(
    self,
    state: DagRunState | None = None,
    session: Session | None = None,
) -> DagRun | None:
    """The DagRun that ran before this task instance's DagRun.

    :param state: If passed, it only take into account instances of a specific state.
    :param session: SQLAlchemy ORM Session.
    """
    dag = self.task.dag
    if dag is None:
        return None

    dr = self.get_dagrun(session=session)
    dr.dag = dag

    # We always ignore schedule in dagrun lookup when `state` is given
    # or the DAG is never scheduled. For legacy reasons, when
    # `catchup=True`, we use `get_previous_scheduled_dagrun` unless
    # `ignore_schedule` is `True`.
    ignore_schedule = state is not None or not dag.timetable.can_run
    use_scheduled_lookup = dag.catchup is True and not ignore_schedule
    if use_scheduled_lookup:
        last_dagrun = dr.get_previous_scheduled_dagrun(session=session)
    else:
        last_dagrun = dr.get_previous_dagrun(session=session, state=state)

    # Any falsy lookup result is reported as None.
    return last_dagrun or None
@provide_session
def get_previous_ti(
    self,
    state: DagRunState | None = None,
    session: Session = NEW_SESSION,
) -> TaskInstance | None:
    """
    The task instance for the task that ran before this task instance.

    :param state: If passed, it only take into account instances of a specific state.
    :param session: SQLAlchemy ORM Session
    """
    previous_run = self.get_previous_dagrun(state, session=session)
    if previous_run is not None:
        return previous_run.get_task_instance(self.task_id, session=session)
    return None
@property
def previous_ti(self) -> TaskInstance | None:
    """
    Deprecated accessor for the previous task instance.

    Emits a ``RemovedInAirflow3Warning`` and delegates to ``get_previous_ti``.
    """
    deprecation_message = """ This attribute is deprecated. Please use `airflow.models.taskinstance.TaskInstance.get_previous_ti` method. """
    warnings.warn(deprecation_message, RemovedInAirflow3Warning, stacklevel=2)
    return self.get_previous_ti()
@property
def previous_ti_success(self) -> TaskInstance | None:
    """
    Deprecated accessor for the previous task instance of a successful DagRun.

    Emits a ``RemovedInAirflow3Warning`` and delegates to ``get_previous_ti``
    with ``state=DagRunState.SUCCESS``.
    """
    deprecation_message = """ This attribute is deprecated. Please use `airflow.models.taskinstance.TaskInstance.get_previous_ti` method. """
    warnings.warn(deprecation_message, RemovedInAirflow3Warning, stacklevel=2)
    return self.get_previous_ti(state=DagRunState.SUCCESS)
@provide_session
def get_previous_execution_date(
    self,
    state: DagRunState | None = None,
    session: Session = NEW_SESSION,
) -> pendulum.DateTime | None:
    """
    The execution date of the previous task instance, if there is one.

    :param state: If passed, it only take into account instances of a specific state.
    :param session: SQLAlchemy ORM Session
    """
    self.log.debug("previous_execution_date was called")
    prev_ti = self.get_previous_ti(state=state, session=session)
    if not prev_ti:
        # No previous task instance: hand back the falsy lookup result unchanged.
        return prev_ti
    return pendulum.instance(prev_ti.execution_date)
@provide_session
def get_previous_start_date(
    self, state: DagRunState | None = None, session: Session = NEW_SESSION
) -> pendulum.DateTime | None:
    """
    The start date of the previous task instance, if there is one.

    :param state: If passed, it only take into account instances of a specific state.
    :param session: SQLAlchemy ORM Session
    """
    self.log.debug("previous_start_date was called")
    prev_ti = self.get_previous_ti(state=state, session=session)
    # prev_ti may not exist and prev_ti.start_date may be None.
    if not prev_ti:
        return prev_ti
    started_at = prev_ti.start_date
    if not started_at:
        return started_at
    return pendulum.instance(started_at)
@property
def previous_start_date_success(self) -> pendulum.DateTime | None:
    """
    Deprecated accessor for the start date of the previous successful run's task instance.

    Emits a ``RemovedInAirflow3Warning`` and delegates to
    ``get_previous_start_date`` with ``state=DagRunState.SUCCESS``.
    """
    deprecation_message = """ This attribute is deprecated. Please use `airflow.models.taskinstance.TaskInstance.get_previous_start_date` method. """
    warnings.warn(deprecation_message, RemovedInAirflow3Warning, stacklevel=2)
    return self.get_previous_start_date(state=DagRunState.SUCCESS)
@provide_session
def are_dependencies_met(
    self, dep_context: DepContext | None = None, session: Session = NEW_SESSION, verbose: bool = False
) -> bool:
    """
    Returns whether or not all the conditions are met for this task instance to be run
    given the context for the dependencies (e.g. a task instance being force run from
    the UI will ignore some dependencies).

    :param dep_context: The execution context that determines the dependencies that
        should be evaluated.
    :param session: database session
    :param verbose: whether log details on failed dependencies on
        info or debug log level
    """
    dep_context = dep_context or DepContext()
    if verbose:
        report = self.log.info
    else:
        report = self.log.debug

    any_failed = False
    # Every failed dependency is logged, not just the first one.
    for dep_status in self.get_failed_dep_statuses(dep_context=dep_context, session=session):
        any_failed = True
        report(
            "Dependencies not met for %s, dependency '%s' FAILED: %s",
            self,
            dep_status.dep_name,
            dep_status.reason,
        )

    if any_failed:
        return False

    report("Dependencies all met for %s", self)
    return True
def next_retry_datetime(self):
    """
    Get datetime of the next retry if the task instance fails.

    For exponential backoff, retry_delay is used as base and will be converted to seconds.
    """
    delay = self.task.retry_delay
    if not self.task.retry_exponential_backoff:
        return self.end_date + delay

    # Round up before converting to int: a value below 1 would otherwise become 0
    # and cause a divide by zero in the modulo below.
    scaled_seconds = delay.total_seconds() * (2 ** (self.try_number - 2))
    # When delay.total_seconds() is 0 the ceiling still yields 0, so a lower bound
    # of 1 is imposed as well. This makes the ceiling redundant, but it was
    # retained to avoid introducing a breaking change.
    min_backoff = max(1, int(math.ceil(scaled_seconds)))

    # deterministic per task instance
    hash_source = f"{self.dag_id}#{self.task_id}#{self.execution_date}#{self.try_number}"
    ti_hash = int(hashlib.sha1(hash_source.encode()).hexdigest(), 16)
    # between 1 and 1.0 * delay * (2^retry_number)
    modded_hash = min_backoff + ti_hash % min_backoff

    # timedelta has a maximum representable value. The exponentiation
    # here means this value can be exceeded after a certain number
    # of tries (around 50 if the initial delay is 1s, even fewer if
    # the delay is larger). Cap the value here before creating a
    # timedelta object so the operation doesn't fail.
    capped_seconds = min(modded_hash, timedelta.max.total_seconds() - 1)
    delay = timedelta(seconds=capped_seconds)
    if self.task.max_retry_delay:
        delay = min(self.task.max_retry_delay, delay)
    return self.end_date + delay
def ready_for_retry(self) -> bool:
    """
    Checks on whether the task instance is in the right state and timeframe
    to be retried.
    """
    if self.state != State.UP_FOR_RETRY:
        return False
    # Ready once the computed retry time lies in the past.
    return self.next_retry_datetime() < timezone.utcnow()
@provide_session
def get_dagrun(self, session: Session = NEW_SESSION) -> DagRun:
    """
    Returns the DagRun for this TaskInstance

    :param session: SQLAlchemy ORM Session
    :return: DagRun
    """
    dag_run_attr = inspect(self).attrs.dag_run
    if dag_run_attr.loaded_value is not NO_VALUE:
        # The relationship is already loaded on this instance; reuse it.
        return self.dag_run

    from airflow.models.dagrun import DagRun  # Avoid circular import

    lookup = session.query(DagRun).filter(
        DagRun.dag_id == self.dag_id,
        DagRun.run_id == self.run_id,
    )
    dr = lookup.one()

    # Record it in the instance for next time. This means that `self.execution_date` will work correctly
    set_committed_value(self, 'dag_run', dr)
    return dr
@provide_session
def check_and_change_state_before_execution(
    self,
    verbose: bool = True,
    ignore_all_deps: bool = False,
    ignore_depends_on_past: bool = False,
    ignore_task_deps: bool = False,
    ignore_ti_state: bool = False,
    mark_success: bool = False,
    test_mode: bool = False,
    job_id: str | None = None,
    pool: str | None = None,
    external_executor_id: str | None = None,
    session: Session = NEW_SESSION,
) -> bool:
    """
    Checks dependencies and then sets state to RUNNING if they are met. Returns
    True if and only if state is set to RUNNING, which implies that task should be
    executed, in preparation for _run_raw_task

    When ``mark_success`` is set, both dependency checks are skipped and the state
    is moved to RUNNING directly.

    :param verbose: whether to log the final "Executing" / "Marking success" message
    :param ignore_all_deps: Ignore all of the non-critical dependencies, just runs
    :param ignore_depends_on_past: Ignore depends_on_past DAG attribute
    :param ignore_task_deps: Don't check the dependencies of this TaskInstance's task
    :param ignore_ti_state: Disregards previous task instance state
    :param mark_success: Don't run the task, mark its state as success
    :param test_mode: Doesn't record success or failure in the DB
    :param job_id: Job (BackfillJob / LocalTaskJob / SchedulerJob) ID
    :param pool: specifies the pool to use to run the task instance
    :param external_executor_id: The identifier of the celery executor
    :param session: SQLAlchemy ORM Session
    :return: whether the state was changed to running or not
    :rtype: bool
    """
    # Keep a handle on the task: it is re-attached to the merged instance further down.
    task = self.task
    self.refresh_from_task(task, pool_override=pool)
    self.test_mode = test_mode
    self.refresh_from_db(session=session, lock_for_update=True)
    self.job_id = job_id
    self.hostname = get_hostname()
    self.pid = None

    if not ignore_all_deps and not ignore_ti_state and self.state == State.SUCCESS:
        Stats.incr('previously_succeeded', 1, 1)

    # TODO: Logging needs cleanup, not clear what is being printed
    hr_line_break = "\n" + ("-" * 80)  # Line break

    if not mark_success:
        # Firstly find non-runnable and non-requeueable tis.
        # Since mark_success is not set, we do nothing.
        non_requeueable_dep_context = DepContext(
            deps=RUNNING_DEPS - REQUEUEABLE_DEPS,
            ignore_all_deps=ignore_all_deps,
            ignore_ti_state=ignore_ti_state,
            ignore_depends_on_past=ignore_depends_on_past,
            ignore_task_deps=ignore_task_deps,
        )
        if not self.are_dependencies_met(
            dep_context=non_requeueable_dep_context, session=session, verbose=True
        ):
            # Commit before bailing out (the row was refreshed with lock_for_update above).
            session.commit()
            return False

        # For reporting purposes, we report based on 1-indexed,
        # not 0-indexed lists (i.e. Attempt 1 instead of
        # Attempt 0 for the first attempt).
        # Set the task start date. In case it was re-scheduled use the initial
        # start date that is recorded in task_reschedule table
        # If the task continues after being deferred (next_method is set), use the original start_date
        self.start_date = self.start_date if self.next_method else timezone.utcnow()
        if self.state == State.UP_FOR_RESCHEDULE:
            task_reschedule: TR = TR.query_for_task_instance(self, session=session).first()
            if task_reschedule:
                self.start_date = task_reschedule.start_date

        # Secondly we find non-runnable but requeueable tis. We reset its state.
        # This is because we might have hit concurrency limits,
        # e.g. because of backfilling.
        dep_context = DepContext(
            deps=REQUEUEABLE_DEPS,
            ignore_all_deps=ignore_all_deps,
            ignore_depends_on_past=ignore_depends_on_past,
            ignore_task_deps=ignore_task_deps,
            ignore_ti_state=ignore_ti_state,
        )
        if not self.are_dependencies_met(dep_context=dep_context, session=session, verbose=True):
            self.state = State.NONE
            self.log.warning(hr_line_break)
            self.log.warning(
                "Rescheduling due to concurrency limits reached "
                "at task runtime. Attempt %s of "
                "%s. State set to NONE.",
                self.try_number,
                self.max_tries + 1,
            )
            self.log.warning(hr_line_break)
            self.queued_dttm = timezone.utcnow()
            session.merge(self)
            session.commit()
            return False

    # print status message
    self.log.info(hr_line_break)
    self.log.info("Starting attempt %s of %s", self.try_number, self.max_tries + 1)
    self.log.info(hr_line_break)
    self._try_number += 1

    # The RUNNING log row is only recorded outside test mode; the in-memory state
    # change below happens either way.
    if not test_mode:
        session.add(Log(State.RUNNING, self))
    self.state = State.RUNNING
    self.external_executor_id = external_executor_id
    self.end_date = None
    if not test_mode:
        # Re-attach the task captured at the top to the instance returned by merge().
        session.merge(self).task = task
    session.commit()

    # Closing all pooled connections to prevent
    # "max number of connections reached"
    settings.engine.dispose()  # type: ignore
    if verbose:
        if mark_success:
            self.log.info("Marking success for %s on %s", self.task, self.execution_date)
        else:
            self.log.info("Executing %s on %s", self.task, self.execution_date)
    return True
def _date_or_empty(self, attr: str) -> str:
    """Format the datetime stored under ``attr`` as ``YYYYmmddTHHMMSS``, or return '' when unset."""
    value: datetime | None = getattr(self, attr, None)
    if not value:
        return ''
    return value.strftime('%Y%m%dT%H%M%S')


def _log_state(self, lead_msg: str = '') -> None:
    """
    Log the state the task instance is being marked with.

    The message carries the dag and task ids, the map index when it is
    non-negative, and the execution / start / end dates.

    :param lead_msg: text placed in front of the log message
    """
    template = '%sMarking task as %s. dag_id=%s, task_id=%s, '
    log_args = [lead_msg, str(self.state).upper(), self.dag_id, self.task_id]
    if self.map_index >= 0:
        template += 'map_index=%d, '
        log_args.append(self.map_index)
    log_args.extend(
        self._date_or_empty(date_attr) for date_attr in ('execution_date', 'start_date', 'end_date')
    )
    self.log.info(template + 'execution_date=%s, start_date=%s, end_date=%s', *log_args)


# Unset next_method and next_kwargs so that any retries don't re-use them.
def clear_next_method_args(self) -> None:
    """Reset ``next_method`` and ``next_kwargs`` to ``None``."""
    self.log.debug("Clearing next_method and next_kwargs.")
    self.next_method = self.next_kwargs = None
@provide_session
@Sentry.enrich_errors
def _run_raw_task(
    self,
    mark_success: bool = False,
    test_mode: bool = False,
    job_id: str | None = None,
    pool: str | None = None,
    session: Session = NEW_SESSION,
) -> None:
    """
    Immediately runs the task (without checking or changing db state
    before execution) and then sets the appropriate final state after
    completion and runs any post-execute callbacks. Meant to be called
    only after another function changes the state to running.

    :param mark_success: Don't run the task, mark its state as success
    :param test_mode: Doesn't record success or failure in the DB
    :param job_id: stored on the task instance as ``job_id``
    :param pool: specifies the pool to use to run the task instance
    :param session: SQLAlchemy ORM Session
    """
    self.test_mode = test_mode
    self.refresh_from_task(self.task, pool_override=pool)
    self.refresh_from_db(session=session)
    self.job_id = job_id
    self.hostname = get_hostname()
    self.pid = os.getpid()
    if not test_mode:
        session.merge(self)
        session.commit()
    actual_start_date = timezone.utcnow()
    Stats.incr(f'ti.start.{self.task.dag_id}.{self.task.task_id}')
    # Initialize final state counters at zero
    for state in State.task_states:
        Stats.incr(f'ti.finish.{self.task.dag_id}.{self.task.task_id}.{state}', count=0)

    self.task = self.task.prepare_for_execution()
    context = self.get_template_context(ignore_param_exceptions=False)
    try:
        if not mark_success:
            self._execute_task_with_callbacks(context, test_mode)
        if not test_mode:
            self.refresh_from_db(lock_for_update=True, session=session)
        self.state = State.SUCCESS
    except TaskDeferred as defer:
        # The task has signalled it wants to defer execution based on
        # a trigger.
        self._defer_task(defer=defer, session=session)
        self.log.info(
            'Pausing task as DEFERRED. dag_id=%s, task_id=%s, execution_date=%s, start_date=%s',
            self.dag_id,
            self.task_id,
            self._date_or_empty('execution_date'),
            self._date_or_empty('start_date'),
        )
        if not test_mode:
            session.add(Log(self.state, self))
            session.merge(self)
            session.commit()
        return
    except AirflowSkipException as e:
        # Recording SKIP
        # log only if exception has any arguments to prevent log flooding
        if e.args:
            self.log.info(e)
        if not test_mode:
            self.refresh_from_db(lock_for_update=True, session=session)
        self.state = State.SKIPPED
    except AirflowRescheduleException as reschedule_exception:
        self._handle_reschedule(actual_start_date, reschedule_exception, test_mode, session=session)
        session.commit()
        return
    except (AirflowFailException, AirflowSensorTimeout) as e:
        # If AirflowFailException is raised, task should not retry.
        # If a sensor in reschedule mode reaches timeout, task should not retry.
        self.handle_failure(e, test_mode, context, force_fail=True, session=session)
        session.commit()
        raise
    except AirflowException as e:
        if not test_mode:
            self.refresh_from_db(lock_for_update=True, session=session)
        # for case when task is marked as success/failed externally
        # or dagrun timed out and task is marked as skipped
        # current behavior doesn't hit the callbacks
        if self.state in State.finished:
            self.clear_next_method_args()
            session.merge(self)
            session.commit()
            return
        else:
            self.handle_failure(e, test_mode, context, session=session)
            session.commit()
            raise
    except (Exception, KeyboardInterrupt) as e:
        self.handle_failure(e, test_mode, context, session=session)
        session.commit()
        raise
    finally:
        # Runs on every exit path above, including the early returns and re-raises.
        Stats.incr(f'ti.finish.{self.dag_id}.{self.task_id}.{self.state}')

    # Recording SKIPPED or SUCCESS
    self.clear_next_method_args()
    self.end_date = timezone.utcnow()
    self._log_state()
    self.set_duration()

    # run on_success_callback before db committing
    # otherwise, the LocalTaskJob sees the state is changed to `success`,
    # but the task_runner is still running, LocalTaskJob then treats the state is set externally!
    self._run_finished_callback(self.task.on_success_callback, context, 'on_success')

    if not test_mode:
        session.add(Log(self.state, self))
        session.merge(self)
        if self.state == TaskInstanceState.SUCCESS:
            self._register_dataset_changes(session=session)
        session.commit()


def _register_dataset_changes(self, *, session: Session) -> None:
    """
    Report every ``Dataset`` found in the task's outlets to the dataset manager.

    :param session: SQLAlchemy ORM Session passed on to the dataset manager
    """
    for obj in self.task.outlets or []:
        self.log.debug("outlet obj %s", obj)
        # Lineage can have other types of objects besides datasets
        if isinstance(obj, Dataset):
            dataset_manager.register_dataset_change(
                task_instance=self,
                dataset=obj,
                session=session,
            )


def _execute_task_with_callbacks(self, context, test_mode=False):
    """
    Prepare Task for Execution

    Installs a SIGTERM handler, renders templates, exports the context as
    environment variables, then runs ``pre_execute``, the on-execute callback,
    the task itself and ``post_execute``.

    :param context: template context of this task instance
    :param test_mode: when true, rendered template fields are not written
    """
    from airflow.models.renderedtifields import RenderedTaskInstanceFields

    parent_pid = os.getpid()

    def signal_handler(signum, frame):
        pid = os.getpid()

        # If a task forks during execution (from DAG code) for whatever
        # reason, we want to make sure that we react to the signal only in
        # the process that we've spawned ourselves (referred to here as the
        # parent process).
        if pid != parent_pid:
            os._exit(1)
            return
        self.log.error("Received SIGTERM. Terminating subprocesses.")
        self.task.on_kill()
        raise AirflowException("Task received SIGTERM signal")

    signal.signal(signal.SIGTERM, signal_handler)

    # Don't clear Xcom until the task is certain to execute, and check if we are resuming from deferral.
    if not self.next_method:
        self.clear_xcom_data()

    with Stats.timer(f'dag.{self.task.dag_id}.{self.task.task_id}.duration'):
        # Set the validated/merged params on the task object.
        self.task.params = context['params']

        task_orig = self.render_templates(context=context)
        if not test_mode:
            rtif = RenderedTaskInstanceFields(ti=self, render_templates=False)
            RenderedTaskInstanceFields.write(rtif)
            RenderedTaskInstanceFields.delete_old_records(self.task_id, self.dag_id)

        # Export context to make it available for operators to use.
        airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True)
        os.environ.update(airflow_context_vars)

        # Log context only for the default execution method, the assumption
        # being that otherwise we're resuming a deferred task (in which
        # case there's no need to log these again).
        if not self.next_method:
            self.log.info(
                "Exporting the following env vars:\n%s",
                '\n'.join(f"{k}={v}" for k, v in airflow_context_vars.items()),
            )

        # Run pre_execute callback
        self.task.pre_execute(context=context)

        # Run on_execute callback
        self._run_execute_callback(context, self.task)

        # Execute the task
        with set_current_context(context):
            result = self._execute_task(context, task_orig)

        # Run post_execute callback
        self.task.post_execute(context=context, result=result)

    Stats.incr(f'operator_successes_{self.task.task_type}', 1, 1)
    Stats.incr('ti_successes')


def _run_finished_callback(
    self, callback: TaskStateChangeCallback | None, context: Context, callback_type: str
) -> None:
    """
    Run callback after task finishes

    Any ``Exception`` raised by the callback is logged and swallowed.

    :param callback: callable invoked with ``context``; nothing happens when falsy
    :param context: template context handed to the callback
    :param callback_type: label used in the error log message
    """
    try:
        if callback:
            callback(context)
    except Exception:  # pylint: disable=broad-except
        self.log.exception("Error when executing %s callback", callback_type)


def _execute_task(self, context, task_orig):
    """
    Executes Task (optionally with a Timeout) and pushes Xcom results

    :param context: template context passed to the executed callable
    :param task_orig: the task as it was before template rendering, forwarded to
        ``_record_task_map_for_downstreams``
    :return: whatever the executed callable returned
    """
    task_to_execute = self.task
    # If the task has been deferred and is being executed due to a trigger,
    # then we need to pick the right method to come back to, otherwise
    # we go for the default execute
    if self.next_method:
        # __fail__ is a special signal value for next_method that indicates
        # this task was scheduled specifically to fail.
        if self.next_method == "__fail__":
            next_kwargs = self.next_kwargs or {}
            # Read from the normalised dict: self.next_kwargs may be None here.
            traceback = next_kwargs.get("traceback")
            if traceback is not None:
                self.log.error("Trigger failed:\n%s", "\n".join(traceback))
            raise TaskDeferralError(next_kwargs.get("error", "Unknown"))
        # Grab the callable off the Operator/Task and add in any kwargs
        execute_callable = getattr(task_to_execute, self.next_method)
        if self.next_kwargs:
            execute_callable = partial(execute_callable, **self.next_kwargs)
    else:
        execute_callable = task_to_execute.execute
    # If a timeout is specified for the task, make it fail
    # if it goes beyond
    if task_to_execute.execution_timeout:
        # If we are coming in with a next_method (i.e. from a deferral),
        # calculate the timeout from our start_date.
        if self.next_method:
            timeout_seconds = (
                task_to_execute.execution_timeout - (timezone.utcnow() - self.start_date)
            ).total_seconds()
        else:
            timeout_seconds = task_to_execute.execution_timeout.total_seconds()
        try:
            # It's possible we're already timed out, so fast-fail if true
            if timeout_seconds <= 0:
                raise AirflowTaskTimeout()
            # Run task in timeout wrapper
            with timeout(timeout_seconds):
                result = execute_callable(context=context)
        except AirflowTaskTimeout:
            task_to_execute.on_kill()
            raise
    else:
        result = execute_callable(context=context)
    with create_session() as session:
        if task_to_execute.do_xcom_push:
            xcom_value = result
        else:
            xcom_value = None
        if xcom_value is not None:  # If the task returns a result, push an XCom containing it.
            self.xcom_push(key=XCOM_RETURN_KEY, value=xcom_value, session=session)
        self._record_task_map_for_downstreams(task_orig, xcom_value, session=session)
    return result


@provide_session
def _defer_task(self, session: Session, defer: TaskDeferred) -> None:
    """
    Marks the task as deferred and sets up the trigger that is needed
    to resume it.

    :param session: SQLAlchemy ORM Session
    :param defer: the deferral request carrying trigger, method name, kwargs and timeout
    """
    from airflow.models.trigger import Trigger

    # First, make the trigger entry
    trigger_row = Trigger.from_object(defer.trigger)
    session.add(trigger_row)
    session.flush()

    # Then, update ourselves so it matches the deferral request
    # Keep an eye on the logic in `check_and_change_state_before_execution()`
    # depending on self.next_method semantics
    self.state = State.DEFERRED
    self.trigger_id = trigger_row.id
    self.next_method = defer.method_name
    self.next_kwargs = defer.kwargs or {}

    # Decrement try number so the next one is the same try
    self._try_number -= 1

    # Calculate timeout too if it was passed
    if defer.timeout is not None:
        self.trigger_timeout = timezone.utcnow() + defer.timeout
    else:
        self.trigger_timeout = None

    # If an execution_timeout is set, set the timeout to the minimum of
    # it and the trigger timeout
    execution_timeout = self.task.execution_timeout
    if execution_timeout:
        if self.trigger_timeout:
            self.trigger_timeout = min(self.start_date + execution_timeout, self.trigger_timeout)
        else:
            self.trigger_timeout = self.start_date + execution_timeout


def _run_execute_callback(self, context: Context, task: Operator) -> None:
    """
    Functions that need to be run before a Task is executed

    Calls ``task.on_execute_callback`` when set; any ``Exception`` it raises is
    logged and swallowed.
    """
    try:
        if task.on_execute_callback:
            task.on_execute_callback(context)
    except Exception:
        self.log.exception("Failed when executing execute callback")


# NOTE(review): this method takes no ``session`` parameter -- confirm that the
# ``provide_session`` decorator is really intended here.
@provide_session
def dry_run(self) -> None:
    """Only Renders Templates for the TI"""
    from airflow.models.baseoperator import BaseOperator

    self.task = self.task.prepare_for_execution()
    self.render_templates()
    if TYPE_CHECKING:
        assert isinstance(self.task, BaseOperator)
    self.task.dry_run()
@provide_sessiondef_handle_reschedule(self,actual_start_date,reschedule_exception,test_mode=False,session=NEW_SESSION):# Don't record reschedule request in test modeiftest_mode:returnfromairflow.models.dagrunimportDagRun# Avoid circular importself.refresh_from_db(session)self.end_date=timezone.utcnow()self.set_duration()# Lock DAG run to be sure not to get into a deadlock situation when trying to insert# TaskReschedule which apparently also creates lock on corresponding DagRun entitywith_row_locks(session.query(DagRun).filter_by(dag_id=self.dag_id,run_id=self.run_id,),session=session,).one()# Log reschedule requestsession.add(TaskReschedule(self.task,self.run_id,self._try_number,actual_start_date,self.end_date,reschedule_exception.reschedule_date,self.map_index,))# set stateself.state=State.UP_FOR_RESCHEDULE# Decrement try_number so subsequent runs will use the same try number and write# to same log file.self._try_number-=1self.clear_next_method_args()session.merge(self)session.commit()self.log.info('Rescheduling task, marking task as UP_FOR_RESCHEDULE')@staticmethod
def get_truncated_error_traceback(error: BaseException, truncate_to: Callable) -> TracebackType | None:
    """
    Truncates the traceback of an exception to the first frame called from within a given function

    If no frame of ``truncate_to`` is found in the traceback, the full traceback
    is returned. If the exception was raised directly inside ``truncate_to``
    (there is no deeper frame), ``None`` is returned.

    :param error: exception to get traceback from
    :param truncate_to: Function to truncate TB to. Either a bound method or a
        plain function; it must provide a ``__code__`` attribute, directly or
        through ``__func__``
    :return: the truncated traceback, see above

    :meta private:
    """
    # Bound methods expose their function as ``__func__``; plain functions carry
    # ``__code__`` themselves.
    code = getattr(truncate_to, "__func__", truncate_to).__code__
    tb = error.__traceback__
    while tb is not None:
        if tb.tb_frame.f_code is code:
            return tb.tb_next
        tb = tb.tb_next
    # No frame matched: fall back to the complete traceback.
    return error.__traceback__
@provide_session
def handle_failure(
    self,
    error: None | str | Exception | KeyboardInterrupt,
    test_mode: bool | None = None,
    context: Context | None = None,
    force_fail: bool = False,
    session: Session = NEW_SESSION,
) -> None:
    """
    Handle Failure for the TaskInstance

    Logs the error, records the failure, moves the task instance to FAILED or
    UP_FOR_RETRY, then sends the alert email and runs the matching callback
    where configured.

    :param error: exception or message describing the failure; may be ``None``
    :param test_mode: when true nothing is read from or written to the DB;
        defaults to ``self.test_mode`` when ``None``
    :param context: template context; built from the task when not given and a
        task is available
    :param force_fail: mark the task FAILED even if it would be eligible to retry
    :param session: SQLAlchemy ORM Session
    """
    if test_mode is None:
        test_mode = self.test_mode

    if error:
        if isinstance(error, BaseException):
            # Log with a traceback truncated relative to ``_execute_task``.
            tb = self.get_truncated_error_traceback(error, truncate_to=self._execute_task)
            self.log.error("Task failed with exception", exc_info=(type(error), error, tb))
        else:
            self.log.error("%s", error)
    if not test_mode:
        self.refresh_from_db(session)

    self.end_date = timezone.utcnow()
    self.set_duration()
    Stats.incr(f'operator_failures_{self.operator}')
    Stats.incr('ti_failures')
    if not test_mode:
        session.add(Log(State.FAILED, self))

        # Log failure duration
        session.add(TaskFail(ti=self))

    self.clear_next_method_args()

    # In extreme cases (zombie in case of dag with parse error) we might _not_ have a Task.
    if context is None and getattr(self, 'task', None):
        context = self.get_template_context(session)

    if context is not None:
        context['exception'] = error

    # Set state correctly and figure out how to log it and decide whether
    # to email

    # Note, callback invocation needs to be handled by caller of
    # _run_raw_task to avoid race conditions which could lead to duplicate
    # invocations or miss invocation.

    # Since this function is called only when the TaskInstance state is running,
    # try_number contains the current try_number (not the next). We
    # only mark task instance as FAILED if the next task instance
    # try_number exceeds the max_tries ... or if force_fail is truthy

    task: BaseOperator | None = None
    try:
        if getattr(self, 'task', None) and context:
            task = self.task.unmap((context, session))
    except Exception:
        # Best effort: without an unmapped task no email is sent and no callback runs below.
        self.log.error("Unable to unmap task to determine if we need to send an alert email")

    if force_fail or not self.is_eligible_to_retry():
        self.state = State.FAILED
        email_for_state = operator.attrgetter('email_on_failure')
        callback = task.on_failure_callback if task else None
        callback_type = 'on_failure'
    else:
        if self.state == State.QUEUED:
            # We increase the try_number so as to fail the task if it fails to start after sometime
            self._try_number += 1
        self.state = State.UP_FOR_RETRY
        email_for_state = operator.attrgetter('email_on_retry')
        callback = task.on_retry_callback if task else None
        callback_type = 'on_retry'

    self._log_state('Immediate failure requested. ' if force_fail else '')
    if task and email_for_state(task) and task.email:
        try:
            self.email_alert(error, task)
        except Exception:
            self.log.exception('Failed to send email to: %s', task.email)

    if callback and context:
        self._run_finished_callback(callback, context, callback_type)

    if not test_mode:
        # Only flushed here, not committed.
        session.merge(self)
        session.flush()
def is_eligible_to_retry(self) -> bool:
    """
    Tell whether the task instance is eligible for retry.

    :return: ``True`` when the instance is in the RESTARTING state, or when its
        ``try_number`` has not exceeded ``max_tries`` (and, if the task is
        loaded, the task has retries configured)
    """
    if self.state == State.RESTARTING:
        # If a task is cleared when running, it goes into RESTARTING state and is always
        # eligible for retry
        return True
    if not getattr(self, 'task', None):
        # Couldn't load the task, don't know number of retries, guess:
        return self.try_number <= self.max_tries

    # ``and`` yields its first falsy operand, so coerce to a real boolean instead
    # of leaking ``0`` / ``None`` from ``task.retries``.
    return bool(self.task.retries and self.try_number <= self.max_tries)
def get_template_context(
    self, session: Session = NEW_SESSION, ignore_param_exceptions: bool = True
) -> Context:
    """
    Return TI Context

    Builds the dictionary of values exposed to templates and callbacks for this
    task instance (dates and their string forms, the DAG and DagRun, validated
    params, accessor objects, ...) and wraps it in a ``Context``.

    :param session: SQLAlchemy ORM Session; a new ``settings.Session()`` is
        created when falsy
    :param ignore_param_exceptions: passed to ``ParamsDict`` as ``suppress_exception``
    :return: the populated ``Context``
    """
    # Do not use provide_session here -- it expunges everything on exit!
    if not session:
        session = settings.Session()

    from airflow import macros

    integrate_macros_plugins()

    task = self.task
    if TYPE_CHECKING:
        assert task.dag
    dag: DAG = task.dag

    dag_run = self.get_dagrun(session)
    data_interval = dag.get_run_data_interval(dag_run)

    # Validates Params and convert them into a simple dict.
    # Later updates win: DAG params first, then task params, then (optionally) DagRun.conf.
    params = ParamsDict(suppress_exception=ignore_param_exceptions)
    with contextlib.suppress(AttributeError):
        params.update(dag.params)
    if task.params:
        params.update(task.params)
    if conf.getboolean('core', 'dag_run_conf_overrides_params'):
        self.overwrite_params_with_dag_run_conf(params=params, dag_run=dag_run)
    validated_params = params.validate()

    logical_date = timezone.coerce_datetime(self.execution_date)
    ds = logical_date.strftime('%Y-%m-%d')
    ds_nodash = ds.replace('-', '')
    ts = logical_date.isoformat()
    ts_nodash = logical_date.strftime('%Y%m%dT%H%M%S')
    ts_nodash_with_tz = ts.replace('-', '').replace(':', '')

    @cache  # Prevent multiple database access.
    def _get_previous_dagrun_success() -> DagRun | None:
        return self.get_previous_dagrun(state=DagRunState.SUCCESS, session=session)

    def _get_previous_dagrun_data_interval_success() -> DataInterval | None:
        dagrun = _get_previous_dagrun_success()
        if dagrun is None:
            return None
        return dag.get_run_data_interval(dagrun)

    def get_prev_data_interval_start_success() -> pendulum.DateTime | None:
        data_interval = _get_previous_dagrun_data_interval_success()
        if data_interval is None:
            return None
        return data_interval.start

    def get_prev_data_interval_end_success() -> pendulum.DateTime | None:
        data_interval = _get_previous_dagrun_data_interval_success()
        if data_interval is None:
            return None
        return data_interval.end

    def get_prev_start_date_success() -> pendulum.DateTime | None:
        dagrun = _get_previous_dagrun_success()
        if dagrun is None:
            return None
        return timezone.coerce_datetime(dagrun.start_date)

    @cache
    def get_yesterday_ds() -> str:
        return (logical_date - timedelta(1)).strftime('%Y-%m-%d')

    def get_yesterday_ds_nodash() -> str:
        return get_yesterday_ds().replace('-', '')

    @cache
    def get_tomorrow_ds() -> str:
        return (logical_date + timedelta(1)).strftime('%Y-%m-%d')

    def get_tomorrow_ds_nodash() -> str:
        return get_tomorrow_ds().replace('-', '')

    @cache
    def get_next_execution_date() -> pendulum.DateTime | None:
        # For manually triggered dagruns that aren't run on a schedule,
        # the "next" execution date doesn't make sense, and should be set
        # to execution date for consistency with how execution_date is set
        # for manually triggered tasks, i.e. triggered_date == execution_date.
        if dag_run.external_trigger:
            return logical_date
        if dag is None:
            return None
        next_info = dag.next_dagrun_info(data_interval, restricted=False)
        if next_info is None:
            return None
        return timezone.coerce_datetime(next_info.logical_date)

    def get_next_ds() -> str | None:
        execution_date = get_next_execution_date()
        if execution_date is None:
            return None
        return execution_date.strftime('%Y-%m-%d')

    def get_next_ds_nodash() -> str | None:
        ds = get_next_ds()
        if ds is None:
            return ds
        return ds.replace('-', '')

    @cache
    def get_prev_execution_date():
        # For manually triggered dagruns that aren't run on a schedule,
        # the "previous" execution date doesn't make sense, and should be set
        # to execution date for consistency with how execution_date is set
        # for manually triggered tasks, i.e. triggered_date == execution_date.
        if dag_run.external_trigger:
            return logical_date
        # Silence RemovedInAirflow3Warning for the previous_schedule() call only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RemovedInAirflow3Warning)
            return dag.previous_schedule(logical_date)

    @cache
    def get_prev_ds() -> str | None:
        execution_date = get_prev_execution_date()
        if execution_date is None:
            return None
        return execution_date.strftime(r'%Y-%m-%d')

    def get_prev_ds_nodash() -> str | None:
        prev_ds = get_prev_ds()
        if prev_ds is None:
            return None
        return prev_ds.replace('-', '')

    def get_triggering_events() -> dict[str, list[DatasetEvent]]:
        nonlocal dag_run
        # The dag_run may not be attached to the session anymore (code base is over-zealous with use of
        # `session.expunge_all()`) so re-attach it if we get called
        if dag_run not in session:
            dag_run = session.merge(dag_run, load=False)

        dataset_events = dag_run.consumed_dataset_events
        # Group the consumed events by the URI of the dataset they belong to.
        triggering_events: dict[str, list[DatasetEvent]] = defaultdict(list)
        for event in dataset_events:
            triggering_events[event.dataset.uri].append(event)

        return triggering_events

    # NOTE: If you add anything to this dict, make sure to also update the
    # definition in airflow/utils/context.pyi, and KNOWN_CONTEXT_KEYS in
    # airflow/utils/context.py!
    context = {
        'conf': conf,
        'dag': dag,
        'dag_run': dag_run,
        'data_interval_end': timezone.coerce_datetime(data_interval.end),
        'data_interval_start': timezone.coerce_datetime(data_interval.start),
        'ds': ds,
        'ds_nodash': ds_nodash,
        'execution_date': logical_date,
        'inlets': task.inlets,
        'logical_date': logical_date,
        'macros': macros,
        'next_ds': get_next_ds(),
        'next_ds_nodash': get_next_ds_nodash(),
        'next_execution_date': get_next_execution_date(),
        'outlets': task.outlets,
        'params': validated_params,
        'prev_data_interval_start_success': get_prev_data_interval_start_success(),
        'prev_data_interval_end_success': get_prev_data_interval_end_success(),
        'prev_ds': get_prev_ds(),
        'prev_ds_nodash': get_prev_ds_nodash(),
        'prev_execution_date': get_prev_execution_date(),
        'prev_execution_date_success': self.get_previous_execution_date(
            state=DagRunState.SUCCESS,
            session=session,
        ),
        'prev_start_date_success': get_prev_start_date_success(),
        'run_id': self.run_id,
        'task': task,
        'task_instance': self,
        'task_instance_key_str': f"{task.dag_id}__{task.task_id}__{ds_nodash}",
        'test_mode': self.test_mode,
        'ti': self,
        'tomorrow_ds': get_tomorrow_ds(),
        'tomorrow_ds_nodash': get_tomorrow_ds_nodash(),
        # Wrapped in a proxy, so get_triggering_events is not called while building the dict.
        'triggering_dataset_events': lazy_object_proxy.Proxy(get_triggering_events),
        'ts': ts,
        'ts_nodash': ts_nodash,
        'ts_nodash_with_tz': ts_nodash_with_tz,
        'var': {
            'json': VariableAccessor(deserialize_json=True),
            'value': VariableAccessor(deserialize_json=False),
        },
        'conn': ConnectionAccessor(),
        'yesterday_ds': get_yesterday_ds(),
        'yesterday_ds_nodash': get_yesterday_ds_nodash(),
    }
    # Mypy doesn't like turning existing dicts in to a TypeDict -- and we "lie" in the type stub to say it
    # is one, but in practice it isn't. See https://github.com/python/mypy/issues/8890
    return Context(context)  # type: ignore
@provide_session
def get_rendered_template_fields(self, session: Session = NEW_SESSION) -> None:
    """
    Put rendered template field values on the task, for presentation in UI.

    Values stored for this task instance are used when there are any;
    otherwise the templates are rendered now and each template field is
    passed through ``redact``.

    :param session: SQLAlchemy ORM Session
    """
    from airflow.models.renderedtifields import RenderedTaskInstanceFields

    stored_fields = RenderedTaskInstanceFields.get_templated_fields(self, session=session)
    if stored_fields:
        self.task = self.task.unmap(None)
        for name, value in stored_fields.items():
            setattr(self.task, name, value)
        return

    try:
        # If we get here, either the task hasn't run or the RTIF record was purged.
        from airflow.utils.log.secrets_masker import redact

        self.render_templates()
        for name in self.task.template_fields:
            value = getattr(self.task, name)
            setattr(self.task, name, redact(value, name))
    except (TemplateAssertionError, UndefinedError) as e:
        raise AirflowException(
            "Webserver does not have access to User-defined Macros or Filters "
            "when Dag Serialization is enabled. Hence for the task that have not yet "
            "started running, please use 'airflow tasks render' for debugging the "
            "rendering of template_fields."
        ) from e
@provide_session
def get_rendered_k8s_spec(self, session: Session = NEW_SESSION):
    """
    Return the rendered Kubernetes pod spec for this task instance.

    The spec stored via ``RenderedTaskInstanceFields.get_k8s_pod_yaml`` is
    returned when it is truthy; otherwise the spec is rendered on the fly
    with :meth:`render_k8s_pod_yaml`.

    :param session: SQLAlchemy session used for the lookup.
    :raises AirflowException: if on-the-fly rendering fails with a Jinja
        ``TemplateAssertionError`` or ``UndefinedError``.
    """
    from airflow.models.renderedtifields import RenderedTaskInstanceFields

    stored_spec = RenderedTaskInstanceFields.get_k8s_pod_yaml(self, session=session)
    if stored_spec:
        return stored_spec

    # Nothing usable was stored, so fall back to rendering the spec now.
    try:
        return self.render_k8s_pod_yaml()
    except (TemplateAssertionError, UndefinedError) as e:
        raise AirflowException(f"Unable to render a k8s spec for this taskinstance: {e}") from e
def overwrite_params_with_dag_run_conf(self, params, dag_run):
    """
    Merge ``dag_run.conf`` into ``params`` in place.

    Nothing happens when ``dag_run`` is falsy or its ``conf`` is empty.

    :param params: Mapping of task params; updated in place.
    :param dag_run: Object exposing a ``conf`` mapping, or a falsy value.
    """
    if not dag_run or not dag_run.conf:
        return

    self.log.debug("Updating task params (%s) with DagRun.conf (%s)", params, dag_run.conf)
    params.update(dag_run.conf)
def render_templates(self, context: Context | None = None) -> Operator:
    """
    Render the templated fields of this task instance's operator.

    ``self.task.render_template_fields`` is called with ``context`` (or with
    ``self.get_template_context()`` when ``context`` is falsy). If it returns
    a task object, that object replaces ``self.task``.

    :param context: Template context to render with; built on demand if
        omitted or empty.
    :return: The task that was in ``self.task`` before any replacement.
    """
    if not context:
        context = self.get_template_context()

    rendered = self.task.render_template_fields(context)
    if rendered is None:
        # Compatibility -- custom renderer, assume unmapped.
        return self.task

    previous = self.task
    self.task = rendered
    return previous
def render_k8s_pod_yaml(self) -> dict | None:
    """
    Construct the Kubernetes pod for this task instance and return it sanitized.

    The pod is built with ``PodGenerator.construct_pod`` from this task
    instance's identifiers, command and ``executor_config`` together with
    values read from ``KubeConfig``. It is then passed through
    ``settings.pod_mutation_hook`` and finally through
    ``ApiClient().sanitize_for_serialization``.

    :return: Whatever ``sanitize_for_serialization`` returns for the pod.
    """
    from kubernetes.client.api_client import ApiClient

    from airflow.kubernetes.kube_config import KubeConfig
    from airflow.kubernetes.kubernetes_helper_functions import create_pod_id  # Circular import
    from airflow.kubernetes.pod_generator import PodGenerator

    kube_settings = KubeConfig()
    # Entries are listed in the same order as the original keyword arguments.
    pod_kwargs = {
        "dag_id": self.dag_id,
        "run_id": self.run_id,
        "task_id": self.task_id,
        "map_index": self.map_index,
        "date": None,
        "pod_id": create_pod_id(self.dag_id, self.task_id),
        "try_number": self.try_number,
        "kube_image": kube_settings.kube_image,
        "args": self.command_as_list(),
        "pod_override_object": PodGenerator.from_obj(self.executor_config),
        "scheduler_job_id": "0",
        "namespace": kube_settings.executor_namespace,
        "base_worker_pod": PodGenerator.deserialize_model_file(kube_settings.pod_template_file),
    }
    pod = PodGenerator.construct_pod(**pod_kwargs)
    settings.pod_mutation_hook(pod)
    return ApiClient().sanitize_for_serialization(pod)
def get_email_subject_content(
    self, exception: BaseException, task: BaseOperator | None = None
) -> tuple[str, str, str]:
    """
    Build the subject and HTML bodies of the alert email for ``exception``.

    When no task is available (neither passed in nor set on this task
    instance), the built-in default templates are rendered with a plain
    autoescaping Jinja environment. Otherwise templates are rendered in a
    sandboxed environment against the full template context, and the
    ``[email]`` options ``subject_template`` / ``html_content_template`` may
    point at files that replace the defaults.

    :param exception: The exception being reported; its string form (with
        newlines turned into ``<br>``) is exposed as ``exception_html``.
    :param task: Operator to use; falls back to ``self.task`` if omitted.
    :return: ``(subject, html_content, html_content_err)``.
    """
    # For a ti from DB (without ti.task), return the default value.
    # The ``None`` default matters: without it a task instance that has no
    # ``task`` attribute raises AttributeError instead of using the defaults.
    if task is None:
        task = getattr(self, 'task', None)
    use_default = task is None
    exception_html = str(exception).replace('\n', '<br>')

    default_subject = 'Airflow alert: {{ti}}'
    # For reporting purposes, we report based on 1-indexed,
    # not 0-indexed lists (i.e. Try 1 instead of
    # Try 0 for the first attempt).
    default_html_content = (
        'Try {{try_number}} out of {{max_tries + 1}}<br>'
        'Exception:<br>{{exception_html}}<br>'
        'Log: <a href="{{ti.log_url}}">Link</a><br>'
        'Host: {{ti.hostname}}<br>'
        'Mark success: <a href="{{ti.mark_success_url}}">Link</a><br>'
    )

    default_html_content_err = (
        'Try {{try_number}} out of {{max_tries + 1}}<br>'
        'Exception:<br>Failed attempt to attach error logs<br>'
        'Log: <a href="{{ti.log_url}}">Link</a><br>'
        'Host: {{ti.hostname}}<br>'
        'Mark success: <a href="{{ti.mark_success_url}}">Link</a><br>'
    )

    # This function is called after changing the state from State.RUNNING,
    # so we need to subtract 1 from self.try_number here.
    current_try_number = self.try_number - 1
    additional_context: dict[str, Any] = {
        "exception": exception,
        "exception_html": exception_html,
        "try_number": current_try_number,
        "max_tries": self.max_tries,
    }

    if use_default:
        default_context = {"ti": self, **additional_context}
        jinja_env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(os.path.dirname(__file__)), autoescape=True
        )
        subject = jinja_env.from_string(default_subject).render(**default_context)
        html_content = jinja_env.from_string(default_html_content).render(**default_context)
        html_content_err = jinja_env.from_string(default_html_content_err).render(**default_context)

    else:
        # Use the DAG's get_template_env() to set force_sandboxed. Don't add
        # the flag to the function on task object -- that function can be
        # overridden, and adding a flag breaks backward compatibility.
        dag = self.task.get_dag()
        if dag:
            jinja_env = dag.get_template_env(force_sandboxed=True)
        else:
            jinja_env = SandboxedEnvironment(cache_size=0)
        jinja_context = self.get_template_context()
        context_merge(jinja_context, additional_context)

        def render(key: str, content: str) -> str:
            """Render ``content``, or the file named by ``[email] key`` if that option is set."""
            if conf.has_option('email', key):
                path = conf.get_mandatory_value('email', key)
                try:
                    with open(path) as f:
                        content = f.read()
                except FileNotFoundError:
                    self.log.warning("Could not find email template file '%r'. Using defaults...", path)
                except OSError:
                    self.log.exception("Error while using email template '%r'. Using defaults...", path)
            return render_template_to_string(jinja_env.from_string(content), jinja_context)

        subject = render('subject_template', default_subject)
        html_content = render('html_content_template', default_html_content)
        # NOTE(review): the error body reads the same 'html_content_template'
        # option, so a configured template file replaces it as well -- confirm
        # that this is intended.
        html_content_err = render('html_content_template', default_html_content_err)

    return subject, html_content, html_content_err
def email_alert(self, exception, task: BaseOperator) -> None:
    """
    Email an alert describing ``exception`` to the task's recipients.

    The full HTML body is tried first; if sending it raises any exception,
    the fallback body returned by :meth:`get_email_subject_content` is sent
    instead.

    :param exception: The exception to report.
    :param task: Operator whose ``email`` attribute holds the recipients.
    """
    subject, body, fallback_body = self.get_email_subject_content(exception, task=task)
    recipients = task.email
    assert recipients
    try:
        send_email(recipients, subject, body)
    except Exception:
        # Best effort: retry with the body that does not embed the exception text.
        send_email(recipients, subject, fallback_body)
def set_duration(self) -> None:
    """
    Store the elapsed run time in ``self.duration``.

    The value is ``end_date - start_date`` in seconds when both dates are
    set, and ``None`` otherwise. The result is logged at debug level.
    """
    ended, started = self.end_date, self.start_date
    self.duration = (ended - started).total_seconds() if ended and started else None
    self.log.debug("Task Duration set to %s", self.duration)
def _record_task_map_for_downstreams(self, task: Operator, value: Any, *, session: Session) -> None:
    """
    Validate a pushed value and record a ``TaskMap`` row for mapped downstreams.

    Nothing is recorded when ``task`` has no mapped dependants or is itself
    mapped.

    :param task: The operator that produced ``value``.
    :param value: The value being pushed.
    :param session: SQLAlchemy session the ``TaskMap`` is merged into.
    :raises XComForMappingNotPushed: if ``value`` is ``None``.
    :raises UnmappableXComTypePushed: if ``value`` is not a sequence or dict,
        or is a ``bytes``/``str``.
    :raises UnmappableXComLengthPushed: if the map length exceeds the
        ``[core] max_map_length`` option (fallback 1024).
    """
    if next(task.iter_mapped_dependants(), None) is None:
        # No mapped dependants, no need to validate.
        return
    # TODO: We don't push TaskMap for mapped task instances because it's not
    # currently possible for a downstream to depend on one individual mapped
    # task instance. This will change when we implement task group mapping,
    # and we'll need to further analyze the mapped task case.
    if task.is_mapped:
        return
    if value is None:
        raise XComForMappingNotPushed()
    if not isinstance(value, (collections.abc.Sequence, dict)) or isinstance(value, (bytes, str)):
        raise UnmappableXComTypePushed(value)
    if TYPE_CHECKING:
        # The isinstance() checks above guard this.
        assert isinstance(value, collections.abc.Collection)

    recorded_map = TaskMap.from_task_instance_xcom(self, value)
    length_limit = conf.getint("core", "max_map_length", fallback=1024)
    if recorded_map.length > length_limit:
        raise UnmappableXComLengthPushed(value, length_limit)
    session.merge(recorded_map)


@provide_session
def xcom_push(
    self,
    key: str,
    value: Any,
    execution_date: datetime | None = None,
    session: Session = NEW_SESSION,
) -> None:
    """
    Make an XCom available for tasks to pull.

    :param key: Key to store the value under.
    :param value: Value to store. What types are possible depends on whether
        ``enable_xcom_pickling`` is true or not. If so, this can be any
        picklable object; otherwise only JSON-serializable values may be used.
    :param execution_date: Deprecated. It is not forwarded to ``XCom.set``;
        passing a value that is not in the past only triggers a
        ``RemovedInAirflow3Warning``.
    :param session: SQLAlchemy session to use.
    :raises ValueError: if ``execution_date`` is earlier than the execution
        date of this task instance's DagRun.
    """
    if execution_date is not None:
        self_execution_date = self.get_dagrun(session).execution_date
        if execution_date < self_execution_date:
            raise ValueError(
                f'execution_date can not be in the past (current execution_date is '
                f'{self_execution_date}; received {execution_date})'
            )
        else:
            message = "Passing 'execution_date' to 'TaskInstance.xcom_push()' is deprecated."
            warnings.warn(message, RemovedInAirflow3Warning, stacklevel=3)

    XCom.set(
        key=key,
        value=value,
        task_id=self.task_id,
        dag_id=self.dag_id,
        run_id=self.run_id,
        map_index=self.map_index,
        session=session,
    )


@provide_session
def xcom_pull(
    self,
    task_ids: str | Iterable[str] | None = None,
    dag_id: str | None = None,
    key: str = XCOM_RETURN_KEY,
    include_prior_dates: bool = False,
    session: Session = NEW_SESSION,
    *,
    map_indexes: int | Iterable[int] | None = None,
    default: Any = None,
) -> Any:
    """Pull XComs that optionally meet certain criteria.

    :param key: A key for the XCom. If provided, only XComs with matching
        keys will be returned. The default key is ``'return_value'``, also
        available as constant ``XCOM_RETURN_KEY``. This key is automatically
        given to XComs returned by tasks (as opposed to being pushed
        manually). To remove the filter, pass *None*.
    :param task_ids: Only XComs from tasks with matching ids will be
        pulled. Pass *None* to remove the filter.
    :param dag_id: If provided, only pulls XComs from this DAG. If *None*
        (default), the DAG of the calling task is used.
    :param map_indexes: If provided, only pull XComs with matching indexes.
        If *None* (default), this is inferred from the task(s) being pulled
        (see below for details).
    :param include_prior_dates: If False, only XComs from the current
        execution_date are returned. If *True*, XComs from previous dates
        are returned as well.
    :param default: Value returned in the single-task case when no matching
        XCom exists.
    :param session: SQLAlchemy session to use.

    When pulling one single task (``task_ids`` is *None* or a str) and
    ``map_indexes`` is not an iterable, ``default`` is returned if no XCom
    matches. Otherwise, if ``map_indexes`` was given or the first matching
    row has a negative ``map_index``, that row's deserialized value is
    returned; if not, the result is built lazily by
    ``_LazyXComAccess.build_from_single_xcom``.

    When pulling multiple tasks (i.e. either ``task_ids`` or ``map_indexes``
    is a non-str iterable), a list of matching XCom values is returned. The
    elements are ordered by the position of their task id in ``task_ids``
    and then of their map index in ``map_indexes``.
    """
    if dag_id is None:
        dag_id = self.dag_id

    query = XCom.get_many(
        key=key,
        run_id=self.run_id,
        dag_ids=dag_id,
        task_ids=task_ids,
        map_indexes=map_indexes,
        include_prior_dates=include_prior_dates,
        session=session,
    )

    # NOTE: Since we're only fetching the value field and not the whole
    # class, the @recreate annotation does not kick in. Therefore we need to
    # call XCom.deserialize_value() manually.

    # We are only pulling one single task.
    single_task = task_ids is None or isinstance(task_ids, str)
    if single_task and not isinstance(map_indexes, Iterable):
        first = query.with_entities(
            XCom.run_id, XCom.task_id, XCom.dag_id, XCom.map_index, XCom.value
        ).first()
        if first is None:
            # No matching XCom at all.
            return default
        if map_indexes is not None or first.map_index < 0:
            return XCom.deserialize_value(first)
        return _LazyXComAccess.build_from_single_xcom(first, query)

    # At this point either task_ids or map_indexes is explicitly multi-value.
    rows = (
        (r.task_id, r.map_index, XCom.deserialize_value(r))
        for r in query.with_entities(XCom.task_id, XCom.map_index, XCom.value)
    )

    # Rank of each task id / map index in the caller's arguments. A
    # defaultdict(int) ranks everything 0 when that filter was not given.
    if task_ids is None:
        task_id_pos: dict[str, int] = defaultdict(int)
    elif isinstance(task_ids, str):
        task_id_pos = {task_ids: 0}
    else:
        task_id_pos = {task_id: i for i, task_id in enumerate(task_ids)}

    if map_indexes is None:
        map_index_pos: dict[int, int] = defaultdict(int)
    elif isinstance(map_indexes, int):
        map_index_pos = {map_indexes: 0}
    else:
        map_index_pos = {map_index: i for i, map_index in enumerate(map_indexes)}

    ordered = sorted(rows, key=lambda row: (task_id_pos[row[0]], map_index_pos[row[1]]))
    return [row[2] for row in ordered]
@provide_session
def get_num_running_task_instances(self, session: Session = NEW_SESSION) -> int:
    """
    Return the number of running task instances recorded in the DB.

    Counts ``TaskInstance`` rows that share this instance's ``dag_id`` and
    ``task_id`` and whose state is ``State.RUNNING``. The filter does not
    include ``run_id``.

    :param session: SQLAlchemy session to query with. Defaults to
        ``NEW_SESSION`` like the other ``@provide_session`` methods here, so
        the signature no longer presents it as a required argument.
    :return: The scalar result of the count query.
    """
    # .count() is inefficient
    return (
        session.query(func.count())
        .filter(
            TaskInstance.dag_id == self.dag_id,
            TaskInstance.task_id == self.task_id,
            TaskInstance.state == State.RUNNING,
        )
        .scalar()
    )
def init_run_context(self, raw: bool = False) -> None:
    """
    Record the ``raw`` flag and set up the log context for this task instance.

    :param raw: Value stored on ``self.raw`` before ``_set_context`` is
        called with this instance.
    """
    self.raw = raw
    self._set_context(self)
@staticmethod
def filter_for_tis(tis: Iterable[TaskInstance | TaskInstanceKey]) -> BooleanClauseList | None:
    """
    Return an SQLAlchemy filter selecting exactly the given task instances.

    :param tis: Task instances or task instance keys to select.
    :return: ``None`` when ``tis`` is empty, otherwise a filter clause.
    """
    # DictKeys type, (what we often pass here from the scheduler) is not directly indexable :(
    # Or it might be a generator, but we need to be able to iterate over it more than once
    ti_list = list(tis)
    if not ti_list:
        return None

    head = ti_list[0]
    dag_id, run_id = head.dag_id, head.run_id
    map_index, first_task_id = head.map_index, head.task_id

    # Common path optimisations: when all TIs are for the same dag_id and run_id, or same dag_id
    # and task_id -- this can be over 150x faster for huge numbers of TIs (20k+)

    # Only task_id varies.
    if all(t.dag_id == dag_id and t.run_id == run_id and t.map_index == map_index for t in ti_list):
        return and_(
            TaskInstance.dag_id == dag_id,
            TaskInstance.run_id == run_id,
            TaskInstance.map_index == map_index,
            TaskInstance.task_id.in_(t.task_id for t in ti_list),
        )

    # Only run_id varies.
    if all(
        t.dag_id == dag_id and t.task_id == first_task_id and t.map_index == map_index for t in ti_list
    ):
        return and_(
            TaskInstance.dag_id == dag_id,
            TaskInstance.run_id.in_(t.run_id for t in ti_list),
            TaskInstance.map_index == map_index,
            TaskInstance.task_id == first_task_id,
        )

    # Only map_index varies.
    if all(t.dag_id == dag_id and t.run_id == run_id and t.task_id == first_task_id for t in ti_list):
        return and_(
            TaskInstance.dag_id == dag_id,
            TaskInstance.run_id == run_id,
            TaskInstance.map_index.in_(t.map_index for t in ti_list),
            TaskInstance.task_id == first_task_id,
        )

    # General case: match on the full key tuple.
    key_columns = (
        TaskInstance.dag_id,
        TaskInstance.task_id,
        TaskInstance.run_id,
        TaskInstance.map_index,
    )
    return tuple_in_condition(key_columns, (ti.key.primary for ti in ti_list))


@classmethod
def ti_selector_condition(cls, vals: Collection[str | tuple[str, int]]) -> ColumnOperators:
    """
    Build an SQLAlchemy filter for a list where each element is either a
    task_id, or a tuple of (task_id, map_index).

    :param vals: The selectors to match.
    :return: ``false()`` when ``vals`` yields no selector, the single filter
        when only one kind of selector is present, otherwise the ``or_`` of
        both filters.

    :meta private:
    """
    # Compute a filter for TI.task_id and TI.map_index based on input values
    # For each item, it will either be a task_id, or (task_id, map_index)
    plain_task_ids = []
    mapped_selectors = []
    for selector in vals:
        if isinstance(selector, str):
            plain_task_ids.append(selector)
        else:
            mapped_selectors.append(selector)

    filters: list[ColumnOperators] = []
    if plain_task_ids:
        filters.append(cls.task_id.in_(plain_task_ids))
    if mapped_selectors:
        filters.append(tuple_in_condition((cls.task_id, cls.map_index), mapped_selectors))

    if not filters:
        return false()
    if len(filters) == 1:
        return filters[0]
    return or_(*filters)
# State of the task instance.
# Stores string version of the task state.
[docs]classSimpleTaskInstance:""" Simplified Task Instance. Used to send data between processes via Queues. """def__init__(self,dag_id:str,task_id:str,run_id:str,start_date:datetime|None,end_date:datetime|None,try_number:int,map_index:int,state:str,executor_config:Any,pool:str,queue:str,key:TaskInstanceKey,run_as_user:str|None=None,priority_weight:int|None=None,):self.dag_id=dag_idself.task_id=task_idself.run_id=run_idself.map_index=map_indexself.start_date=start_dateself.end_date=end_dateself.try_number=try_numberself.state=stateself.executor_config=executor_configself.run_as_user=run_as_userself.pool=poolself.priority_weight=priority_weightself.queue=queueself.key=key